File size: 3,310 Bytes
7026716
0ec04e2
7026716
 
0ec04e2
e73ae5f
 
 
 
 
 
 
d6b7bed
 
 
 
1561b35
7026716
d6b7bed
 
 
 
0ec04e2
 
7026716
 
 
e73ae5f
7026716
 
 
0ec04e2
 
7026716
 
 
 
 
0ec04e2
 
 
d6b7bed
 
 
 
 
 
 
e73ae5f
7026716
0ec04e2
 
 
7026716
0ec04e2
7026716
0ec04e2
 
 
1561b35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ec04e2
1561b35
 
 
 
 
e73ae5f
 
 
 
 
 
1561b35
 
 
 
0ec04e2
1561b35
0ec04e2
 
e73ae5f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import os
import gradio as gr
from openai import OpenAI
from typing import List, Tuple

# Define available models
# Maps UI display names (shown in the model dropdown) -> provider model IDs
# sent in API requests.
AVAILABLE_MODELS = {
    "DeepSeek V3": "deepseek-ai/DeepSeek-V3",
    "Llama3.3-70b-Instruct": "meta-llama/Llama-3.3-70B-Instruct",
    "Llama3.1-8b-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
}

# OpenAI-compatible inference endpoints.
HYPERB_ENDPOINT_URL = "https://api.hyperbolic.xyz/v1"
HF_ENDPOINT_URL = "https://huggingface.co/api/inference-proxy/together"
# Credentials come from the environment; os.getenv returns None when unset.
HYPERB_API_KEY = os.getenv('HYPERBOLIC_XYZ_KEY')
HF_API_KEY = os.getenv('HF_KEY')
PASSWORD = os.getenv("PASSWD")  # Store the password in an environment variable

# Model IDs (values of AVAILABLE_MODELS) to be served via the Hugging Face
# proxy endpoint; all other models go to the Hyperbolic endpoint.
DEPLOY_TO_HF = ["deepseek-ai/DeepSeek-V3"]

# One client per endpoint, constructed once at import time.
hyperb_client = OpenAI(base_url=HYPERB_ENDPOINT_URL, api_key=HYPERB_API_KEY)
hf_client = OpenAI(base_url=HF_ENDPOINT_URL, api_key=HF_API_KEY)

def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    model_choice: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream a chat completion for *message* given prior *history*.

    Args:
        message: The latest user message.
        history: Prior (user, assistant) turn pairs; empty strings/None
            entries are skipped.
        system_message: System prompt placed first in the conversation.
        model_choice: Display name key into ``AVAILABLE_MODELS``.
        max_tokens: Completion token cap passed to the API.
        temperature: Sampling temperature passed to the API.
        top_p: Nucleus-sampling cutoff passed to the API.

    Yields:
        The accumulated response text after each streamed token, so the
        Gradio ChatInterface can render progressive output.
    """
    # Resolve the display name to the provider model ID once up front.
    model_id = AVAILABLE_MODELS[model_choice]

    # Rebuild the full conversation in OpenAI chat format.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # BUG FIX: DEPLOY_TO_HF contains model IDs (AVAILABLE_MODELS values),
    # but the original code tested the display name (`model_choice`), which
    # is a key — the membership test was always False, so DeepSeek requests
    # never reached the HF proxy. Compare the resolved model ID instead.
    if model_id in DEPLOY_TO_HF:
        this_client = hf_client
    else:
        this_client = hyperb_client

    response = ""
    for chunk in this_client.chat.completions.create(
        model=model_id,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        # Delta content may be None on some chunks (e.g. role-only deltas).
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response

def check_password(input_password):
    """Gate access to the chat UI behind the PASSWD environment variable.

    Args:
        input_password: Text the user typed into the password box.

    Returns:
        A pair of ``gr.update`` objects for (password_input, chat_interface):
        on success the password box is hidden and the chat revealed; on
        failure the box is cleared and the chat stays hidden.
    """
    import hmac  # local import: stdlib constant-time comparison

    # Use a constant-time comparison so timing does not leak how much of the
    # password prefix matched; encode to bytes because compare_digest on str
    # requires ASCII. Also handle an unset PASSWD (PASSWORD is None) by
    # treating every attempt as a failure instead of raising TypeError.
    ok = PASSWORD is not None and hmac.compare_digest(
        input_password.encode("utf-8"), PASSWORD.encode("utf-8")
    )
    if ok:
        return gr.update(visible=False), gr.update(visible=True)
    return gr.update(value="", interactive=True), gr.update(visible=False)

# Top-level UI definition: a password gate column shown first, and a hidden
# chat column revealed by check_password on a correct password.
with gr.Blocks() as demo:
    # Password gate, visible on load.
    with gr.Column():
        password_input = gr.Textbox(
            type="password", label="Enter Password", interactive=True
        )
        submit_button = gr.Button("Submit")
        # NOTE(review): this error box is created but never made visible by
        # check_password (its outputs are only password_input and
        # chat_interface) — confirm whether it is intentionally unused.
        error_message = gr.Textbox(
            label="Error", visible=False, interactive=False
        )

    # Chat UI, hidden until the password check succeeds.
    with gr.Column(visible=False) as chat_interface:
        chat = gr.ChatInterface(
            respond,
            # Extra controls appended after the message box; their order must
            # match respond's parameters after (message, history).
            additional_inputs=[
                gr.Textbox(value="You are a helpful assistant.", label="System message"),
                gr.Dropdown(
                    choices=list(AVAILABLE_MODELS.keys()),
                    value=list(AVAILABLE_MODELS.keys())[0],
                    label="Select Model"
                ),
                gr.Slider(minimum=1, maximum=30000, value=2048, step=100, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
            ],
        )

    # Wire the gate: on click, update the password box and the chat column.
    submit_button.click(check_password, inputs=password_input, outputs=[password_input, chat_interface])

if __name__ == "__main__":
    # share=True asks Gradio to open a public tunnel URL in addition to the
    # local server.
    demo.launch(share=True)