Spaces:
Sleeping
Sleeping
File size: 3,310 Bytes
7026716 0ec04e2 7026716 0ec04e2 e73ae5f d6b7bed 1561b35 7026716 d6b7bed 0ec04e2 7026716 e73ae5f 7026716 0ec04e2 7026716 0ec04e2 d6b7bed e73ae5f 7026716 0ec04e2 7026716 0ec04e2 7026716 0ec04e2 1561b35 0ec04e2 1561b35 e73ae5f 1561b35 0ec04e2 1561b35 0ec04e2 e73ae5f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import os
import secrets
from typing import List, Tuple

import gradio as gr
from openai import OpenAI
# Friendly display name (shown in the UI dropdown) -> provider model id.
AVAILABLE_MODELS = {
    "DeepSeek V3": "deepseek-ai/DeepSeek-V3",
    "Llama3.3-70b-Instruct": "meta-llama/Llama-3.3-70B-Instruct",
    "Llama3.1-8b-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
}

# OpenAI-compatible chat-completion endpoints for the two backends.
HYPERB_ENDPOINT_URL = "https://api.hyperbolic.xyz/v1"
HF_ENDPOINT_URL = "https://huggingface.co/api/inference-proxy/together"

# Credentials come from the environment; os.getenv returns None when unset.
HYPERB_API_KEY = os.getenv('HYPERBOLIC_XYZ_KEY')
HF_API_KEY = os.getenv('HF_KEY')
PASSWORD = os.getenv("PASSWD")  # Store the password in an environment variable

# Provider model ids that should be served through the HF proxy endpoint;
# every other model goes to Hyperbolic.
DEPLOY_TO_HF = ["deepseek-ai/DeepSeek-V3"]

# One OpenAI-compatible client per backend, created once at import time.
hyperb_client = OpenAI(base_url=HYPERB_ENDPOINT_URL, api_key=HYPERB_API_KEY)
hf_client = OpenAI(base_url=HF_ENDPOINT_URL, api_key=HF_API_KEY)
def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    model_choice: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream a chat completion for *message* given the conversation so far.

    Args:
        message: The new user turn.
        history: Prior (user, assistant) turn pairs from gr.ChatInterface.
        system_message: System prompt placed first in the message list.
        model_choice: Display-name key into AVAILABLE_MODELS.
        max_tokens / temperature / top_p: Sampling parameters forwarded
            to the chat-completions API.

    Yields:
        The accumulated assistant reply after each received token, so
        Gradio renders incremental streaming output.
    """
    # Rebuild the OpenAI-style message list from the Gradio tuple history.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # BUG FIX: DEPLOY_TO_HF holds provider model ids, but model_choice is the
    # display-name key of AVAILABLE_MODELS, so the original membership test
    # ("model_choice in DEPLOY_TO_HF") could never match. Resolve the id
    # first, then route.
    model_id = AVAILABLE_MODELS[model_choice]
    this_client = hf_client if model_id in DEPLOY_TO_HF else hyperb_client

    response = ""
    for chunk in this_client.chat.completions.create(
        model=model_id,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        # Some providers emit keep-alive/usage chunks with an empty
        # choices list; indexing them would raise IndexError.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response
def check_password(input_password):
    """Gate the chat UI behind the shared password from the PASSWD env var.

    Args:
        input_password: Text the user typed into the password box.

    Returns:
        A pair of Gradio updates for (password_input, chat_interface):
        on success the password box is hidden and the chat is revealed;
        on failure the box is cleared and the chat stays hidden.
    """
    # compare_digest is a constant-time comparison, avoiding a timing side
    # channel on the untrusted input; also refuse to match when PASSWD is
    # unset (PASSWORD is None) rather than passing None to the comparison.
    authorized = PASSWORD is not None and secrets.compare_digest(
        input_password, PASSWORD
    )
    if authorized:
        return gr.update(visible=False), gr.update(visible=True)
    return gr.update(value="", interactive=True), gr.update(visible=False)
# UI layout: a password gate that, once passed, reveals the chat interface.
with gr.Blocks() as demo:
    with gr.Column():
        password_input = gr.Textbox(
            type="password", label="Enter Password", interactive=True
        )
        submit_button = gr.Button("Submit")
        # NOTE(review): this textbox is defined but never written to by any
        # event handler, so wrong-password attempts show no visible error.
        error_message = gr.Textbox(
            label="Error", visible=False, interactive=False
        )

    # Hidden until check_password succeeds (toggled via the click handler below).
    with gr.Column(visible=False) as chat_interface:
        chat = gr.ChatInterface(
            respond,
            # These extra inputs are appended after (message, history) in the
            # respond() signature, in this order.
            additional_inputs=[
                gr.Textbox(value="You are a helpful assistant.", label="System message"),
                gr.Dropdown(
                    choices=list(AVAILABLE_MODELS.keys()),
                    value=list(AVAILABLE_MODELS.keys())[0],
                    label="Select Model"
                ),
                gr.Slider(minimum=1, maximum=30000, value=2048, step=100, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
            ],
        )

    # The two outputs map positionally to check_password's returned updates:
    # (password_input, chat_interface).
    submit_button.click(check_password, inputs=password_input, outputs=[password_input, chat_interface])
if __name__ == "__main__":
    # share=True additionally exposes a public Gradio link beyond the local
    # server. (Removed a stray trailing "|" scrape artifact that made this
    # line a syntax error.)
    demo.launch(share=True)