general-chat

Sleeping

App Files Files Community

BotifyCloudAdmin committed on Jan 31

Commit

e3b4437

verified ·

1 Parent(s): 888a6e1

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -66

app.py CHANGED Viewed

@@ -5,15 +5,21 @@ from typing import List, Tuple
 # Define available models
 AVAILABLE_MODELS = {
-    "Sonar Pro": "sonar-pro",
-    "Sonar": "sonar",
 }
-PX_ENDPOINT_URL = "https://api.perplexity.ai"
-PX_API_KEY = os.getenv('PX_KEY')
 PASSWORD = os.getenv("PASSWD")  # Store the password in an environment variable
-px_client = OpenAI(base_url=PX_ENDPOINT_URL, api_key=PX_API_KEY)
 def respond(
     message: str,
@@ -24,9 +30,6 @@ def respond(
     temperature: float,
     top_p: float,
 ):
-    if model_choice not in AVAILABLE_MODELS:
-        return "Error: Invalid model selection."
     messages = [{"role": "system", "content": system_message}]
     for user_msg, assistant_msg in history:
         if user_msg:
@@ -36,36 +39,23 @@ def respond(
     messages.append({"role": "user", "content": message})
     response = ""
-    citations = []
-    try:
-        stream = px_client.chat.completions.create(
-            model=AVAILABLE_MODELS[model_choice],
-            messages=messages,
-            max_tokens=max_tokens,
-            temperature=temperature,
-            top_p=top_p,
-            stream=True,
-        )
-        for chunk in stream:
-            if hasattr(chunk, "choices") and chunk.choices:
-                token = chunk.choices[0].delta.content or ""
-                response += token
-                yield response  # Stream response as it arrives
-            if hasattr(chunk, "citations") and chunk.citations:
-                citations = chunk.citations
-        # Append citations as clickable links
-        if citations:
-            citation_text = "\n\nSources:\n" + "\n".join(
-                [f"[{i+1}] [{url}]({url})" for i, url in enumerate(citations)]
-            )
-            response += citation_text
-            yield response
-    except Exception as e:
-        yield f"Error: {str(e)}"
 def check_password(input_password):
     if input_password == PASSWORD:
@@ -84,37 +74,22 @@ with gr.Blocks() as demo:
         )
     with gr.Column(visible=False) as chat_interface:
-        system_prompt = gr.Textbox(
-            value="You are a helpful assistant.", label="System message"
-        )
         chat = gr.ChatInterface(
             respond,
-            chatbot=gr.Chatbot(height=400),  # Set the desired height here
-            additional_inputs=[system_prompt],  # Include system message explicitly
         )
-        with gr.Column():
-            model_choice = gr.Dropdown(
-                choices=list(AVAILABLE_MODELS.keys()),
-                value=list(AVAILABLE_MODELS.keys())[0],
-                label="Select Model"
-            )
-            max_tokens = gr.Slider(
-                minimum=1, maximum=30000, value=2048, step=100, label="Max new tokens"
-            )
-            temperature = gr.Slider(
-                minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
-            )
-            top_p = gr.Slider(
-                minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
-            )
-        # Update chat interface to include additional inputs
-        chat.additional_inputs.extend([model_choice, max_tokens, temperature, top_p])
-    submit_button.click(
-        check_password, inputs=password_input, outputs=[password_input, chat_interface]
-    )
 if __name__ == "__main__":
-    demo.launch()

 # Define available models
 AVAILABLE_MODELS = {
+    "DeepSeek V3": "deepseek-ai/DeepSeek-V3",
+    "Llama3.3-70b-Instruct": "meta-llama/Llama-3.3-70B-Instruct",
+    "Llama3.1-8b-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
 }
+HYPERB_ENDPOINT_URL = "https://api.hyperbolic.xyz/v1"
+HF_ENDPOINT_URL = "https://huggingface.co/api/inference-proxy/together"
+HYPERB_API_KEY = os.getenv('HYPERBOLIC_XYZ_KEY')
+HF_API_KEY = os.getenv('HF_KEY')
 PASSWORD = os.getenv("PASSWD")  # Store the password in an environment variable
+DEPLOY_TO_HF = ["deepseek-ai/DeepSeek-V3"]
+hyperb_client = OpenAI(base_url=HYPERB_ENDPOINT_URL, api_key=HYPERB_API_KEY)
+hf_client = OpenAI(base_url=HF_ENDPOINT_URL, api_key=HF_API_KEY)
 def respond(
     message: str,
     temperature: float,
     top_p: float,
 ):
     messages = [{"role": "system", "content": system_message}]
     for user_msg, assistant_msg in history:
         if user_msg:
     messages.append({"role": "user", "content": message})
     response = ""
+    if model_choice in DEPLOY_TO_HF:
+        this_client = hf_client
+    else:
+        this_client = hyperb_client
+    for chunk in this_client.chat.completions.create(
+        model=AVAILABLE_MODELS[model_choice],  # Use the selected model
+        messages=messages,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+        stream=True,
+    ):
+        token = chunk.choices[0].delta.content or ""
+        response += token
+        yield response
 def check_password(input_password):
     if input_password == PASSWORD:
         )
     with gr.Column(visible=False) as chat_interface:
         chat = gr.ChatInterface(
             respond,
+            additional_inputs=[
+                gr.Textbox(value="You are a helpful assistant.", label="System message"),
+                gr.Dropdown(
+                    choices=list(AVAILABLE_MODELS.keys()),
+                    value=list(AVAILABLE_MODELS.keys())[0],
+                    label="Select Model"
+                ),
+                gr.Slider(minimum=1, maximum=30000, value=2048, step=100, label="Max new tokens"),
+                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+                gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+            ],
         )
+    submit_button.click(check_password, inputs=password_input, outputs=[password_input, chat_interface])
 if __name__ == "__main__":
+    demo.launch(share=True)