BotifyCloudAdmin committed on
Commit
e3b4437
·
verified ·
1 Parent(s): 888a6e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -66
app.py CHANGED
@@ -5,15 +5,21 @@ from typing import List, Tuple
5
 
6
  # Define available models
7
  AVAILABLE_MODELS = {
8
- "Sonar Pro": "sonar-pro",
9
- "Sonar": "sonar",
 
10
  }
11
 
12
- PX_ENDPOINT_URL = "https://api.perplexity.ai"
13
- PX_API_KEY = os.getenv('PX_KEY')
 
 
14
  PASSWORD = os.getenv("PASSWD") # Store the password in an environment variable
15
 
16
- px_client = OpenAI(base_url=PX_ENDPOINT_URL, api_key=PX_API_KEY)
 
 
 
17
 
18
  def respond(
19
  message: str,
@@ -24,9 +30,6 @@ def respond(
24
  temperature: float,
25
  top_p: float,
26
  ):
27
- if model_choice not in AVAILABLE_MODELS:
28
- return "Error: Invalid model selection."
29
-
30
  messages = [{"role": "system", "content": system_message}]
31
  for user_msg, assistant_msg in history:
32
  if user_msg:
@@ -36,36 +39,23 @@ def respond(
36
  messages.append({"role": "user", "content": message})
37
 
38
  response = ""
39
- citations = []
40
-
41
- try:
42
- stream = px_client.chat.completions.create(
43
- model=AVAILABLE_MODELS[model_choice],
44
- messages=messages,
45
- max_tokens=max_tokens,
46
- temperature=temperature,
47
- top_p=top_p,
48
- stream=True,
49
- )
50
-
51
- for chunk in stream:
52
- if hasattr(chunk, "choices") and chunk.choices:
53
- token = chunk.choices[0].delta.content or ""
54
- response += token
55
- yield response # Stream response as it arrives
56
- if hasattr(chunk, "citations") and chunk.citations:
57
- citations = chunk.citations
58
-
59
- # Append citations as clickable links
60
- if citations:
61
- citation_text = "\n\nSources:\n" + "\n".join(
62
- [f"[{i+1}] [{url}]({url})" for i, url in enumerate(citations)]
63
- )
64
- response += citation_text
65
- yield response
66
-
67
- except Exception as e:
68
- yield f"Error: {str(e)}"
69
 
70
  def check_password(input_password):
71
  if input_password == PASSWORD:
@@ -84,37 +74,22 @@ with gr.Blocks() as demo:
84
  )
85
 
86
  with gr.Column(visible=False) as chat_interface:
87
- system_prompt = gr.Textbox(
88
- value="You are a helpful assistant.", label="System message"
89
- )
90
  chat = gr.ChatInterface(
91
  respond,
92
- chatbot=gr.Chatbot(height=400), # Set the desired height here
93
- additional_inputs=[system_prompt], # Include system message explicitly
 
 
 
 
 
 
 
 
 
94
  )
95
 
96
- with gr.Column():
97
- model_choice = gr.Dropdown(
98
- choices=list(AVAILABLE_MODELS.keys()),
99
- value=list(AVAILABLE_MODELS.keys())[0],
100
- label="Select Model"
101
- )
102
- max_tokens = gr.Slider(
103
- minimum=1, maximum=30000, value=2048, step=100, label="Max new tokens"
104
- )
105
- temperature = gr.Slider(
106
- minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
107
- )
108
- top_p = gr.Slider(
109
- minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
110
- )
111
-
112
- # Update chat interface to include additional inputs
113
- chat.additional_inputs.extend([model_choice, max_tokens, temperature, top_p])
114
-
115
- submit_button.click(
116
- check_password, inputs=password_input, outputs=[password_input, chat_interface]
117
- )
118
 
119
  if __name__ == "__main__":
120
- demo.launch()
 
5
 
6
  # Define available models
7
  AVAILABLE_MODELS = {
8
+ "DeepSeek V3": "deepseek-ai/DeepSeek-V3",
9
+ "Llama3.3-70b-Instruct": "meta-llama/Llama-3.3-70B-Instruct",
10
+ "Llama3.1-8b-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
11
  }
12
 
13
+ HYPERB_ENDPOINT_URL = "https://api.hyperbolic.xyz/v1"
14
+ HF_ENDPOINT_URL = "https://huggingface.co/api/inference-proxy/together"
15
+ HYPERB_API_KEY = os.getenv('HYPERBOLIC_XYZ_KEY')
16
+ HF_API_KEY = os.getenv('HF_KEY')
17
  PASSWORD = os.getenv("PASSWD") # Store the password in an environment variable
18
 
19
+ DEPLOY_TO_HF = ["deepseek-ai/DeepSeek-V3"]
20
+
21
+ hyperb_client = OpenAI(base_url=HYPERB_ENDPOINT_URL, api_key=HYPERB_API_KEY)
22
+ hf_client = OpenAI(base_url=HF_ENDPOINT_URL, api_key=HF_API_KEY)
23
 
24
  def respond(
25
  message: str,
 
30
  temperature: float,
31
  top_p: float,
32
  ):
 
 
 
33
  messages = [{"role": "system", "content": system_message}]
34
  for user_msg, assistant_msg in history:
35
  if user_msg:
 
39
  messages.append({"role": "user", "content": message})
40
 
41
  response = ""
42
+
43
+ if model_choice in DEPLOY_TO_HF:
44
+ this_client = hf_client
45
+ else:
46
+ this_client = hyperb_client
47
+
48
+ for chunk in this_client.chat.completions.create(
49
+ model=AVAILABLE_MODELS[model_choice], # Use the selected model
50
+ messages=messages,
51
+ max_tokens=max_tokens,
52
+ temperature=temperature,
53
+ top_p=top_p,
54
+ stream=True,
55
+ ):
56
+ token = chunk.choices[0].delta.content or ""
57
+ response += token
58
+ yield response
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  def check_password(input_password):
61
  if input_password == PASSWORD:
 
74
  )
75
 
76
  with gr.Column(visible=False) as chat_interface:
 
 
 
77
  chat = gr.ChatInterface(
78
  respond,
79
+ additional_inputs=[
80
+ gr.Textbox(value="You are a helpful assistant.", label="System message"),
81
+ gr.Dropdown(
82
+ choices=list(AVAILABLE_MODELS.keys()),
83
+ value=list(AVAILABLE_MODELS.keys())[0],
84
+ label="Select Model"
85
+ ),
86
+ gr.Slider(minimum=1, maximum=30000, value=2048, step=100, label="Max new tokens"),
87
+ gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
88
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
89
+ ],
90
  )
91
 
92
+ submit_button.click(check_password, inputs=password_input, outputs=[password_input, chat_interface])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  if __name__ == "__main__":
95
+ demo.launch(share=True)