ngebodh committed Ā· verified
Commit 807a9b1 Ā· 1 Parent(s): a1ab263

Updated the inference API


Updated the app to limit the number of inference API calls per session, switch the default provider to an OpenAI-compatible Groq endpoint, and let users supply their own Hugging Face token instead.
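The core of the change is a per-session call limiter kept in `st.session_state`. A minimal standalone sketch of the pattern (the constant and key names follow app.py; the prompt text, display helper, and the elided inference call are illustrative):

```python
import streamlit as st

API_CALL_LIMIT = 5  # per-session cap, as in app.py

# st.session_state persists across Streamlit reruns within one browser
# session, so a plain counter is enough to gate API usage.
if "api_call_count" not in st.session_state:
    st.session_state.api_call_count = 0

if prompt := st.chat_input("Ask me a question"):
    if st.session_state.api_call_count >= API_CALL_LIMIT:
        st.warning("Sorry, you have reached the API call limit for this session.")
    else:
        st.session_state.api_call_count += 1
        # ... forward `prompt` to the inference client here ...
        remaining = API_CALL_LIMIT - st.session_state.api_call_count
        st.caption(f"API calls remaining: {remaining}/{API_CALL_LIMIT}")
```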

Files changed (1): app.py +132 -39
app.py CHANGED
@@ -1,7 +1,6 @@
 """ Simple Chatbot
 @author: Nigel Gebodh
 @email: [email protected]
-
 """
 import numpy as np
 import streamlit as st
@@ -11,14 +10,36 @@ import sys
 from dotenv import load_dotenv, dotenv_values
 load_dotenv()
 
+#===========================================
+updates = '''
+Updates
++ 04/20/2025
+    - Changed the inference from HF b/c
+      API calls are now very limited.
+    - Added API call limiting to allow for demoing.
+    - Added support for adding your own API token.
+
++ 04/16/2025
+    - Changed the inference points on HF b/c
+      older points are no longer supported.
+'''
+#-------------------------------------------
+
 
-#Comment_test_11_09_2024
 
+API_CALL_LIMIT = 5  # Per-session cap on inference calls
+
+if 'api_call_count' not in st.session_state:
+    st.session_state.api_call_count = 0
+    st.session_state.remaining_calls = API_CALL_LIMIT
+
 
-model_links ={
+model_links_hf ={
     "Gemma-3-27B-it":{
         "inf_point":"https://router.huggingface.co/nebius/v1",
         "link":"google/gemma-3-27b-it-fast",
@@ -45,6 +66,18 @@ model_links ={
     },
 }
 
+model_links_groq ={
+    "Gemma-2-9B-it":{
+        "inf_point":"https://api.groq.com/openai/v1",
+        "link":"gemma2-9b-it",
+    },
+    "Meta-Llama-3.1-8B":{
+        "inf_point":"https://api.groq.com/openai/v1",
+        "link":"llama-3.1-8b-instant",
+    },
+}
+
 #Pull info about the model to display
 model_info ={
     "Mistral-7B":
@@ -63,6 +96,10 @@ model_info ={
     {'description':"""The Gemma model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
     \nIt was created by [**Google's AI Team**](https://blog.google/technology/developers/gemma-open-models/) and has over **2 billion parameters.** \n""",
     'logo':'https://pbs.twimg.com/media/GG3sJg7X0AEaNIq.jpg'},
+    "Gemma-2-9B-it":
+    {'description':"""The Gemma model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
+    \nIt was created by [**Google's AI Team**](https://blog.google/technology/developers/gemma-open-models/) and has over **9 billion parameters.** \n""",
+    'logo':'https://pbs.twimg.com/media/GG3sJg7X0AEaNIq.jpg'},
     "Zephyr-7B":
     {'description':"""The Zephyr model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
     \nFrom Huggingface: \n\
@@ -118,6 +155,44 @@ def reset_conversation():
 
 
 
+# --- Sidebar Setup ---
+st.sidebar.title("Chatbot Settings")
+
+#Define model clients
+client_names = ["Provided API Call", "HF-Token"]
+client_select = st.sidebar.selectbox("Select Model Client", client_names)
+
+if "HF-Token" in client_select:
+    try:
+        if "API_token" not in st.session_state:
+            st.session_state.API_token = None
+
+        st.session_state.API_token = st.sidebar.text_input("Enter your Hugging Face Access Token", type="password")
+        model_links = model_links_hf
+
+    except Exception as e:
+        st.sidebar.error(f"Credentials Error:\n\n {e}")
+
+elif "Provided API Call" in client_select:
+    try:
+        if "API_token" not in st.session_state:
+            st.session_state.API_token = None
+
+        # Demo mode: use the Groq token provided via environment secrets
+        st.session_state.API_token = os.environ.get('GROQ_API_TOKEN')
+
+        model_links = model_links_groq
+
+    except Exception as e:
+        st.sidebar.error(f"Credentials Error:\n\n {e}")
+
 
 # Define the available models
 models =[key for key in model_links.keys()]
@@ -129,11 +204,14 @@ selected_model = st.sidebar.selectbox("Select Model", models)
 temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, (0.5))
 
 
 #Add reset button to clear conversation
-st.sidebar.button('Reset Chat', on_click=reset_conversation) #Reset button
+st.sidebar.button('Reset Chat', on_click=reset_conversation, type="primary") #Reset button
 
+st.sidebar.divider() # Add a visual separator
 
 # Create model description
+st.sidebar.subheader(f"About {selected_model}")
 st.sidebar.write(f"You're now chatting with **{selected_model}**")
 st.sidebar.markdown(model_info[selected_model]['description'])
 st.sidebar.image(model_info[selected_model]['logo'])
@@ -149,7 +227,6 @@ if "prev_option" not in st.session_state:
 
 if st.session_state.prev_option != selected_model:
     st.session_state.messages = []
-    # st.write(f"Changed to {selected_model}")
     st.session_state.prev_option = selected_model
     reset_conversation()
 
@@ -161,12 +238,11 @@ repo_id = model_links[selected_model]
 # initialize the client
 client = OpenAI(
     base_url=model_links[selected_model]["inf_point"],
-    api_key=os.environ.get('HUGGINGFACEHUB_API_TOKEN')
+    api_key=st.session_state.API_token  # user-supplied HF token or env-provided Groq token, set above
 )
 
 
 st.subheader(f'AI - {selected_model}')
-# st.title(f'ChatBot Using {selected_model}')
 
 # Set a default model
 if selected_model not in st.session_state:
@@ -184,8 +260,9 @@ for message in st.session_state.messages:
 
 
 
-# Accept user input
-if prompt := st.chat_input(f"Hi I'm {selected_model}, ask me a question"):
+if prompt := st.chat_input(f"Hi I'm {selected_model}, ask me a question "):
 
     # Display user message in chat message container
     with st.chat_message("user"):
@@ -194,38 +271,54 @@ if prompt := st.chat_input(f"Hi I'm {selected_model}, ask me a question"):
     st.session_state.messages.append({"role": "user", "content": prompt})
 
 
-    # Display assistant response in chat message container
-    with st.chat_message("assistant"):
-
-        try:
-            stream = client.chat.completions.create(
-                model=model_links[selected_model]["link"],
-                messages=[
-                    {"role": m["role"], "content": m["content"]}
-                    for m in st.session_state.messages
-                ],
-                temperature=temp_values,
-                stream=True,
-                max_tokens=3000,
-            )
-
-            response = st.write_stream(stream)
-
-        except Exception as e:
-            response = "šŸ˜µā€šŸ’« Looks like someone unplugged something!\
-                \n Either the model space is being updated or something is down.\
-                \n\
-                \n Try again later. \
-                \n\
-                \n Here's a random pic of a 🐶:"
-            st.write(response)
-            random_dog_pick = 'https://random.dog/' + random_dog[np.random.randint(len(random_dog))]
-            st.image(random_dog_pick)
-            st.write("This was the error message:")
-            st.write(e)
-
-st.session_state.messages.append({"role": "assistant", "content": response})
+    if st.session_state.api_call_count >= API_CALL_LIMIT:
+        # Warn without calling the API; log the warning as an assistant turn
+        response = "LIMIT REACHED: Sorry, you have reached the API call limit for this session."
+        st.warning("Sorry, you have reached the API call limit for this session.")
+        st.session_state.messages.append({"role": "assistant", "content": response})
+
+    else:
+        # Display assistant response in chat message container
+        with st.chat_message("assistant"):
+            try:
+                st.session_state.api_call_count += 1
+                # Add a spinner for better UX while waiting
+                with st.spinner(f"Asking {selected_model}..."):
+                    stream = client.chat.completions.create(
+                        model=model_links[selected_model]["link"],
+                        messages=[
+                            {"role": m["role"], "content": m["content"]}
+                            for m in st.session_state.messages
+                        ],
+                        temperature=temp_values,
+                        stream=True,
+                        max_tokens=3000,
+                    )
+
+                response = st.write_stream(stream)
+
+                remaining_calls = API_CALL_LIMIT - st.session_state.api_call_count
+                st.markdown(f"<span style='float: right; font-size: 0.8em; color: gray;'>API calls: ({remaining_calls}/{API_CALL_LIMIT})</span>", unsafe_allow_html=True)
+
+            except Exception as e:
+                response = "šŸ˜µā€šŸ’« Looks like someone unplugged something!\
+                    \n Either the model space is being updated or something is down.\
+                    \n\
+                    \n Try again later. \
+                    \n\
+                    \n Here's a random pic of a 🐶:"
+                st.write(response)
+                random_dog_pick = 'https://random.dog/' + random_dog[np.random.randint(len(random_dog))]
+                st.image(random_dog_pick)
+                st.write("This was the error message:")
+                st.write(e)
+
+        st.session_state.messages.append({"role": "assistant", "content": response})
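Both providers are reached through the same OpenAI-compatible client; only `base_url` and the API key change. A minimal standalone sketch of that pattern (the endpoint URL, model ID, and `GROQ_API_TOKEN` variable name come from the diff; the prompt is illustrative):

```python
import os
from openai import OpenAI

# Groq exposes an OpenAI-compatible endpoint, so the stock client works
# once base_url points at it (values as in model_links_groq above).
client = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=os.environ.get("GROQ_API_TOKEN"),
)

stream = client.chat.completions.create(
    model="llama-3.1-8b-instant",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    temperature=0.5,
    stream=True,
    max_tokens=100,
)
for chunk in stream:
    # each chunk carries an incremental delta of the assistant reply
    print(chunk.choices[0].delta.content or "", end="")
```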