awacke1 committed
Commit fd96369 · 1 Parent(s): b74139a

Update app.py

Files changed (1)
  1. app.py +40 -44
app.py CHANGED
@@ -174,50 +174,46 @@ def chat_with_model(prompt, document_section, model_choice='Llama-2-7b-chat-hf')
     collected_chunks = []
     collected_messages = []
 
-    try:
-        endpoint_url = API_URL
-        hf_token = API_KEY
-        client = InferenceClient(endpoint_url, token=hf_token)
-        gen_kwargs = dict(
-            max_new_tokens=512,
-            top_k=30,
-            top_p=0.9,
-            temperature=0.2,
-            repetition_penalty=1.02,
-            stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
-        )
-        stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
-        report=[]
-        res_box = st.empty()
-        collected_chunks=[]
-        collected_messages=[]
-        allresults=''
-        for r in stream:
-            if r.token.special:
-                continue
-            if r.token.text in gen_kwargs["stop_sequences"]:
-                break
-            collected_chunks.append(r.token.text)
-            chunk_message = r.token.text
-            collected_messages.append(chunk_message)
-            try:
-                report.append(r.token.text)
-                if len(r.token.text) > 0:
-                    result="".join(report).strip()
-                    res_box.markdown(f'*{result}*')
-
-            except:
-                st.write('Stream llm issue')
-        st.write("Elapsed time:")
-        st.write(time.time() - start_time)
-        filename = generate_filename(full_reply_content, choice)
-        create_file(filename, prompt, full_reply_content, should_save)
-        readitaloud(full_reply_content)
-        return result
-    except:
-        st.write('Llama model is asleep. Starting up now on A10 - please give 5 minutes then retry as KEDA scales up from zero to activate running container(s).')
-
-
+    endpoint_url = API_URL
+    hf_token = API_KEY
+    client = InferenceClient(endpoint_url, token=hf_token)
+    gen_kwargs = dict(
+        max_new_tokens=512,
+        top_k=30,
+        top_p=0.9,
+        temperature=0.2,
+        repetition_penalty=1.02,
+        stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
+    )
+    stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
+    report=[]
+    res_box = st.empty()
+    collected_chunks=[]
+    collected_messages=[]
+    allresults=''
+    for r in stream:
+        if r.token.special:
+            continue
+        if r.token.text in gen_kwargs["stop_sequences"]:
+            break
+        collected_chunks.append(r.token.text)
+        chunk_message = r.token.text
+        collected_messages.append(chunk_message)
+        try:
+            report.append(r.token.text)
+            if len(r.token.text) > 0:
+                result="".join(report).strip()
+                res_box.markdown(f'*{result}*')
+
+        except:
+            st.write('Stream llm issue')
+    st.write("Elapsed time:")
+    st.write(time.time() - start_time)
+    filename = generate_filename(full_reply_content, choice)
+    create_file(filename, prompt, full_reply_content, should_save)
+    readitaloud(full_reply_content)
+    return result
+
 
 # Chat and Chat with files
 def chat_with_model_gpt(prompt, document_section, model_choice='gpt-3.5-turbo'):
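
For context, the changed function streams tokens from a Hugging Face Inference Endpoint via `InferenceClient.text_generation(stream=True, details=True)` and renders them incrementally in a Streamlit placeholder. Below is a minimal, self-contained sketch of that pattern; the function name `show_stream` and the `endpoint_url`/`hf_token` parameters are placeholders for illustration, and the sketch returns the joined token text rather than using the `full_reply_content`, `generate_filename`, and `readitaloud` names defined elsewhere in app.py.

```python
# Minimal sketch of the streaming pattern in app.py (assumed names, not the commit's code).
import streamlit as st
from huggingface_hub import InferenceClient

def show_stream(prompt: str, endpoint_url: str, hf_token: str) -> str:
    client = InferenceClient(endpoint_url, token=hf_token)
    gen_kwargs = dict(
        max_new_tokens=512,
        top_k=30,
        top_p=0.9,
        temperature=0.2,
        repetition_penalty=1.02,
        stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
    )
    placeholder = st.empty()  # Streamlit slot re-rendered as tokens arrive
    pieces = []
    # stream=True with details=True yields per-token responses carrying .token metadata
    for r in client.text_generation(prompt, stream=True, details=True, **gen_kwargs):
        if r.token.special:  # skip special tokens such as </s>
            continue
        if r.token.text in gen_kwargs["stop_sequences"]:
            break
        pieces.append(r.token.text)
        placeholder.markdown("".join(pieces).strip())
    return "".join(pieces).strip()
```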