awacke1 committed on
Commit c4f41db · 1 Parent(s): b13cc5a

Update app.py

Files changed (1)
  1. app.py +68 -9
app.py CHANGED
@@ -150,18 +150,77 @@ def readitaloud(result):
      components.html(documentHTML5, width=800, height=300)
      #return result

- # Example usage:
- #prompt = "This is a sample prompt with emojis! 😊"
- #response = "This is a sample response with emojis! 🚀"
- #filename = generate_filename(prompt, "html")
- #create_file(filename, prompt, response)
-
- # Example usage for reading aloud:
- #response_for_reading_aloud = "This is a response for reading aloud."
- #readitaloud(response_for_reading_aloud)
-
+ # Llama 7b
+ def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
+     # API_URL = 'https://qe55p8afio98s0u3.us-east-1.aws.endpoints.huggingface.cloud'  # Dr Llama
+     API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"  # HF default model for l7b
+     API_KEY = os.getenv('API_KEY')
+     MODEL1 = "meta-llama/Llama-2-7b-chat-hf"
+     MODEL1URL = "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf"
+     HF_KEY = os.getenv('HF_KEY')
+     headers = {
+         "Authorization": f"Bearer {HF_KEY}",
+         "Content-Type": "application/json"
+     }
+
+     model = model_choice
+     conversation = [{'role': 'system', 'content': 'You are a python script writer.'}]
+     conversation.append({'role': 'user', 'content': prompt})
+     if len(document_section) > 0:
+         conversation.append({'role': 'assistant', 'content': document_section})
+     start_time = time.time()
+     report = []
+     res_box = st.empty()
+     collected_chunks = []
+     collected_messages = []
+
+     try:
+         endpoint_url = API_URL
+         hf_token = API_KEY
+         client = InferenceClient(endpoint_url, token=hf_token)
+         gen_kwargs = dict(
+             max_new_tokens=512,
+             top_k=30,
+             top_p=0.9,
+             temperature=0.2,
+             repetition_penalty=1.02,
+             stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
+         )
+         # Stream the generation token by token and render progress in the UI.
+         stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
+         report = []
+         res_box = st.empty()
+         collected_chunks = []
+         collected_messages = []
+         allresults = ''
+         result = ''
+         for r in stream:
+             if r.token.special:
+                 continue
+             if r.token.text in gen_kwargs["stop_sequences"]:
+                 break
+             collected_chunks.append(r.token.text)
+             chunk_message = r.token.text
+             collected_messages.append(chunk_message)
+             try:
+                 report.append(r.token.text)
+                 if len(r.token.text) > 0:
+                     result = "".join(report).strip()
+                     res_box.markdown(f'*{result}*')
+             except:
+                 st.write('Stream llm issue')
+         st.write("Elapsed time:")
+         st.write(time.time() - start_time)
+         filename = generate_filename(result, model_choice)
+         create_file(filename, prompt, result, should_save)
+         readitaloud(result)
+         return result
+     except:
+         st.write('Llama model is asleep. Starting up now on A10 - please give 5 minutes then retry as KEDA scales up from zero to activate running container(s).')
+
+
  # Chat and Chat with files
- def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
+ def chat_with_model_gpt(prompt, document_section, model_choice='gpt-3.5-turbo'):
      model = model_choice
      conversation = [{'role': 'system', 'content': 'You are a python script writer.'}]
      conversation.append({'role': 'user', 'content': prompt})
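
For reference, a minimal standalone sketch of the InferenceClient streaming pattern the new chat_with_model uses. The prompt and generation parameters below are illustrative, and it assumes HF_KEY holds a Hugging Face token with access to the gated meta-llama/Llama-2-7b-chat-hf model:

    # Sketch only: stream tokens from the hosted Llama-2-7b-chat endpoint.
    # HF_KEY is an assumption; any valid token with model access works.
    import os
    from huggingface_hub import InferenceClient

    client = InferenceClient(
        "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf",
        token=os.getenv("HF_KEY"),
    )

    pieces = []
    # stream=True with details=True yields token-level responses
    # exposing r.token.text and r.token.special.
    for r in client.text_generation(
        "Write a haiku about Streamlit.",
        stream=True,
        details=True,
        max_new_tokens=64,
        temperature=0.2,
        stop_sequences=["\nUser:", "</s>"],
    ):
        if r.token.special:
            continue  # skip control tokens such as </s>
        pieces.append(r.token.text)

    print("".join(pieces).strip())

Streaming with details=True is what lets the app skip special tokens and break on its stop sequences while rendering partial output into the Streamlit placeholder as chunks arrive.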