awacke1 committed
Commit fd96369 · 1 Parent(s): b74139a

Update app.py

Files changed (1)
  1. app.py +40 -44
app.py CHANGED
@@ -174,50 +174,46 @@ def chat_with_model(prompt, document_section, model_choice='Llama-2-7b-chat-hf')
     collected_chunks = []
     collected_messages = []
 
-    try:
-        endpoint_url = API_URL
-        hf_token = API_KEY
-        client = InferenceClient(endpoint_url, token=hf_token)
-        gen_kwargs = dict(
-            max_new_tokens=512,
-            top_k=30,
-            top_p=0.9,
-            temperature=0.2,
-            repetition_penalty=1.02,
-            stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
-        )
-        stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
-        report=[]
-        res_box = st.empty()
-        collected_chunks=[]
-        collected_messages=[]
-        allresults=''
-        for r in stream:
-            if r.token.special:
-                continue
-            if r.token.text in gen_kwargs["stop_sequences"]:
-                break
-            collected_chunks.append(r.token.text)
-            chunk_message = r.token.text
-            collected_messages.append(chunk_message)
-            try:
-                report.append(r.token.text)
-                if len(r.token.text) > 0:
-                    result="".join(report).strip()
-                    res_box.markdown(f'*{result}*')
-
-            except:
-                st.write('Stream llm issue')
-        st.write("Elapsed time:")
-        st.write(time.time() - start_time)
-        filename = generate_filename(full_reply_content, choice)
-        create_file(filename, prompt, full_reply_content, should_save)
-        readitaloud(full_reply_content)
-        return result
-    except:
-        st.write('Llama model is asleep. Starting up now on A10 - please give 5 minutes then retry as KEDA scales up from zero to activate running container(s).')
-
-
+    endpoint_url = API_URL
+    hf_token = API_KEY
+    client = InferenceClient(endpoint_url, token=hf_token)
+    gen_kwargs = dict(
+        max_new_tokens=512,
+        top_k=30,
+        top_p=0.9,
+        temperature=0.2,
+        repetition_penalty=1.02,
+        stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
+    )
+    stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
+    report=[]
+    res_box = st.empty()
+    collected_chunks=[]
+    collected_messages=[]
+    allresults=''
+    for r in stream:
+        if r.token.special:
+            continue
+        if r.token.text in gen_kwargs["stop_sequences"]:
+            break
+        collected_chunks.append(r.token.text)
+        chunk_message = r.token.text
+        collected_messages.append(chunk_message)
+        try:
+            report.append(r.token.text)
+            if len(r.token.text) > 0:
+                result="".join(report).strip()
+                res_box.markdown(f'*{result}*')
+
+        except:
+            st.write('Stream llm issue')
+    st.write("Elapsed time:")
+    st.write(time.time() - start_time)
+    filename = generate_filename(full_reply_content, choice)
+    create_file(filename, prompt, full_reply_content, should_save)
+    readitaloud(full_reply_content)
+    return result
+
 
 # Chat and Chat with files
 def chat_with_model_gpt(prompt, document_section, model_choice='gpt-3.5-turbo'):
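
For context, the changed function streams tokens from a Hugging Face Inference Endpoint via `InferenceClient.text_generation(stream=True, details=True)` and renders them incrementally in a Streamlit placeholder. Below is a minimal, self-contained sketch of that pattern; the function name `show_stream` and the `endpoint_url`/`hf_token` parameters are placeholders for illustration, and the sketch returns the joined token text rather than using the `full_reply_content`, `generate_filename`, and `readitaloud` names defined elsewhere in app.py.

```python
# Minimal sketch of the streaming pattern in app.py (assumed names, not the commit's code).
import streamlit as st
from huggingface_hub import InferenceClient

def show_stream(prompt: str, endpoint_url: str, hf_token: str) -> str:
    client = InferenceClient(endpoint_url, token=hf_token)
    gen_kwargs = dict(
        max_new_tokens=512,
        top_k=30,
        top_p=0.9,
        temperature=0.2,
        repetition_penalty=1.02,
        stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
    )
    placeholder = st.empty()  # Streamlit slot re-rendered as tokens arrive
    pieces = []
    # stream=True with details=True yields per-token responses carrying .token metadata
    for r in client.text_generation(prompt, stream=True, details=True, **gen_kwargs):
        if r.token.special:  # skip special tokens such as </s>
            continue
        if r.token.text in gen_kwargs["stop_sequences"]:
            break
        pieces.append(r.token.text)
        placeholder.markdown("".join(pieces).strip())
    return "".join(pieces).strip()
```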