awacke1 committed on
Commit c4f41db · 1 Parent(s): b13cc5a

Update app.py

Files changed (1)
  1. app.py +68 -9
app.py CHANGED
@@ -150,18 +150,77 @@ def readitaloud(result):
      components.html(documentHTML5, width=800, height=300)
      #return result

- # Example usage:
- #prompt = "This is a sample prompt with emojis! 😊"
- #response = "This is a sample response with emojis! 🚀"
- #filename = generate_filename(prompt, "html")
- #create_file(filename, prompt, response)
-
- # Example usage for reading aloud:
- #response_for_reading_aloud = "This is a response for reading aloud."
- #readitaloud(response_for_reading_aloud)
-
+ # Llama 7b
+ def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
+     # API_URL = 'https://qe55p8afio98s0u3.us-east-1.aws.endpoints.huggingface.cloud'  # Dr Llama
+     API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"  # HF default model for l7b
+     API_KEY = os.getenv('API_KEY')
+     MODEL1 = "meta-llama/Llama-2-7b-chat-hf"
+     MODEL1URL = "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf"
+     HF_KEY = os.getenv('HF_KEY')
+     headers = {
+         "Authorization": f"Bearer {HF_KEY}",
+         "Content-Type": "application/json"
+     }
+
+     model = model_choice
+     conversation = [{'role': 'system', 'content': 'You are a python script writer.'}]
+     conversation.append({'role': 'user', 'content': prompt})
+     if len(document_section) > 0:
+         conversation.append({'role': 'assistant', 'content': document_section})
+     start_time = time.time()
+     report = []
+     res_box = st.empty()
+     collected_chunks = []
+     collected_messages = []
+
+     try:
+         endpoint_url = API_URL
+         hf_token = API_KEY
+         client = InferenceClient(endpoint_url, token=hf_token)
+         gen_kwargs = dict(
+             max_new_tokens=512,
+             top_k=30,
+             top_p=0.9,
+             temperature=0.2,
+             repetition_penalty=1.02,
+             stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
+         )
+         # Stream the generation token by token and render progress in the UI.
+         stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
+         report = []
+         res_box = st.empty()
+         collected_chunks = []
+         collected_messages = []
+         allresults = ''
+         result = ''
+         for r in stream:
+             if r.token.special:
+                 continue
+             if r.token.text in gen_kwargs["stop_sequences"]:
+                 break
+             collected_chunks.append(r.token.text)
+             chunk_message = r.token.text
+             collected_messages.append(chunk_message)
+             try:
+                 report.append(r.token.text)
+                 if len(r.token.text) > 0:
+                     result = "".join(report).strip()
+                     res_box.markdown(f'*{result}*')
+             except:
+                 st.write('Stream llm issue')
+         st.write("Elapsed time:")
+         st.write(time.time() - start_time)
+         filename = generate_filename(result, model_choice)
+         create_file(filename, prompt, result, should_save)
+         readitaloud(result)
+         return result
+     except:
+         st.write('Llama model is asleep. Starting up now on A10 - please give 5 minutes then retry as KEDA scales up from zero to activate running container(s).')
+
+
  # Chat and Chat with files
- def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
+ def chat_with_model_gpt(prompt, document_section, model_choice='gpt-3.5-turbo'):
      model = model_choice
      conversation = [{'role': 'system', 'content': 'You are a python script writer.'}]
      conversation.append({'role': 'user', 'content': prompt})
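
For reference, a minimal standalone sketch of the InferenceClient streaming pattern the new chat_with_model uses. The prompt and generation parameters below are illustrative, and it assumes HF_KEY holds a Hugging Face token with access to the gated meta-llama/Llama-2-7b-chat-hf model:

    # Sketch only: stream tokens from the hosted Llama-2-7b-chat endpoint.
    # HF_KEY is an assumption; any valid token with model access works.
    import os
    from huggingface_hub import InferenceClient

    client = InferenceClient(
        "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf",
        token=os.getenv("HF_KEY"),
    )

    pieces = []
    # stream=True with details=True yields token-level responses
    # exposing r.token.text and r.token.special.
    for r in client.text_generation(
        "Write a haiku about Streamlit.",
        stream=True,
        details=True,
        max_new_tokens=64,
        temperature=0.2,
        stop_sequences=["\nUser:", "</s>"],
    ):
        if r.token.special:
            continue  # skip control tokens such as </s>
        pieces.append(r.token.text)

    print("".join(pieces).strip())

Streaming with details=True is what lets the app skip special tokens and break on its stop sequences while rendering partial output into the Streamlit placeholder as chunks arrive.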