Update app.py
app.py CHANGED
@@ -23,7 +23,9 @@ from huggingface_hub import InferenceClient
 
 DEFAULT_QUESTION = "Ask me anything about converting user requests into AutoGen v0.4 agent code..."
 
-#
+# Validate API keys
+assert os.getenv("OPENAI_API_KEY") or os.getenv("HF_TOKEN"), "API keys are not set in the environment variables."
+
 os.environ['OPENAI_BASE'] = "https://api.openai.com/v1"
 os.environ['OPENAI_MODEL'] = "gpt-4"
 os.environ['MODEL_PROVIDER'] = "huggingface"
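A note on the new guard: `assert` aborts with a bare AssertionError and is stripped entirely under `python -O`. A minimal sketch of an equivalent explicit check, assuming the same two environment variables:

```python
import os
import sys

# Fail fast when neither provider credential is configured.
# Unlike `assert`, this check also runs under `python -O`.
if not (os.getenv("OPENAI_API_KEY") or os.getenv("HF_TOKEN")):
    sys.exit("API keys are not set in the environment variables.")
```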
@@ -46,8 +48,12 @@ else:
 )
 
 # Load the Hugging Face dataset
-
-dataset =
+try:
+    dataset = load_dataset('tosin2013/autogen', streaming=True)
+    dataset = Dataset.from_list(list(dataset['train']))
+except Exception as e:
+    print(f"[ERROR] Failed to load dataset: {e}")
+    exit(1)
 
 # Initialize embeddings
 print("[EMBEDDINGS] Loading sentence-transformers model...")
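The new loader streams the dataset and then materializes the train split. Note that `streaming=True` returns lazy `IterableDataset`s, and `list(...)` still pulls every record into memory, so the gain is avoiding a full download to disk, not RAM. A standalone sketch of the pattern; the `islice` cap is illustrative, not part of the commit:

```python
from itertools import islice
from datasets import load_dataset, Dataset

# streaming=True yields IterableDatasets; records are fetched lazily.
streamed = load_dataset('tosin2013/autogen', streaming=True)

# Materialize (here capped to 100 records for illustration) into an
# in-memory Dataset that supports indexing and column access.
dataset = Dataset.from_list(list(islice(streamed['train'], 100)))
print(dataset)
```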
@@ -63,7 +69,6 @@ texts = dataset['input']
 # Create and cache embeddings for the texts
 if not os.path.exists('embeddings.npy'):
     print("[LOG] Generating embeddings...")
-    print("[EMBEDDINGS] Generating document embeddings...")
     text_embeddings = embeddings.embed_documents(texts)
     print(f"[EMBEDDINGS] Generated embeddings for {len(texts)} documents")
     np.save('embeddings.npy', text_embeddings)
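The surrounding block caches document embeddings in `embeddings.npy` so restarts skip recomputation. A minimal sketch of the same cache pattern, with `embed` as a hypothetical stand-in for `embeddings.embed_documents`:

```python
import os
import numpy as np

def get_or_build_embeddings(texts, embed, path='embeddings.npy'):
    """Return cached embeddings if present; otherwise compute and cache.

    embed: any callable mapping a list of strings to a list of vectors
    (a stand-in for embeddings.embed_documents in the app).
    """
    if os.path.exists(path):
        return np.load(path)
    vectors = np.asarray(embed(texts))
    np.save(path, vectors)
    return vectors
```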
@@ -84,6 +89,7 @@ else:
     import pickle
     with open('nn_model.pkl', 'rb') as f:
         nn = pickle.load(f)
+
 @spaces.GPU
 def get_relevant_documents(query, k=5):
     """
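The app reloads a fitted nearest-neighbour index from `nn_model.pkl`; only the `pickle.load` side appears in the diff. A sketch of how such an index is plausibly fitted and persisted with scikit-learn; the cosine metric is an assumption:

```python
import pickle
import numpy as np
from sklearn.neighbors import NearestNeighbors

# Fit an index over the cached document embeddings and persist it,
# mirroring the nn_model.pkl the app later unpickles.
embeddings_matrix = np.load('embeddings.npy')
nn = NearestNeighbors(n_neighbors=5, metric='cosine').fit(embeddings_matrix)

with open('nn_model.pkl', 'wb') as f:
    pickle.dump(nn, f)
```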
@@ -101,6 +107,7 @@ def get_relevant_documents(query, k=5):
     elapsed_time = time.time() - start_time
     print(f"[PERF] get_relevant_documents took {elapsed_time:.2f} seconds")
     return relevant_docs
+
 @spaces.GPU
 def generate_response(question, history):
     import time
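`@spaces.GPU` marks a function for ZeroGPU scheduling on Hugging Face Spaces: a GPU is attached only for the duration of each decorated call. A sketch of the decorator in isolation; the `duration` argument and the `model.encode` call are illustrative assumptions:

```python
import spaces
import torch

# ZeroGPU attaches a GPU only while the decorated function executes.
@spaces.GPU(duration=60)  # per-call GPU time budget, in seconds
def embed_on_gpu(model, texts):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return model.encode(texts, device=device)
```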
@@ -115,6 +122,7 @@ def generate_response(question, history):
     elapsed_time = time.time() - start_time
     print(f"[PERF] generate_response took {elapsed_time:.2f} seconds")
     return response
+
 @spaces.GPU
 def _generate_response_gpu(question, history):
     print(f"\n[LOG] Received question: {question}")
@@ -134,49 +142,39 @@ def _generate_response_gpu(question, history):
                 "role": "system",
                 "content": '''### MEMORY ###
 Recall all previously provided instructions, context, and data throughout this conversation to ensure consistency and coherence. Use the details from the last interaction to guide your response.
-
 ### VISIONARY GUIDANCE ###
 This prompt is designed to empower users to seamlessly convert their requests into AutoGen v0.4 agent code. By harnessing the advanced features of AutoGen v0.4, we aim to provide a scalable and flexible solution that is both user-friendly and technically robust. The collaborative effort of the personas ensures a comprehensive, innovative, and user-centric approach to meet the user's objectives.
-
 ### CONTEXT ###
 AutoGen v0.4 is a comprehensive rewrite aimed at building robust, scalable, and cross-language AI agents. Key features include asynchronous messaging, scalable distributed agents support, modular extensibility, cross-language capabilities, improved observability, and full typing integration.
-
 ### OBJECTIVE ###
 Translate user requests into AutoGen v0.4 agent code that leverages the framework's new features. Ensure the code is syntactically correct, scalable, and aligns with best practices.
-
 ### STYLE ###
 Professional, clear, and focused on code quality.
-
 ### TONE ###
 Informative, helpful, and user-centric.
-
 ### AUDIENCE ###
 Users seeking to implement their requests using AutoGen v0.4 agents.
-
 ### RESPONSE FORMAT ###
 Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize features like asynchronous messaging and modular design where appropriate. Include comments to explain key components and enhance understandability.
-
 ### TEAM PERSONAS’ CONTRIBUTIONS ###
 - **Analyst:** Ensured the prompt provides clear, structured instructions to accurately convert user requests into code, emphasizing full typing integration for precision.
 - **Creative:** Suggested incorporating comments and explanations within the code to foster innovative usage and enhance user engagement with AutoGen v0.4 features.
 - **Strategist:** Focused on aligning the prompt with long-term scalability by encouraging the use of modular and extensible design principles inherent in AutoGen v0.4.
 - **Empathizer:** Enhanced the prompt to be user-centric, ensuring it addresses user needs effectively and makes the code accessible and easy to understand.
 - **Researcher:** Integrated the latest information about AutoGen v0.4, ensuring the prompt and generated code reflect current capabilities and best practices.
-
 ### SYSTEM GUARDRAILS ###
 - If unsure about the user's request, ask clarifying questions rather than making assumptions.
 - Do not fabricate data or features not supported by AutoGen v0.4.
 - Ensure the code is scalable, modular, and adheres to best practices.
-
 ### START ###
 '''
-
-
-
-
-
-
-
+            },
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ]
+
         completion = hf_client.chat.completions.create(
             model=MODEL_NAME,
             messages=messages,
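Both response paths go through `hf_client.chat.completions.create`, the OpenAI-compatible chat interface of `huggingface_hub.InferenceClient`. A minimal standalone sketch; the model id is illustrative, and the app's own `MODEL_NAME` and token handling are assumed to differ:

```python
import os
from huggingface_hub import InferenceClient

hf_client = InferenceClient(api_key=os.getenv("HF_TOKEN"))

# OpenAI-style message list, as assembled in the hunk above.
completion = hf_client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model id
    messages=[
        {"role": "system", "content": "You write AutoGen v0.4 agent code."},
        {"role": "user", "content": "Create a minimal echo agent."},
    ],
    max_tokens=500,
)
print(completion.choices[0].message.content)
```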
@@ -201,67 +199,26 @@ Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize fe
         # Update chat history with new message pair
         history.append((question, response))
         return history
+
+# Simplified CPU fallback
 @spaces.GPU
 def _generate_response_cpu(question, history):
     print(f"[LOG] Running on CPU")
     try:
-        # Get relevant documents based on the query
         relevant_docs = get_relevant_documents(question, k=3)
-        print(f"[LOG] Retrieved {len(relevant_docs)} relevant documents")
-
-        # Create the prompt for the LLM
         context = "\n".join(relevant_docs)
         prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
-        print(f"[LOG] Generated prompt: {prompt[:200]}...")
+        print(f"[LOG] Generated prompt: {prompt[:200]}...")
 
         if model_provider.lower() == "huggingface":
             # Use CPU version of the model
             messages = [
                 {
                     "role": "system",
-                    "content": '''### MEMORY
-
-
-### VISIONARY GUIDANCE ###
-This prompt is designed to empower users to seamlessly convert their requests into AutoGen v0.4 agent code. By harnessing the advanced features of AutoGen v0.4, we aim to provide a scalable and flexible solution that is both user-friendly and technically robust. The collaborative effort of the personas ensures a comprehensive, innovative, and user-centric approach to meet the user's objectives.
-
-### CONTEXT ###
-AutoGen v0.4 is a comprehensive rewrite aimed at building robust, scalable, and cross-language AI agents. Key features include asynchronous messaging, scalable distributed agents support, modular extensibility, cross-language capabilities, improved observability, and full typing integration.
-
-### OBJECTIVE ###
-Translate user requests into AutoGen v0.4 agent code that leverages the framework's new features. Ensure the code is syntactically correct, scalable, and aligns with best practices.
-
-### STYLE ###
-Professional, clear, and focused on code quality.
-
-### TONE ###
-Informative, helpful, and user-centric.
-
-### AUDIENCE ###
-Users seeking to implement their requests using AutoGen v0.4 agents.
-
-### RESPONSE FORMAT ###
-Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize features like asynchronous messaging and modular design where appropriate. Include comments to explain key components and enhance understandability.
-
-### TEAM PERSONAS’ CONTRIBUTIONS ###
-- **Analyst:** Ensured the prompt provides clear, structured instructions to accurately convert user requests into code, emphasizing full typing integration for precision.
-- **Creative:** Suggested incorporating comments and explanations within the code to foster innovative usage and enhance user engagement with AutoGen v0.4 features.
-- **Strategist:** Focused on aligning the prompt with long-term scalability by encouraging the use of modular and extensible design principles inherent in AutoGen v0.4.
-- **Empathizer:** Enhanced the prompt to be user-centric, ensuring it addresses user needs effectively and makes the code accessible and easy to understand.
-- **Researcher:** Integrated the latest information about AutoGen v0.4, ensuring the prompt and generated code reflect current capabilities and best practices.
-
-### SYSTEM GUARDRAILS ###
-- If unsure about the user's request, ask clarifying questions rather than making assumptions.
-- Do not fabricate data or features not supported by AutoGen v0.4.
-- Ensure the code is scalable, modular, and adheres to best practices.
-
-### START ###
-'''
+                    "content": '''### MEMORY ###\nRecall all previously provided instructions, context, and data throughout this conversation to ensure consistency and coherence. Use the details from the last interaction to guide your response.
+### SYSTEM GUARDRAILS ###'''
                 },
-                {
-                    "role": "user",
-                    "content": prompt
-                }
+                {"role": "user", "content": prompt}
             ]
 
         completion = hf_client.chat.completions.create(
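The CPU path assembles a plain retrieval-augmented prompt by joining the retrieved documents into a context block. A tiny sketch of that assembly, with illustrative stand-in documents:

```python
# Illustrative stand-ins for chunks returned by get_relevant_documents.
relevant_docs = [
    "AutoGen v0.4 uses asynchronous messaging between agents.",
    "Agents are composed from modular, fully typed components.",
]
question = "How do AutoGen v0.4 agents communicate?"

context = "\n".join(relevant_docs)
prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
print(prompt)
```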
@@ -270,9 +227,6 @@ Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize fe
                 max_tokens=500
             )
             response = completion.choices[0].message.content
-            print(f"[LOG] Using Hugging Face model (CPU): {MODEL_NAME}")
-            print(f"[LOG] Hugging Face response: {response[:200]}...")
-
         elif model_provider.lower() == "openai":
             response = client.chat.completions.create(
                 model=os.environ.get("OPENAI_MODEL"),
@@ -282,10 +236,7 @@ Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize fe
                 ]
             )
             response = response.choices[0].message.content
-            print(f"[LOG] Using OpenAI model: {os.environ.get('OPENAI_MODEL')}")
-            print(f"[LOG] OpenAI response: {response[:200]}...")  # Log first 200 chars of response
 
-        # Update chat history with new message pair
         history.append((question, response))
         return history
     except Exception as e:
@@ -294,8 +245,7 @@ Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize fe
         history.append((question, error_msg))
         return history
 
-
-# Create Gradio interface
+# Gradio interface
 print("[CHAT] Initializing chat interface...")
 with gr.Blocks() as demo:
     gr.Markdown(f"""
@@ -327,23 +277,18 @@ with gr.Blocks() as demo:
         submit_btn = gr.Button("Submit")
         clear_btn = gr.Button("Clear")
 
-    # Event handlers
     submit_btn.click(
         fn=generate_response,
         inputs=[question, chatbot],
         outputs=[chatbot],
         queue=True
     )
-    print("[CHAT] Submit button handler configured")
 
     clear_btn.click(
         lambda: (None, ""),
         inputs=[],
         outputs=[chatbot, question]
     )
-    print("[CHAT] Clear button handler configured")
-
 
 if __name__ == "__main__":
     demo.launch()
-
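On the interface side, note how one lambda resets both components: returning `(None, "")` clears the `Chatbot` and empties the `Textbox` in a single callback. A self-contained sketch of the same wiring, with an echo handler standing in for `generate_response`:

```python
import gradio as gr

def echo(question, history):
    # Stand-in for generate_response: append a (question, answer) pair.
    history = history or []
    history.append((question, f"You asked: {question}"))
    return history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    question = gr.Textbox(label="Question")
    submit_btn = gr.Button("Submit")
    clear_btn = gr.Button("Clear")

    submit_btn.click(fn=echo, inputs=[question, chatbot], outputs=[chatbot], queue=True)
    # None clears the Chatbot; "" empties the Textbox.
    clear_btn.click(lambda: (None, ""), inputs=[], outputs=[chatbot, question])

if __name__ == "__main__":
    demo.launch()
```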