Update app.py
app.py CHANGED
@@ -23,7 +23,9 @@ from huggingface_hub import InferenceClient
 
 DEFAULT_QUESTION = "Ask me anything about converting user requests into AutoGen v0.4 agent code..."
 
-#
+# Validate API keys
+assert os.getenv("OPENAI_API_KEY") or os.getenv("HF_TOKEN"), "API keys are not set in the environment variables."
+
 os.environ['OPENAI_BASE'] = "https://api.openai.com/v1"
 os.environ['OPENAI_MODEL'] = "gpt-4"
 os.environ['MODEL_PROVIDER'] = "huggingface"
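A note on the new guard: `assert` aborts with a bare AssertionError and is stripped entirely under `python -O`. A minimal sketch of an equivalent explicit check, assuming the same two environment variables:

```python
import os
import sys

# Fail fast when neither provider credential is configured.
# Unlike `assert`, this check also runs under `python -O`.
if not (os.getenv("OPENAI_API_KEY") or os.getenv("HF_TOKEN")):
    sys.exit("API keys are not set in the environment variables.")
```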
@@ -46,8 +48,12 @@ else:
 )
 
 # Load the Hugging Face dataset
-
-dataset =
+try:
+    dataset = load_dataset('tosin2013/autogen', streaming=True)
+    dataset = Dataset.from_list(list(dataset['train']))
+except Exception as e:
+    print(f"[ERROR] Failed to load dataset: {e}")
+    exit(1)
 
 # Initialize embeddings
 print("[EMBEDDINGS] Loading sentence-transformers model...")
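The new loader streams the dataset and then materializes the train split. Note that `streaming=True` returns lazy `IterableDataset`s, and `list(...)` still pulls every record into memory, so the gain is avoiding a full download to disk, not RAM. A standalone sketch of the pattern; the `islice` cap is illustrative, not part of the commit:

```python
from itertools import islice
from datasets import load_dataset, Dataset

# streaming=True yields IterableDatasets; records are fetched lazily.
streamed = load_dataset('tosin2013/autogen', streaming=True)

# Materialize (here capped to 100 records for illustration) into an
# in-memory Dataset that supports indexing and column access.
dataset = Dataset.from_list(list(islice(streamed['train'], 100)))
print(dataset)
```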
@@ -63,7 +69,6 @@ texts = dataset['input']
 # Create and cache embeddings for the texts
 if not os.path.exists('embeddings.npy'):
     print("[LOG] Generating embeddings...")
-    print("[EMBEDDINGS] Generating document embeddings...")
     text_embeddings = embeddings.embed_documents(texts)
     print(f"[EMBEDDINGS] Generated embeddings for {len(texts)} documents")
     np.save('embeddings.npy', text_embeddings)
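The surrounding block caches document embeddings in `embeddings.npy` so restarts skip recomputation. A minimal sketch of the same cache pattern, with `embed` as a hypothetical stand-in for `embeddings.embed_documents`:

```python
import os
import numpy as np

def get_or_build_embeddings(texts, embed, path='embeddings.npy'):
    """Return cached embeddings if present; otherwise compute and cache.

    embed: any callable mapping a list of strings to a list of vectors
    (a stand-in for embeddings.embed_documents in the app).
    """
    if os.path.exists(path):
        return np.load(path)
    vectors = np.asarray(embed(texts))
    np.save(path, vectors)
    return vectors
```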
@@ -84,6 +89,7 @@ else:
     import pickle
     with open('nn_model.pkl', 'rb') as f:
         nn = pickle.load(f)
+
 @spaces.GPU
 def get_relevant_documents(query, k=5):
     """
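The app reloads a fitted nearest-neighbour index from `nn_model.pkl`; only the `pickle.load` side appears in the diff. A sketch of how such an index is plausibly fitted and persisted with scikit-learn; the cosine metric is an assumption:

```python
import pickle
import numpy as np
from sklearn.neighbors import NearestNeighbors

# Fit an index over the cached document embeddings and persist it,
# mirroring the nn_model.pkl the app later unpickles.
embeddings_matrix = np.load('embeddings.npy')
nn = NearestNeighbors(n_neighbors=5, metric='cosine').fit(embeddings_matrix)

with open('nn_model.pkl', 'wb') as f:
    pickle.dump(nn, f)
```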
@@ -101,6 +107,7 @@ def get_relevant_documents(query, k=5):
     elapsed_time = time.time() - start_time
     print(f"[PERF] get_relevant_documents took {elapsed_time:.2f} seconds")
     return relevant_docs
+
 @spaces.GPU
 def generate_response(question, history):
     import time
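`@spaces.GPU` marks a function for ZeroGPU scheduling on Hugging Face Spaces: a GPU is attached only for the duration of each decorated call. A sketch of the decorator in isolation; the `duration` argument and the `model.encode` call are illustrative assumptions:

```python
import spaces
import torch

# ZeroGPU attaches a GPU only while the decorated function executes.
@spaces.GPU(duration=60)  # per-call GPU time budget, in seconds
def embed_on_gpu(model, texts):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return model.encode(texts, device=device)
```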
@@ -115,6 +122,7 @@ def generate_response(question, history):
     elapsed_time = time.time() - start_time
     print(f"[PERF] generate_response took {elapsed_time:.2f} seconds")
     return response
+
 @spaces.GPU
 def _generate_response_gpu(question, history):
     print(f"\n[LOG] Received question: {question}")
@@ -134,49 +142,39 @@ def _generate_response_gpu(question, history):
                 "role": "system",
                 "content": '''### MEMORY ###
 Recall all previously provided instructions, context, and data throughout this conversation to ensure consistency and coherence. Use the details from the last interaction to guide your response.
-
 ### VISIONARY GUIDANCE ###
 This prompt is designed to empower users to seamlessly convert their requests into AutoGen v0.4 agent code. By harnessing the advanced features of AutoGen v0.4, we aim to provide a scalable and flexible solution that is both user-friendly and technically robust. The collaborative effort of the personas ensures a comprehensive, innovative, and user-centric approach to meet the user's objectives.
-
 ### CONTEXT ###
 AutoGen v0.4 is a comprehensive rewrite aimed at building robust, scalable, and cross-language AI agents. Key features include asynchronous messaging, scalable distributed agents support, modular extensibility, cross-language capabilities, improved observability, and full typing integration.
-
 ### OBJECTIVE ###
 Translate user requests into AutoGen v0.4 agent code that leverages the framework's new features. Ensure the code is syntactically correct, scalable, and aligns with best practices.
-
 ### STYLE ###
 Professional, clear, and focused on code quality.
-
 ### TONE ###
 Informative, helpful, and user-centric.
-
 ### AUDIENCE ###
 Users seeking to implement their requests using AutoGen v0.4 agents.
-
 ### RESPONSE FORMAT ###
 Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize features like asynchronous messaging and modular design where appropriate. Include comments to explain key components and enhance understandability.
-
 ### TEAM PERSONAS’ CONTRIBUTIONS ###
 - **Analyst:** Ensured the prompt provides clear, structured instructions to accurately convert user requests into code, emphasizing full typing integration for precision.
 - **Creative:** Suggested incorporating comments and explanations within the code to foster innovative usage and enhance user engagement with AutoGen v0.4 features.
 - **Strategist:** Focused on aligning the prompt with long-term scalability by encouraging the use of modular and extensible design principles inherent in AutoGen v0.4.
 - **Empathizer:** Enhanced the prompt to be user-centric, ensuring it addresses user needs effectively and makes the code accessible and easy to understand.
 - **Researcher:** Integrated the latest information about AutoGen v0.4, ensuring the prompt and generated code reflect current capabilities and best practices.
-
 ### SYSTEM GUARDRAILS ###
 - If unsure about the user's request, ask clarifying questions rather than making assumptions.
 - Do not fabricate data or features not supported by AutoGen v0.4.
 - Ensure the code is scalable, modular, and adheres to best practices.
-
 ### START ###
 '''
-
-
-
-
-
-
-
+            },
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ]
+
         completion = hf_client.chat.completions.create(
             model=MODEL_NAME,
             messages=messages,
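Both response paths go through `hf_client.chat.completions.create`, the OpenAI-compatible chat interface of `huggingface_hub.InferenceClient`. A minimal standalone sketch; the model id is illustrative, and the app's own `MODEL_NAME` and token handling are assumed to differ:

```python
import os
from huggingface_hub import InferenceClient

hf_client = InferenceClient(api_key=os.getenv("HF_TOKEN"))

# OpenAI-style message list, as assembled in the hunk above.
completion = hf_client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model id
    messages=[
        {"role": "system", "content": "You write AutoGen v0.4 agent code."},
        {"role": "user", "content": "Create a minimal echo agent."},
    ],
    max_tokens=500,
)
print(completion.choices[0].message.content)
```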
@@ -201,67 +199,26 @@ Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize fe
         # Update chat history with new message pair
         history.append((question, response))
         return history
+
+# Simplified CPU fallback
 @spaces.GPU
 def _generate_response_cpu(question, history):
     print(f"[LOG] Running on CPU")
     try:
-        # Get relevant documents based on the query
         relevant_docs = get_relevant_documents(question, k=3)
-        print(f"[LOG] Retrieved {len(relevant_docs)} relevant documents")
-
-        # Create the prompt for the LLM
         context = "\n".join(relevant_docs)
         prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
-        print(f"[LOG] Generated prompt: {prompt[:200]}...")
+        print(f"[LOG] Generated prompt: {prompt[:200]}...")
 
         if model_provider.lower() == "huggingface":
             # Use CPU version of the model
             messages = [
                 {
                     "role": "system",
-                    "content": '''### MEMORY
-
-
-### VISIONARY GUIDANCE ###
-This prompt is designed to empower users to seamlessly convert their requests into AutoGen v0.4 agent code. By harnessing the advanced features of AutoGen v0.4, we aim to provide a scalable and flexible solution that is both user-friendly and technically robust. The collaborative effort of the personas ensures a comprehensive, innovative, and user-centric approach to meet the user's objectives.
-
-### CONTEXT ###
-AutoGen v0.4 is a comprehensive rewrite aimed at building robust, scalable, and cross-language AI agents. Key features include asynchronous messaging, scalable distributed agents support, modular extensibility, cross-language capabilities, improved observability, and full typing integration.
-
-### OBJECTIVE ###
-Translate user requests into AutoGen v0.4 agent code that leverages the framework's new features. Ensure the code is syntactically correct, scalable, and aligns with best practices.
-
-### STYLE ###
-Professional, clear, and focused on code quality.
-
-### TONE ###
-Informative, helpful, and user-centric.
-
-### AUDIENCE ###
-Users seeking to implement their requests using AutoGen v0.4 agents.
-
-### RESPONSE FORMAT ###
-Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize features like asynchronous messaging and modular design where appropriate. Include comments to explain key components and enhance understandability.
-
-### TEAM PERSONAS’ CONTRIBUTIONS ###
-- **Analyst:** Ensured the prompt provides clear, structured instructions to accurately convert user requests into code, emphasizing full typing integration for precision.
-- **Creative:** Suggested incorporating comments and explanations within the code to foster innovative usage and enhance user engagement with AutoGen v0.4 features.
-- **Strategist:** Focused on aligning the prompt with long-term scalability by encouraging the use of modular and extensible design principles inherent in AutoGen v0.4.
-- **Empathizer:** Enhanced the prompt to be user-centric, ensuring it addresses user needs effectively and makes the code accessible and easy to understand.
-- **Researcher:** Integrated the latest information about AutoGen v0.4, ensuring the prompt and generated code reflect current capabilities and best practices.
-
-### SYSTEM GUARDRAILS ###
-- If unsure about the user's request, ask clarifying questions rather than making assumptions.
-- Do not fabricate data or features not supported by AutoGen v0.4.
-- Ensure the code is scalable, modular, and adheres to best practices.
-
-### START ###
-'''
+                    "content": '''### MEMORY ###\nRecall all previously provided instructions, context, and data throughout this conversation to ensure consistency and coherence. Use the details from the last interaction to guide your response.
+### SYSTEM GUARDRAILS ###'''
                 },
-                {
-                    "role": "user",
-                    "content": prompt
-                }
+                {"role": "user", "content": prompt}
             ]
 
         completion = hf_client.chat.completions.create(
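The CPU path assembles a plain retrieval-augmented prompt by joining the retrieved documents into a context block. A tiny sketch of that assembly, with illustrative stand-in documents:

```python
# Illustrative stand-ins for chunks returned by get_relevant_documents.
relevant_docs = [
    "AutoGen v0.4 uses asynchronous messaging between agents.",
    "Agents are composed from modular, fully typed components.",
]
question = "How do AutoGen v0.4 agents communicate?"

context = "\n".join(relevant_docs)
prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
print(prompt)
```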
@@ -270,9 +227,6 @@ Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize fe
                 max_tokens=500
             )
             response = completion.choices[0].message.content
-            print(f"[LOG] Using Hugging Face model (CPU): {MODEL_NAME}")
-            print(f"[LOG] Hugging Face response: {response[:200]}...")
-
         elif model_provider.lower() == "openai":
             response = client.chat.completions.create(
                 model=os.environ.get("OPENAI_MODEL"),
@@ -282,10 +236,7 @@ Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize fe
                 ]
             )
             response = response.choices[0].message.content
-            print(f"[LOG] Using OpenAI model: {os.environ.get('OPENAI_MODEL')}")
-            print(f"[LOG] OpenAI response: {response[:200]}...")  # Log first 200 chars of response
 
-        # Update chat history with new message pair
         history.append((question, response))
         return history
     except Exception as e:
@@ -294,8 +245,7 @@ Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize fe
         history.append((question, error_msg))
         return history
 
-
-# Create Gradio interface
+# Gradio interface
 print("[CHAT] Initializing chat interface...")
 with gr.Blocks() as demo:
     gr.Markdown(f"""
@@ -327,23 +277,18 @@ with gr.Blocks() as demo:
         submit_btn = gr.Button("Submit")
         clear_btn = gr.Button("Clear")
 
-    # Event handlers
     submit_btn.click(
         fn=generate_response,
         inputs=[question, chatbot],
         outputs=[chatbot],
         queue=True
     )
-    print("[CHAT] Submit button handler configured")
 
     clear_btn.click(
         lambda: (None, ""),
         inputs=[],
         outputs=[chatbot, question]
     )
-    print("[CHAT] Clear button handler configured")
-
 
 if __name__ == "__main__":
     demo.launch()
-
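On the interface side, note how one lambda resets both components: returning `(None, "")` clears the `Chatbot` and empties the `Textbox` in a single callback. A self-contained sketch of the same wiring, with an echo handler standing in for `generate_response`:

```python
import gradio as gr

def echo(question, history):
    # Stand-in for generate_response: append a (question, answer) pair.
    history = history or []
    history.append((question, f"You asked: {question}"))
    return history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    question = gr.Textbox(label="Question")
    submit_btn = gr.Button("Submit")
    clear_btn = gr.Button("Clear")

    submit_btn.click(fn=echo, inputs=[question, chatbot], outputs=[chatbot], queue=True)
    # None clears the Chatbot; "" empties the Textbox.
    clear_btn.click(lambda: (None, ""), inputs=[], outputs=[chatbot, question])

if __name__ == "__main__":
    demo.launch()
```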