tosin2013 committed
Commit edcf891 · verified · 1 Parent(s): 655d8c0

Update app.py

Files changed (1): app.py (+26 -81)
app.py CHANGED
@@ -23,7 +23,9 @@ from huggingface_hub import InferenceClient
 
 DEFAULT_QUESTION = "Ask me anything about converting user requests into AutoGen v0.4 agent code..."
 
-# Set API keys (make sure these are set in your environment)
+# Validate API keys
+assert os.getenv("OPENAI_API_KEY") or os.getenv("HF_TOKEN"), "API keys are not set in the environment variables."
+
 os.environ['OPENAI_BASE'] = "https://api.openai.com/v1"
 os.environ['OPENAI_MODEL'] = "gpt-4"
 os.environ['MODEL_PROVIDER'] = "huggingface"
@@ -46,8 +48,12 @@ else:
 )
 
 # Load the Hugging Face dataset
-dataset = load_dataset('tosin2013/autogen', streaming=True)
-dataset = Dataset.from_list(list(dataset['train']))
+try:
+    dataset = load_dataset('tosin2013/autogen', streaming=True)
+    dataset = Dataset.from_list(list(dataset['train']))
+except Exception as e:
+    print(f"[ERROR] Failed to load dataset: {e}")
+    exit(1)
 
 # Initialize embeddings
 print("[EMBEDDINGS] Loading sentence-transformers model...")
@@ -63,7 +69,6 @@ texts = dataset['input']
 # Create and cache embeddings for the texts
 if not os.path.exists('embeddings.npy'):
     print("[LOG] Generating embeddings...")
-    print("[EMBEDDINGS] Generating document embeddings...")
     text_embeddings = embeddings.embed_documents(texts)
     print(f"[EMBEDDINGS] Generated embeddings for {len(texts)} documents")
     np.save('embeddings.npy', text_embeddings)
@@ -84,6 +89,7 @@ else:
     import pickle
     with open('nn_model.pkl', 'rb') as f:
         nn = pickle.load(f)
+
 @spaces.GPU
 def get_relevant_documents(query, k=5):
     """
@@ -101,6 +107,7 @@ def get_relevant_documents(query, k=5):
     elapsed_time = time.time() - start_time
     print(f"[PERF] get_relevant_documents took {elapsed_time:.2f} seconds")
     return relevant_docs
+
 @spaces.GPU
 def generate_response(question, history):
     import time
@@ -115,6 +122,7 @@ def generate_response(question, history):
     elapsed_time = time.time() - start_time
     print(f"[PERF] generate_response took {elapsed_time:.2f} seconds")
     return response
+
 @spaces.GPU
 def _generate_response_gpu(question, history):
     print(f"\n[LOG] Received question: {question}")
@@ -134,49 +142,39 @@ def _generate_response_gpu(question, history):
             "role": "system",
             "content": '''### MEMORY ###
 Recall all previously provided instructions, context, and data throughout this conversation to ensure consistency and coherence. Use the details from the last interaction to guide your response.
-
 ### VISIONARY GUIDANCE ###
 This prompt is designed to empower users to seamlessly convert their requests into AutoGen v0.4 agent code. By harnessing the advanced features of AutoGen v0.4, we aim to provide a scalable and flexible solution that is both user-friendly and technically robust. The collaborative effort of the personas ensures a comprehensive, innovative, and user-centric approach to meet the user's objectives.
-
 ### CONTEXT ###
 AutoGen v0.4 is a comprehensive rewrite aimed at building robust, scalable, and cross-language AI agents. Key features include asynchronous messaging, scalable distributed agents support, modular extensibility, cross-language capabilities, improved observability, and full typing integration.
-
 ### OBJECTIVE ###
 Translate user requests into AutoGen v0.4 agent code that leverages the framework's new features. Ensure the code is syntactically correct, scalable, and aligns with best practices.
-
 ### STYLE ###
 Professional, clear, and focused on code quality.
-
 ### TONE ###
 Informative, helpful, and user-centric.
-
 ### AUDIENCE ###
 Users seeking to implement their requests using AutoGen v0.4 agents.
-
 ### RESPONSE FORMAT ###
 Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize features like asynchronous messaging and modular design where appropriate. Include comments to explain key components and enhance understandability.
-
 ### TEAM PERSONAS’ CONTRIBUTIONS ###
 - **Analyst:** Ensured the prompt provides clear, structured instructions to accurately convert user requests into code, emphasizing full typing integration for precision.
 - **Creative:** Suggested incorporating comments and explanations within the code to foster innovative usage and enhance user engagement with AutoGen v0.4 features.
 - **Strategist:** Focused on aligning the prompt with long-term scalability by encouraging the use of modular and extensible design principles inherent in AutoGen v0.4.
 - **Empathizer:** Enhanced the prompt to be user-centric, ensuring it addresses user needs effectively and makes the code accessible and easy to understand.
 - **Researcher:** Integrated the latest information about AutoGen v0.4, ensuring the prompt and generated code reflect current capabilities and best practices.
-
 ### SYSTEM GUARDRAILS ###
 - If unsure about the user's request, ask clarifying questions rather than making assumptions.
 - Do not fabricate data or features not supported by AutoGen v0.4.
 - Ensure the code is scalable, modular, and adheres to best practices.
-
 ### START ###
 '''
-        },
-        {
-            "role": "user",
-            "content": prompt
-        }
-    ]
-
+        },
+        {
+            "role": "user",
+            "content": prompt
+        }
+    ]
+
     completion = hf_client.chat.completions.create(
         model=MODEL_NAME,
         messages=messages,
@@ -201,67 +199,26 @@ Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize fe
     # Update chat history with new message pair
     history.append((question, response))
     return history
+
+# Simplified CPU fallback
 @spaces.GPU
 def _generate_response_cpu(question, history):
     print(f"[LOG] Running on CPU")
     try:
-        # Get relevant documents based on the query
         relevant_docs = get_relevant_documents(question, k=3)
-        print(f"[LOG] Retrieved {len(relevant_docs)} relevant documents")
-
-        # Create the prompt for the LLM
         context = "\n".join(relevant_docs)
         prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
-        print(f"[LOG] Generated prompt: {prompt[:200]}...") # Log first 200 chars of prompt
+        print(f"[LOG] Generated prompt: {prompt[:200]}...")
 
         if model_provider.lower() == "huggingface":
             # Use CPU version of the model
             messages = [
                 {
                     "role": "system",
-                    "content": '''### MEMORY ###
-Recall all previously provided instructions, context, and data throughout this conversation to ensure consistency and coherence. Use the details from the last interaction to guide your response.
-
-### VISIONARY GUIDANCE ###
-This prompt is designed to empower users to seamlessly convert their requests into AutoGen v0.4 agent code. By harnessing the advanced features of AutoGen v0.4, we aim to provide a scalable and flexible solution that is both user-friendly and technically robust. The collaborative effort of the personas ensures a comprehensive, innovative, and user-centric approach to meet the user's objectives.
-
-### CONTEXT ###
-AutoGen v0.4 is a comprehensive rewrite aimed at building robust, scalable, and cross-language AI agents. Key features include asynchronous messaging, scalable distributed agents support, modular extensibility, cross-language capabilities, improved observability, and full typing integration.
-
-### OBJECTIVE ###
-Translate user requests into AutoGen v0.4 agent code that leverages the framework's new features. Ensure the code is syntactically correct, scalable, and aligns with best practices.
-
-### STYLE ###
-Professional, clear, and focused on code quality.
-
-### TONE ###
-Informative, helpful, and user-centric.
-
-### AUDIENCE ###
-Users seeking to implement their requests using AutoGen v0.4 agents.
-
-### RESPONSE FORMAT ###
-Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize features like asynchronous messaging and modular design where appropriate. Include comments to explain key components and enhance understandability.
-
-### TEAM PERSONAS’ CONTRIBUTIONS ###
-- **Analyst:** Ensured the prompt provides clear, structured instructions to accurately convert user requests into code, emphasizing full typing integration for precision.
-- **Creative:** Suggested incorporating comments and explanations within the code to foster innovative usage and enhance user engagement with AutoGen v0.4 features.
-- **Strategist:** Focused on aligning the prompt with long-term scalability by encouraging the use of modular and extensible design principles inherent in AutoGen v0.4.
-- **Empathizer:** Enhanced the prompt to be user-centric, ensuring it addresses user needs effectively and makes the code accessible and easy to understand.
-- **Researcher:** Integrated the latest information about AutoGen v0.4, ensuring the prompt and generated code reflect current capabilities and best practices.
-
-### SYSTEM GUARDRAILS ###
-- If unsure about the user's request, ask clarifying questions rather than making assumptions.
-- Do not fabricate data or features not supported by AutoGen v0.4.
-- Ensure the code is scalable, modular, and adheres to best practices.
-
-### START ###
-'''
+                    "content": '''### MEMORY ###\nRecall all previously provided instructions, context, and data throughout this conversation to ensure consistency and coherence. Use the details from the last interaction to guide your response.
+### SYSTEM GUARDRAILS ###'''
                 },
-                {
-                    "role": "user",
-                    "content": prompt
-                }
+                {"role": "user", "content": prompt}
             ]
 
             completion = hf_client.chat.completions.create(
@@ -270,9 +227,6 @@ Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize fe
                 max_tokens=500
             )
             response = completion.choices[0].message.content
-            print(f"[LOG] Using Hugging Face model (CPU): {MODEL_NAME}")
-            print(f"[LOG] Hugging Face response: {response[:200]}...")
-
         elif model_provider.lower() == "openai":
             response = client.chat.completions.create(
                 model=os.environ.get("OPENAI_MODEL"),
@@ -282,10 +236,7 @@ Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize fe
                 ]
             )
             response = response.choices[0].message.content
-            print(f"[LOG] Using OpenAI model: {os.environ.get('OPENAI_MODEL')}")
-            print(f"[LOG] OpenAI response: {response[:200]}...") # Log first 200 chars of response
 
-        # Update chat history with new message pair
         history.append((question, response))
         return history
     except Exception as e:
@@ -294,8 +245,7 @@ Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize fe
         history.append((question, error_msg))
         return history
 
-
-# Create Gradio interface
+# Gradio interface
 print("[CHAT] Initializing chat interface...")
 with gr.Blocks() as demo:
     gr.Markdown(f"""
@@ -327,23 +277,18 @@ with gr.Blocks() as demo:
     submit_btn = gr.Button("Submit")
     clear_btn = gr.Button("Clear")
 
-    # Event handlers
     submit_btn.click(
        fn=generate_response,
        inputs=[question, chatbot],
        outputs=[chatbot],
        queue=True
    )
-    print("[CHAT] Submit button handler configured")
 
    clear_btn.click(
        lambda: (None, ""),
        inputs=[],
        outputs=[chatbot, question]
    )
-    print("[CHAT] Clear button handler configured")
-
 
 if __name__ == "__main__":
     demo.launch()
-
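A note on the new key check: the added assert passes when either OPENAI_API_KEY or HF_TOKEN is present, so it can still succeed while the key the configured provider actually needs is missing (for example, only OPENAI_API_KEY set while MODEL_PROVIDER stays "huggingface"). A provider-aware sketch, assuming the same environment variables app.py already reads (the helper name is illustrative, not part of the commit):

import os

def require_api_key(provider: str) -> None:
    # Fail fast with a provider-specific message instead of a generic assert.
    required = "HF_TOKEN" if provider.lower() == "huggingface" else "OPENAI_API_KEY"
    if not os.getenv(required):
        raise RuntimeError(f"{required} must be set when MODEL_PROVIDER={provider!r}.")

require_api_key(os.environ.get("MODEL_PROVIDER", "huggingface"))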
 
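A note on the dataset block: with streaming=True, load_dataset returns an iterable dataset, and Dataset.from_list(list(dataset['train'])) then pulls every record into memory anyway, so nothing is actually streamed. If full materialization is the intent, a plain split load is equivalent and simpler; a sketch, assuming the same dataset id:

from datasets import load_dataset

try:
    # A non-streaming split load returns a regular Dataset (backed by the local
    # Arrow cache), skipping the IterableDataset -> list -> Dataset round trip.
    dataset = load_dataset('tosin2013/autogen', split='train')
except Exception as e:
    print(f"[ERROR] Failed to load dataset: {e}")
    raise SystemExit(1)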