wuhp committed · verified
Commit 7714f74 · Parent(s): 5138a85

Update app.py

Files changed (1)
  1. app.py +83 -8
app.py CHANGED
@@ -9,6 +9,70 @@ model_ids = {
     "7B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
 }
 
+# Default Prompts - User can override these in the UI
+default_prompt_1_5b = """**Code Analysis Task**
+As a Senior Code Analyst, process this programming problem:
+
+**User Request**
+{user_prompt}
+
+**Context from Memory**
+{context_1_5b}
+
+**Required Output Format**
+1. Problem Breakdown:
+   - Input/Output requirements
+   - Key constraints
+   - Edge cases to consider
+
+2. Approach Options:
+   - [Option 1] Algorithm/data structure choices
+   - [Option 2] Alternative solutions
+   - Time/space complexity analysis
+
+3. Recommended Strategy:
+   - Best approach selection rationale
+   - Potential pitfalls to avoid
+
+4. Initial Pseudocode Sketch:
+   - High-level structure
+   - Critical function definitions"""
+
+default_prompt_7b = """**Code Implementation Task**
+As a Principal Software Engineer, finalize this solution:
+
+**Initial Analysis**
+{response_1_5b}
+
+**Context from Memory**
+{context_7b}
+
+**Required Output Format**
+1. Optimized Solution:
+   - Final algorithm choice justification
+   - Complexity analysis (Big O)
+
+2. Production-Grade Code:
+   - Clean, modular implementation
+   - Language: [Python/JS/etc] (infer from question)
+   - Error handling
+   - Documentation
+
+3. Testing Plan:
+   - Sample test cases (normal/edge cases)
+   - Potential failure points
+
+4. Optimization Opportunities:
+   - Alternative approaches for different constraints
+   - Parallelization/performance tips
+   - Memory management considerations
+
+5. Debugging Guide:
+   - Common mistakes
+   - Logging suggestions
+   - Step-through example"""
+
+
 # Function to load model and tokenizer (slightly adjusted device_map)
 def load_model_and_tokenizer(model_id):
     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
@@ -52,7 +116,7 @@ def retrieve_from_memory(query, top_k=2):
 
 # --- Swarm Agent Function with Shared Memory (RAG) - DECORATED with @spaces.GPU ---
 @spaces.GPU # <---- GPU DECORATOR ADDED HERE!
-def swarm_agent_sequential_rag(user_prompt, temperature=0.7, top_p=0.9, max_new_tokens=300): # Added settings as arguments
+def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_template, temperature=0.7, top_p=0.9, max_new_tokens=300): # Added prompt templates as arguments
     global shared_memory
     shared_memory = [] # Clear memory for each new request
 
@@ -62,7 +126,10 @@ def swarm_agent_sequential_rag(user_prompt, temperature=0.7, top_p=0.9, max_new_
     print("\n[1.5B Model - Brainstorming] - GPU Accelerated") # Added GPU indication
     retrieved_memory_1_5b = retrieve_from_memory(user_prompt)
     context_1_5b = "\n".join([f"- {mem}" for mem in retrieved_memory_1_5b]) if retrieved_memory_1_5b else "No relevant context found in memory."
-    prompt_1_5b = f"Context from Shared Memory:\n{context_1_5b}\n\nYou are a quick idea generator. Generate an initial response to the following user request, considering the context above:\n\nUser Request: {user_prompt}\n\nInitial Response:"
+
+    # Use user-provided prompt template for 1.5B model
+    prompt_1_5b = prompt_1_5b_template.format(user_prompt=user_prompt, context_1_5b=context_1_5b)
+
     input_ids_1_5b = tokenizers["1.5B"].encode(prompt_1_5b, return_tensors="pt").to(models["1.5B"].device)
     output_1_5b = models["1.5B"].generate(
         input_ids_1_5b,
@@ -79,7 +146,11 @@ def swarm_agent_sequential_rag(user_prompt, temperature=0.7, top_p=0.9, max_new_
     print("\n[7B Model - Elaboration] - GPU Accelerated") # Added GPU indication
     retrieved_memory_7b = retrieve_from_memory(response_1_5b)
     context_7b = "\n".join([f"- {mem}" for mem in retrieved_memory_7b]) if retrieved_memory_7b else "No relevant context found in memory."
-    prompt_7b = f"Context from Shared Memory:\n{context_7b}\n\nYou are a detailed elaborator. Take the following initial response and elaborate on it, adding more detail and reasoning, considering the context above. \n\nInitial Response:\n{response_1_5b}\n\nElaborated Response:"
+
+    # Use user-provided prompt template for 7B model
+    prompt_7b = prompt_7b_template.format(response_1_5b=response_1_5b, context_7b=context_7b)
+
+
     input_ids_7b = tokenizers["7B"].encode(prompt_7b, return_tensors="pt").to(models["7B"].device)
     output_7b = models["7B"].generate(
         input_ids_7b,
@@ -96,11 +167,13 @@ def swarm_agent_sequential_rag(user_prompt, temperature=0.7, top_p=0.9, max_new_
 
 
 # --- Gradio ChatInterface ---
-def gradio_interface(message, history, temperature, top_p, max_tokens): # Accept settings from interface
+def gradio_interface(message, history, temp, top_p, max_tokens, prompt_1_5b_text, prompt_7b_text): # Accept prompt textboxes
     # history is automatically managed by ChatInterface
     response = swarm_agent_sequential_rag(
         message,
-        temperature=temperature,
+        prompt_1_5b_template=prompt_1_5b_text, # Pass prompt templates
+        prompt_7b_template=prompt_7b_text,
+        temperature=temp,
         top_p=top_p,
         max_new_tokens=int(max_tokens) # Ensure max_tokens is an integer
     )
@@ -108,14 +181,16 @@ def gradio_interface(message, history, temperature, top_p, max_tokens): # Accept
 
 iface = gr.ChatInterface( # Using ChatInterface now
     fn=gradio_interface,
-    # Define additional inputs for settings
+    # Define additional inputs for settings and prompts
     additional_inputs=[
         gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature"),
         gr.Slider(minimum=0.01, maximum=1.0, step=0.05, value=0.9, label="Top P"),
         gr.Number(value=300, label="Max Tokens", precision=0), # Use Number for integer tokens
+        gr.Textbox(value=default_prompt_1_5b, lines=10, label="1.5B Model Prompt Template"), # Textbox for 1.5B prompt
+        gr.Textbox(value=default_prompt_7b, lines=10, label="7B Model Prompt Template"), # Textbox for 7B prompt
     ],
-    title="DeepSeek Agent Swarm Chat (ZeroGPU Demo - 2 Models)", # Updated title
-    description="Chat with a DeepSeek agent swarm (1.5B, 7B) with shared memory and adjustable settings. **GPU accelerated using ZeroGPU!** (Requires Pro Space)", # Updated description
+    title="DeepSeek Agent Swarm Chat (ZeroGPU Demo - 2 Models) - PROMPT CUSTOMIZATION", # Updated title
+    description="Chat with a DeepSeek agent swarm (1.5B, 7B) with shared memory, adjustable settings, **and customizable prompts!** **GPU accelerated using ZeroGPU!** (Requires Pro Space)", # Updated description
 )
 
 if __name__ == "__main__":
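Note on the new template plumbing: prompt_1_5b_template.format(...) and prompt_7b_template.format(...) run str.format on text the user can freely edit in the UI, so any stray brace typed into a customized template (easy to do in a code-analysis prompt) raises a KeyError, IndexError, or ValueError. A minimal defensive sketch, not part of this commit (the helper name render_template is hypothetical):

    # Hypothetical helper: fall back to brace-escaping when a user-edited
    # template contains literal braces that break str.format.
    def render_template(template: str, **fields) -> str:
        try:
            return template.format(**fields)
        except (KeyError, IndexError, ValueError):
            # Escape every brace, then restore only the known placeholders.
            escaped = template.replace("{", "{{").replace("}", "}}")
            for name in fields:
                escaped = escaped.replace("{{" + name + "}}", "{" + name + "}")
            return escaped.format(**fields)

    # swarm_agent_sequential_rag could then build each prompt as:
    # prompt_1_5b = render_template(prompt_1_5b_template,
    #                               user_prompt=user_prompt, context_1_5b=context_1_5b)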
 
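The ordering of additional_inputs is load-bearing: gr.ChatInterface passes their values positionally after (message, history), so the two new Textboxes must stay after the Slider/Number controls to line up with the gradio_interface signature. A standalone sketch of that mapping, assuming the same component order as the commit (the echo function is made up for the example):

    import gradio as gr

    def echo(message, history, temp, top_p, max_tokens, tmpl_1_5b, tmpl_7b):
        # Values arrive positionally, in the order declared in additional_inputs.
        return f"temp={temp}, top_p={top_p}, max_tokens={int(max_tokens)}"

    demo = gr.ChatInterface(
        fn=echo,
        additional_inputs=[
            gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature"),
            gr.Slider(minimum=0.01, maximum=1.0, step=0.05, value=0.9, label="Top P"),
            gr.Number(value=300, label="Max Tokens", precision=0),
            gr.Textbox(value="{user_prompt}\n{context_1_5b}", lines=10, label="1.5B Model Prompt Template"),
            gr.Textbox(value="{response_1_5b}\n{context_7b}", lines=10, label="7B Model Prompt Template"),
        ],
    )

    if __name__ == "__main__":
        demo.launch()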