Update app.py
app.py
CHANGED
@@ -9,6 +9,70 @@ model_ids = {
     "7B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
 }
 
+# Default Prompts - User can override these in the UI
+default_prompt_1_5b = """**Code Analysis Task**
+As a Senior Code Analyst, process this programming problem:
+
+**User Request**
+{user_prompt}
+
+**Context from Memory**
+{context_1_5b}
+
+**Required Output Format**
+1. Problem Breakdown:
+   - Input/Output requirements
+   - Key constraints
+   - Edge cases to consider
+
+2. Approach Options:
+   - [Option 1] Algorithm/data structure choices
+   - [Option 2] Alternative solutions
+   - Time/space complexity analysis
+
+3. Recommended Strategy:
+   - Best approach selection rationale
+   - Potential pitfalls to avoid
+
+4. Initial Pseudocode Sketch:
+   - High-level structure
+   - Critical function definitions"""
+
+default_prompt_7b = """**Code Implementation Task**
+As a Principal Software Engineer, finalize this solution:
+
+**Initial Analysis**
+{response_1_5b}
+
+**Context from Memory**
+{context_7b}
+
+**Required Output Format**
+1. Optimized Solution:
+   - Final algorithm choice justification
+   - Complexity analysis (Big O)
+
+2. Production-Grade Code:
+   - Clean, modular implementation
+   - Language: [Python/JS/etc] (infer from question)
+   - Error handling
+   - Documentation
+
+3. Testing Plan:
+   - Sample test cases (normal/edge cases)
+   - Potential failure points
+
+4. Optimization Opportunities:
+   - Alternative approaches for different constraints
+   - Parallelization/performance tips
+   - Memory management considerations
+
+5. Debugging Guide:
+   - Common mistakes
+   - Logging suggestions
+   - Step-through example"""
+
+
 # Function to load model and tokenizer (slightly adjusted device_map)
 def load_model_and_tokenizer(model_id):
     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
@@ -52,7 +116,7 @@ def retrieve_from_memory(query, top_k=2):
 
 # --- Swarm Agent Function with Shared Memory (RAG) - DECORATED with @spaces.GPU ---
 @spaces.GPU # <---- GPU DECORATOR ADDED HERE!
-def swarm_agent_sequential_rag(user_prompt, temperature=0.7, top_p=0.9, max_new_tokens=300): # Added
+def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_template, temperature=0.7, top_p=0.9, max_new_tokens=300): # Added prompt templates as arguments
     global shared_memory
     shared_memory = [] # Clear memory for each new request
 
@@ -62,7 +126,10 @@ def swarm_agent_sequential_rag(user_prompt, temperature=0.7, top_p=0.9, max_new_
     print("\n[1.5B Model - Brainstorming] - GPU Accelerated") # Added GPU indication
     retrieved_memory_1_5b = retrieve_from_memory(user_prompt)
     context_1_5b = "\n".join([f"- {mem}" for mem in retrieved_memory_1_5b]) if retrieved_memory_1_5b else "No relevant context found in memory."
-    prompt_1_5b = …
+
+    # Use user-provided prompt template for 1.5B model
+    prompt_1_5b = prompt_1_5b_template.format(user_prompt=user_prompt, context_1_5b=context_1_5b)
+
     input_ids_1_5b = tokenizers["1.5B"].encode(prompt_1_5b, return_tensors="pt").to(models["1.5B"].device)
     output_1_5b = models["1.5B"].generate(
         input_ids_1_5b,
@@ -79,7 +146,11 @@ def swarm_agent_sequential_rag(user_prompt, temperature=0.7, top_p=0.9, max_new_
     print("\n[7B Model - Elaboration] - GPU Accelerated") # Added GPU indication
     retrieved_memory_7b = retrieve_from_memory(response_1_5b)
     context_7b = "\n".join([f"- {mem}" for mem in retrieved_memory_7b]) if retrieved_memory_7b else "No relevant context found in memory."
-    prompt_7b = …
+
+    # Use user-provided prompt template for 7B model
+    prompt_7b = prompt_7b_template.format(response_1_5b=response_1_5b, context_7b=context_7b)
+
+
     input_ids_7b = tokenizers["7B"].encode(prompt_7b, return_tensors="pt").to(models["7B"].device)
     output_7b = models["7B"].generate(
         input_ids_7b,
@@ -96,11 +167,13 @@ def swarm_agent_sequential_rag(user_prompt, temperature=0.7, top_p=0.9, max_new_
 
 
 # --- Gradio ChatInterface ---
-def gradio_interface(message, history, temperature, top_p, max_tokens): # Accept
+def gradio_interface(message, history, temp, top_p, max_tokens, prompt_1_5b_text, prompt_7b_text): # Accept prompt textboxes
     # history is automatically managed by ChatInterface
     response = swarm_agent_sequential_rag(
         message,
-        temperature=temperature,
+        prompt_1_5b_template=prompt_1_5b_text, # Pass prompt templates
+        prompt_7b_template=prompt_7b_text,
+        temperature=temp,
         top_p=top_p,
         max_new_tokens=int(max_tokens) # Ensure max_tokens is an integer
     )
@@ -108,14 +181,16 @@ def gradio_interface(message, history, temperature, top_p, max_tokens): # Accept
 
 iface = gr.ChatInterface( # Using ChatInterface now
     fn=gradio_interface,
-    # Define additional inputs for settings
+    # Define additional inputs for settings and prompts
     additional_inputs=[
         gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature"),
         gr.Slider(minimum=0.01, maximum=1.0, step=0.05, value=0.9, label="Top P"),
         gr.Number(value=300, label="Max Tokens", precision=0), # Use Number for integer tokens
+        gr.Textbox(value=default_prompt_1_5b, lines=10, label="1.5B Model Prompt Template"), # Textbox for 1.5B prompt
+        gr.Textbox(value=default_prompt_7b, lines=10, label="7B Model Prompt Template"), # Textbox for 7B prompt
     ],
-    title="DeepSeek Agent Swarm Chat (ZeroGPU Demo - 2 Models)", # Updated title
-    description="Chat with a DeepSeek agent swarm (1.5B, 7B) with shared memory
+    title="DeepSeek Agent Swarm Chat (ZeroGPU Demo - 2 Models) - PROMPT CUSTOMIZATION", # Updated title
+    description="Chat with a DeepSeek agent swarm (1.5B, 7B) with shared memory, adjustable settings, **and customizable prompts!** **GPU accelerated using ZeroGPU!** (Requires Pro Space)", # Updated description
 )
 
 if __name__ == "__main__":
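The heart of this change is plain `str.format` substitution: each UI textbox holds a template whose named placeholders must match the keyword arguments passed inside `swarm_agent_sequential_rag`. A minimal sketch of that flow, using shortened stand-in templates rather than the full defaults added above:

```python
# Minimal sketch of the new template flow; these stand-in templates
# abbreviate the long default_prompt_1_5b / default_prompt_7b strings.
template_1_5b = "**User Request**\n{user_prompt}\n\n**Context from Memory**\n{context_1_5b}"
template_7b = "**Initial Analysis**\n{response_1_5b}\n\n**Context from Memory**\n{context_7b}"

# .format() fills the named placeholders, exactly as the new
# prompt_1_5b_template.format(...) call in the diff does.
prompt_1_5b = template_1_5b.format(
    user_prompt="Reverse a singly linked list.",
    context_1_5b="No relevant context found in memory.",
)
print(prompt_1_5b)

# Note: a user-edited template containing stray braces such as "{}" or
# "{foo}" would raise ValueError/KeyError at the bare .format() call.
```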
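Both swarm stages follow the same encode, generate, decode pattern around the new `format` call. A rough, self-contained sketch of one stage; the decode step, the `do_sample` flag, and the exact loading options are assumptions here, since they sit outside the hunks shown:

```python
# Rough sketch of one swarm stage (7B shown), assuming the standard
# transformers API; decode and do_sample are assumptions not visible
# in the diff above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

prompt_7b = "**Initial Analysis**\n...\n\n**Context from Memory**\n..."
input_ids = tokenizer.encode(prompt_7b, return_tensors="pt").to(model.device)
output = model.generate(
    input_ids,
    max_new_tokens=300,
    temperature=0.7,
    top_p=0.9,
    do_sample=True,  # sampling must be on for temperature/top_p to apply
)
# Strip the prompt tokens so only the new completion is returned.
response_7b = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(response_7b)
```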
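One wiring detail worth noting: `gr.ChatInterface` passes `additional_inputs` to `fn` positionally, in list order, after `(message, history)`. That is why the two new textboxes must come last both in the list and in the signature of `gradio_interface`. A stripped-down sketch, with an echo function standing in for the real swarm call:

```python
# Stripped-down sketch of the ChatInterface wiring; the echo function
# is a stand-in for swarm_agent_sequential_rag.
import gradio as gr

def gradio_interface(message, history, temp, top_p, max_tokens,
                     prompt_1_5b_text, prompt_7b_text):
    # additional_inputs arrive positionally, in the order listed below.
    return f"temp={temp}, top_p={top_p}, max_tokens={int(max_tokens)}"

iface = gr.ChatInterface(
    fn=gradio_interface,
    additional_inputs=[
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature"),
        gr.Slider(minimum=0.01, maximum=1.0, step=0.05, value=0.9, label="Top P"),
        gr.Number(value=300, label="Max Tokens", precision=0),
        gr.Textbox(lines=10, label="1.5B Model Prompt Template"),
        gr.Textbox(lines=10, label="7B Model Prompt Template"),
    ],
)

if __name__ == "__main__":
    iface.launch()
```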