Update app.py
app.py
CHANGED
@@ -9,6 +9,70 @@ model_ids = {
     "7B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
 }
 
+# Default Prompts - User can override these in the UI
+default_prompt_1_5b = """**Code Analysis Task**
+As a Senior Code Analyst, process this programming problem:
+
+**User Request**
+{user_prompt}
+
+**Context from Memory**
+{context_1_5b}
+
+**Required Output Format**
+1. Problem Breakdown:
+   - Input/Output requirements
+   - Key constraints
+   - Edge cases to consider
+
+2. Approach Options:
+   - [Option 1] Algorithm/data structure choices
+   - [Option 2] Alternative solutions
+   - Time/space complexity analysis
+
+3. Recommended Strategy:
+   - Best approach selection rationale
+   - Potential pitfalls to avoid
+
+4. Initial Pseudocode Sketch:
+   - High-level structure
+   - Critical function definitions"""
+
+default_prompt_7b = """**Code Implementation Task**
+As a Principal Software Engineer, finalize this solution:
+
+**Initial Analysis**
+{response_1_5b}
+
+**Context from Memory**
+{context_7b}
+
+**Required Output Format**
+1. Optimized Solution:
+   - Final algorithm choice justification
+   - Complexity analysis (Big O)
+
+2. Production-Grade Code:
+   - Clean, modular implementation
+   - Language: [Python/JS/etc] (infer from question)
+   - Error handling
+   - Documentation
+
+3. Testing Plan:
+   - Sample test cases (normal/edge cases)
+   - Potential failure points
+
+4. Optimization Opportunities:
+   - Alternative approaches for different constraints
+   - Parallelization/performance tips
+   - Memory management considerations
+
+5. Debugging Guide:
+   - Common mistakes
+   - Logging suggestions
+   - Step-through example"""
+
+
 # Function to load model and tokenizer (slightly adjusted device_map)
 def load_model_and_tokenizer(model_id):
     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
@@ -52,7 +116,7 @@ def retrieve_from_memory(query, top_k=2):
 
 # --- Swarm Agent Function with Shared Memory (RAG) - DECORATED with @spaces.GPU ---
 @spaces.GPU # <---- GPU DECORATOR ADDED HERE!
-def swarm_agent_sequential_rag(user_prompt, temperature=0.7, top_p=0.9, max_new_tokens=300): # Added
+def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_template, temperature=0.7, top_p=0.9, max_new_tokens=300): # Added prompt templates as arguments
     global shared_memory
     shared_memory = [] # Clear memory for each new request
 
@@ -62,7 +126,10 @@ def swarm_agent_sequential_rag(user_prompt, temperature=0.7, top_p=0.9, max_new_
     print("\n[1.5B Model - Brainstorming] - GPU Accelerated") # Added GPU indication
     retrieved_memory_1_5b = retrieve_from_memory(user_prompt)
     context_1_5b = "\n".join([f"- {mem}" for mem in retrieved_memory_1_5b]) if retrieved_memory_1_5b else "No relevant context found in memory."
-    prompt_1_5b = …
+
+    # Use user-provided prompt template for 1.5B model
+    prompt_1_5b = prompt_1_5b_template.format(user_prompt=user_prompt, context_1_5b=context_1_5b)
+
     input_ids_1_5b = tokenizers["1.5B"].encode(prompt_1_5b, return_tensors="pt").to(models["1.5B"].device)
     output_1_5b = models["1.5B"].generate(
         input_ids_1_5b,
@@ -79,7 +146,11 @@ def swarm_agent_sequential_rag(user_prompt, temperature=0.7, top_p=0.9, max_new_
     print("\n[7B Model - Elaboration] - GPU Accelerated") # Added GPU indication
     retrieved_memory_7b = retrieve_from_memory(response_1_5b)
     context_7b = "\n".join([f"- {mem}" for mem in retrieved_memory_7b]) if retrieved_memory_7b else "No relevant context found in memory."
-    prompt_7b = …
+
+    # Use user-provided prompt template for 7B model
+    prompt_7b = prompt_7b_template.format(response_1_5b=response_1_5b, context_7b=context_7b)
+
+
     input_ids_7b = tokenizers["7B"].encode(prompt_7b, return_tensors="pt").to(models["7B"].device)
     output_7b = models["7B"].generate(
         input_ids_7b,
@@ -96,11 +167,13 @@ def swarm_agent_sequential_rag(user_prompt, temperature=0.7, top_p=0.9, max_new_
 
 
 # --- Gradio ChatInterface ---
-def gradio_interface(message, history, temperature, top_p, max_tokens): # Accept
+def gradio_interface(message, history, temp, top_p, max_tokens, prompt_1_5b_text, prompt_7b_text): # Accept prompt textboxes
     # history is automatically managed by ChatInterface
     response = swarm_agent_sequential_rag(
         message,
-        temperature=temperature,
+        prompt_1_5b_template=prompt_1_5b_text, # Pass prompt templates
+        prompt_7b_template=prompt_7b_text,
+        temperature=temp,
         top_p=top_p,
         max_new_tokens=int(max_tokens) # Ensure max_tokens is an integer
     )
@@ -108,14 +181,16 @@ def gradio_interface(message, history, temperature, top_p, max_tokens): # Accept
 
 iface = gr.ChatInterface( # Using ChatInterface now
     fn=gradio_interface,
-    # Define additional inputs for settings
+    # Define additional inputs for settings and prompts
     additional_inputs=[
         gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature"),
         gr.Slider(minimum=0.01, maximum=1.0, step=0.05, value=0.9, label="Top P"),
         gr.Number(value=300, label="Max Tokens", precision=0), # Use Number for integer tokens
+        gr.Textbox(value=default_prompt_1_5b, lines=10, label="1.5B Model Prompt Template"), # Textbox for 1.5B prompt
+        gr.Textbox(value=default_prompt_7b, lines=10, label="7B Model Prompt Template"), # Textbox for 7B prompt
     ],
-    title="DeepSeek Agent Swarm Chat (ZeroGPU Demo - 2 Models)", # Updated title
-    description="Chat with a DeepSeek agent swarm (1.5B, 7B) with shared memory
+    title="DeepSeek Agent Swarm Chat (ZeroGPU Demo - 2 Models) - PROMPT CUSTOMIZATION", # Updated title
+    description="Chat with a DeepSeek agent swarm (1.5B, 7B) with shared memory, adjustable settings, **and customizable prompts!** **GPU accelerated using ZeroGPU!** (Requires Pro Space)", # Updated description
 )
 
 if __name__ == "__main__":
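The heart of this change is plain `str.format` substitution: each UI textbox holds a template whose named placeholders must match the keyword arguments passed inside `swarm_agent_sequential_rag`. A minimal sketch of that flow, using shortened stand-in templates rather than the full defaults added above:

```python
# Minimal sketch of the new template flow; these stand-in templates
# abbreviate the long default_prompt_1_5b / default_prompt_7b strings.
template_1_5b = "**User Request**\n{user_prompt}\n\n**Context from Memory**\n{context_1_5b}"
template_7b = "**Initial Analysis**\n{response_1_5b}\n\n**Context from Memory**\n{context_7b}"

# .format() fills the named placeholders, exactly as the new
# prompt_1_5b_template.format(...) call in the diff does.
prompt_1_5b = template_1_5b.format(
    user_prompt="Reverse a singly linked list.",
    context_1_5b="No relevant context found in memory.",
)
print(prompt_1_5b)

# Note: a user-edited template containing stray braces such as "{}" or
# "{foo}" would raise ValueError/KeyError at the bare .format() call.
```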
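Both swarm stages follow the same encode, generate, decode pattern around the new `format` call. A rough, self-contained sketch of one stage; the decode step, the `do_sample` flag, and the exact loading options are assumptions here, since they sit outside the hunks shown:

```python
# Rough sketch of one swarm stage (7B shown), assuming the standard
# transformers API; decode and do_sample are assumptions not visible
# in the diff above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

prompt_7b = "**Initial Analysis**\n...\n\n**Context from Memory**\n..."
input_ids = tokenizer.encode(prompt_7b, return_tensors="pt").to(model.device)
output = model.generate(
    input_ids,
    max_new_tokens=300,
    temperature=0.7,
    top_p=0.9,
    do_sample=True,  # sampling must be on for temperature/top_p to apply
)
# Strip the prompt tokens so only the new completion is returned.
response_7b = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(response_7b)
```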
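One wiring detail worth noting: `gr.ChatInterface` passes `additional_inputs` to `fn` positionally, in list order, after `(message, history)`. That is why the two new textboxes must come last both in the list and in the signature of `gradio_interface`. A stripped-down sketch, with an echo function standing in for the real swarm call:

```python
# Stripped-down sketch of the ChatInterface wiring; the echo function
# is a stand-in for swarm_agent_sequential_rag.
import gradio as gr

def gradio_interface(message, history, temp, top_p, max_tokens,
                     prompt_1_5b_text, prompt_7b_text):
    # additional_inputs arrive positionally, in the order listed below.
    return f"temp={temp}, top_p={top_p}, max_tokens={int(max_tokens)}"

iface = gr.ChatInterface(
    fn=gradio_interface,
    additional_inputs=[
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature"),
        gr.Slider(minimum=0.01, maximum=1.0, step=0.05, value=0.9, label="Top P"),
        gr.Number(value=300, label="Max Tokens", precision=0),
        gr.Textbox(lines=10, label="1.5B Model Prompt Template"),
        gr.Textbox(lines=10, label="7B Model Prompt Template"),
    ],
)

if __name__ == "__main__":
    iface.launch()
```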