Spaces:

rui3000
/

RPS_game_assist

Running on Zero

App Files Files Community

rui3000 committed 8 days ago

Commit

2e54946

verified ·

1 Parent(s): 24a51a6

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -28

app.py CHANGED Viewed

@@ -1,23 +1,27 @@
 import gradio as gr
 import torch
-import time # Import time module
 from transformers import AutoModelForCausalLM, AutoTokenizer
 # --- Configuration ---
 MODEL_ID = "Qwen/Qwen2-1.5B-Instruct"
 # --- Load Model and Tokenizer ---
 print(f"Loading model: {MODEL_ID}")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     torch_dtype="auto",
-    device_map="auto"
 )
 print("Model loaded successfully.")
-# --- Generation Function (Updated to return token count) ---
 def generate_response(messages, max_length=512, temperature=0.7, top_p=0.9):
     """Generate a response and return it along with the number of generated tokens."""
     num_generated_tokens = 0
@@ -27,8 +31,11 @@ def generate_response(messages, max_length=512, temperature=0.7, top_p=0.9):
             tokenize=False,
             add_generation_prompt=True
         )
-        model_inputs = tokenizer([prompt_text], return_tensors="pt").to(model.device)
-        input_ids_len = model_inputs.input_ids.shape[-1] # Length of input tokens
         generation_kwargs = {
             "max_new_tokens": max_length,
@@ -40,7 +47,7 @@ def generate_response(messages, max_length=512, temperature=0.7, top_p=0.9):
         print("Generating response...")
         with torch.no_grad():
-            # Generate response - Ensure output_scores or similar isn't needed if just counting
             generated_ids = model.generate(model_inputs.input_ids, **generation_kwargs)
         # Calculate generated tokens
@@ -49,13 +56,15 @@ def generate_response(messages, max_length=512, temperature=0.7, top_p=0.9):
         response = tokenizer.decode(output_ids, skip_special_tokens=True)
         print("Generation complete.")
-        return response.strip(), num_generated_tokens # Return response and token count
     except Exception as e:
         print(f"Error during generation: {e}")
-        return f"An error occurred: {str(e)}", num_generated_tokens # Return error and token count
-# --- Input Processing Function (Updated for Time/Token outputs) ---
 def process_input(
     player_stats,
     ai_stats,
@@ -66,7 +75,7 @@ def process_input(
     top_p
 ):
     """Process inputs, generate response, and return display info, response, time, and token count."""
     # Construct the user message content
     user_content = f"Player Move Frequency Stats:\n{player_stats}\n\n"
     if ai_stats and ai_stats.strip():
@@ -83,7 +92,7 @@ def process_input(
     start_time = time.time()
     # Generate response from the model
-    response, generated_tokens = generate_response( # Capture token count
         messages,
         max_length=max_length,
         temperature=temperature,
@@ -92,17 +101,17 @@ def process_input(
     # --- Time Measurement End ---
     end_time = time.time()
-    duration = round(end_time - start_time, 2) # Calculate duration
     # For display purposes
     display_prompt = f"System Prompt (if used):\n{system_prompt}\n\n------\n\nUser Content:\n{user_content}"
     # Return all results including time and tokens
     return display_prompt, response, f"{duration} seconds", generated_tokens
-# --- Gradio Interface (Added Time/Token displays, refined System Prompt) ---
-# Refined default system prompt for better reasoning
 DEFAULT_SYSTEM_PROMPT = """You are an expert Rock-Paper-Scissors (RPS) strategist focusing on statistical analysis.
 Your task is to recommend the optimal AI move based *only* on the provided move frequency statistics for the player.
@@ -114,9 +123,8 @@ Follow these steps:
 Base your analysis strictly on the provided frequencies and standard RPS rules."""
-# Default example stats and query
 DEFAULT_PLAYER_STATS = "Rock: 40%\nPaper: 30%\nScissors: 30%"
-DEFAULT_AI_STATS = "" # Keep AI stats optional and clear by default
 DEFAULT_USER_QUERY = "Based *only* on the player's move frequencies, what single move should the AI make next to maximize its statistical chance of winning? Explain your reasoning clearly step-by-step as instructed."
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
@@ -138,21 +146,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 info="Ask the specific question based on the stats."
             )
             system_prompt_input = gr.Textbox(
-                label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, # Set default value
-                lines=12 # Adjusted lines
             )
         with gr.Column(scale=1): # Params/Output column
             gr.Markdown("## Generation Parameters")
             max_length_slider = gr.Slider(minimum=50, maximum=1024, value=300, step=16, label="Max New Tokens")
-            temperature_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.4, step=0.05, label="Temperature") # Lowered default further
             top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top P")
             submit_btn = gr.Button("Generate Response", variant="primary")
             gr.Markdown("## Performance Metrics")
-            # Outputs for Time and Tokens
             time_output = gr.Textbox(label="Generation Time", interactive=False)
-            tokens_output = gr.Number(label="Generated Tokens", interactive=False) # Use Number for token count
             gr.Markdown("""
             ## Testing Tips
@@ -162,15 +169,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             """)
     with gr.Row():
-        # Display final prompt and model response (side-by-side)
         final_prompt_display = gr.Textbox(
-            label="Formatted Input Sent to Model (via Chat Template)", lines=20 # Increased lines
         )
         response_display = gr.Textbox(
-            label="Model Response", lines=20, show_copy_button=True # Increased lines
         )
-    # Handle button click - Updated inputs and outputs list
     submit_btn.click(
         process_input,
         inputs=[
@@ -179,11 +184,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         ],
         outputs=[
             final_prompt_display, response_display,
-            time_output, tokens_output # Added new outputs
         ],
-        api_name="generate_rps_frequency_analysis_v2" # Updated api_name
     )
 # --- Launch the demo ---
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import torch
+import time
+import spaces # Import the spaces library
 from transformers import AutoModelForCausalLM, AutoTokenizer
 # --- Configuration ---
 MODEL_ID = "Qwen/Qwen2-1.5B-Instruct"
 # --- Load Model and Tokenizer ---
+# Note: Model loading happens when the Space starts.
+# device_map="auto" will attempt to use the GPU when allocated by @spaces.GPU
 print(f"Loading model: {MODEL_ID}")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     torch_dtype="auto",
+    device_map="auto" # Keep this, it helps distribute within the allocated GPU(s)
 )
 print("Model loaded successfully.")
+# --- Generation Function (Returns response and token count) ---
+# This function will run on the GPU allocated via the decorator on process_input
 def generate_response(messages, max_length=512, temperature=0.7, top_p=0.9):
     """Generate a response and return it along with the number of generated tokens."""
     num_generated_tokens = 0
             tokenize=False,
             add_generation_prompt=True
         )
+        # Ensure model_inputs are sent to the correct device the model is on
+        # device_map='auto' handles this, but explicitly checking model.device is safer
+        device = model.device
+        model_inputs = tokenizer([prompt_text], return_tensors="pt").to(device)
+        input_ids_len = model_inputs.input_ids.shape[-1]
         generation_kwargs = {
             "max_new_tokens": max_length,
         print("Generating response...")
         with torch.no_grad():
+            # Generate response
             generated_ids = model.generate(model_inputs.input_ids, **generation_kwargs)
         # Calculate generated tokens
         response = tokenizer.decode(output_ids, skip_special_tokens=True)
         print("Generation complete.")
+        return response.strip(), num_generated_tokens
     except Exception as e:
         print(f"Error during generation: {e}")
+        # Ensure error message is returned correctly even if tokens couldn't be counted
+        return f"An error occurred: {str(e)}", num_generated_tokens
+# --- Input Processing Function (Decorated for ZeroGPU) ---
+@spaces.GPU # Add the ZeroGPU decorator here
 def process_input(
     player_stats,
     ai_stats,
     top_p
 ):
     """Process inputs, generate response, and return display info, response, time, and token count."""
+    print("GPU requested via decorator, starting processing...") # Add a log message
     # Construct the user message content
     user_content = f"Player Move Frequency Stats:\n{player_stats}\n\n"
     if ai_stats and ai_stats.strip():
     start_time = time.time()
     # Generate response from the model
+    response, generated_tokens = generate_response(
         messages,
         max_length=max_length,
         temperature=temperature,
     # --- Time Measurement End ---
     end_time = time.time()
+    duration = round(end_time - start_time, 2)
     # For display purposes
     display_prompt = f"System Prompt (if used):\n{system_prompt}\n\n------\n\nUser Content:\n{user_content}"
+    print(f"Processing finished in {duration} seconds.") # Add a log message
     # Return all results including time and tokens
     return display_prompt, response, f"{duration} seconds", generated_tokens
+# --- Gradio Interface (No changes needed here) ---
 DEFAULT_SYSTEM_PROMPT = """You are an expert Rock-Paper-Scissors (RPS) strategist focusing on statistical analysis.
 Your task is to recommend the optimal AI move based *only* on the provided move frequency statistics for the player.
 Base your analysis strictly on the provided frequencies and standard RPS rules."""
 DEFAULT_PLAYER_STATS = "Rock: 40%\nPaper: 30%\nScissors: 30%"
+DEFAULT_AI_STATS = ""
 DEFAULT_USER_QUERY = "Based *only* on the player's move frequencies, what single move should the AI make next to maximize its statistical chance of winning? Explain your reasoning clearly step-by-step as instructed."
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 info="Ask the specific question based on the stats."
             )
             system_prompt_input = gr.Textbox(
+                label="System Prompt", value=DEFAULT_SYSTEM_PROMPT,
+                lines=12
             )
         with gr.Column(scale=1): # Params/Output column
             gr.Markdown("## Generation Parameters")
             max_length_slider = gr.Slider(minimum=50, maximum=1024, value=300, step=16, label="Max New Tokens")
+            temperature_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.4, step=0.05, label="Temperature")
             top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top P")
             submit_btn = gr.Button("Generate Response", variant="primary")
             gr.Markdown("## Performance Metrics")
             time_output = gr.Textbox(label="Generation Time", interactive=False)
+            tokens_output = gr.Number(label="Generated Tokens", interactive=False)
             gr.Markdown("""
             ## Testing Tips
             """)
     with gr.Row():
         final_prompt_display = gr.Textbox(
+            label="Formatted Input Sent to Model (via Chat Template)", lines=20
         )
         response_display = gr.Textbox(
+            label="Model Response", lines=20, show_copy_button=True
         )
     submit_btn.click(
         process_input,
         inputs=[
         ],
         outputs=[
             final_prompt_display, response_display,
+            time_output, tokens_output
         ],
+        api_name="generate_rps_frequency_analysis_v2"
     )
 # --- Launch the demo ---
 if __name__ == "__main__":
+    # share=True only makes Gradio create a temporary public link when running locally;
+    # it is unrelated to ZeroGPU and is not needed when deployed on the HF Spaces platform.
+    demo.launch()