Spaces:

rui3000
/

RPS_game_assist

Running on Zero

App Files Files Community

rui3000 committed 11 days ago

Commit

480da6f

verified ·

1 Parent(s): 58e6bf8

Update app.py

Browse files

Files changed (1) hide show

app.py +139 -143

app.py CHANGED Viewed

@@ -3,17 +3,23 @@ import torch
 import json
 from transformers import AutoModelForCausalLM, AutoTokenizer
-# Load the Qwen2 0.5B model
-model_id = "Qwen/Qwen2-0.5B"
-tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    torch_dtype=torch.float16,
-    device_map="auto",
-    trust_remote_code=True
 )
-# Predefined game data in compressed formats
 PREDEFINED_GAMES = {
     "rps_simple": {
         "description": "Rock-Paper-Scissors (Simple Format)",
@@ -22,16 +28,11 @@ PREDEFINED_GAMES = {
             "encoding": {"rock": 0, "paper": 1, "scissors": 2},
             "result_encoding": {"ai_win": 0, "player_win": 1, "tie": 2},
             "rounds": [
-                {"round": 1, "player": 0, "ai": 2, "result": 1},
-                {"round": 2, "player": 1, "ai": 1, "result": 2},
-                {"round": 3, "player": 2, "ai": 0, "result": 0},
-                {"round": 4, "player": 0, "ai": 0, "result": 2},
-                {"round": 5, "player": 1, "ai": 0, "result": 1},
-                {"round": 6, "player": 2, "ai": 2, "result": 2},
-                {"round": 7, "player": 0, "ai": 1, "result": 0},
-                {"round": 8, "player": 1, "ai": 2, "result": 0},
-                {"round": 9, "player": 2, "ai": 1, "result": 1},
-                {"round": 10, "player": 0, "ai": 2, "result": 1}
             ],
             "summary": {"player_wins": 4, "ai_wins": 3, "ties": 3}
         }
@@ -46,146 +47,144 @@ PREDEFINED_GAMES = {
     }
 }
-# Predefined prompt templates
 PROMPT_TEMPLATES = {
-    "basic_analysis": "Who is winning right now? What patterns do you notice in the player's choices?",
-    "prediction": "Based on the player's past choices, predict what the player will choose in the next round. Explain your reasoning.",
-    "strategy": "What strategy should the AI use to improve its win rate? Provide specific recommendations.",
-    "pattern_analysis": "Analyze the frequency of each choice (rock, paper, scissors) made by the player. Is there a dominant pattern?",
-    "structured_analysis": "Provide a structured analysis with these sections: 1) Current winner, 2) Player choice patterns, 3) AI performance, 4) Recommended strategy for AI."
 }
-# Prompt formatters
 def format_rps_simple(game_data):
     """Format the RPS data in a simple way that's easy for small models to understand"""
     game = game_data["data"]
-    # Create a mapping for move names
     move_names = {0: "Rock", 1: "Paper", 2: "Scissors"}
     result_names = {0: "AI wins", 1: "Player wins", 2: "Tie"}
-    # Initialize counters for frequency analysis
     player_moves = {"Rock": 0, "Paper": 0, "Scissors": 0}
-    # Format each round in a simple way
     formatted_data = "Game: Rock-Paper-Scissors\n"
     formatted_data += "Format explanation: [Round#, Player move, AI move, Result]\n"
     formatted_data += "Move codes: 0=Rock, 1=Paper, 2=Scissors\n"
     formatted_data += "Result codes: 0=AI wins, 1=Player wins, 2=Tie\n\n"
     formatted_data += "Game Data:\n"
     for round_data in game["rounds"]:
-        r_num = round_data["round"]
-        p_move = round_data["player"]
-        ai_move = round_data["ai"]
-        result = round_data["result"]
-        # Update player move counter
         player_moves[move_names[p_move]] += 1
-        # Format as [round, player, ai, result]
         formatted_data += f"[{r_num}, {p_move}, {ai_move}, {result}] # R{r_num}: Player {move_names[p_move]}, AI {move_names[ai_move]}, {result_names[result]}\n"
-    # Add summary statistics
     formatted_data += "\nSummary:\n"
     formatted_data += f"Player wins: {game['summary']['player_wins']}\n"
     formatted_data += f"AI wins: {game['summary']['ai_wins']}\n"
     formatted_data += f"Ties: {game['summary']['ties']}\n\n"
-    # Add player move frequencies
     formatted_data += "Player move frequencies:\n"
     for move, count in player_moves.items():
-        formatted_data += f"{move}: {count} times ({count*10}%)\n"
     return formatted_data
 def format_rps_numeric(game_data):
     """Format the RPS data in a highly compressed numeric format"""
     game = game_data["data"]
     formatted_data = "RPS Game Data (compressed format)\n"
     formatted_data += f"Rules: {game['rules']}\n\n"
-    # Format all rounds on a single line
     rounds_str = ",".join([str(r) for r in game['rounds']])
     formatted_data += f"Rounds: {rounds_str}\n\n"
-    # Add score summary
     formatted_data += f"Score: Player={game['score']['P']} AI={game['score']['AI']} Ties={game['score']['Tie']}\n"
     return formatted_data
-# Format selectors
 FORMAT_FUNCTIONS = {
     "rps_simple": format_rps_simple,
     "rps_numeric": format_rps_numeric
 }
-def generate_response(prompt, max_length=512, temperature=0.7, top_p=0.9):
-    """Generate a response from the Qwen2 model based on the input prompt."""
-    # Tokenize the input prompt
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    # Generate response
-    with torch.no_grad():
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=max_length,
-            do_sample=True,
-            temperature=temperature,
-            top_p=top_p,
         )
-    # Decode the response
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Extract only the model's response (remove the input prompt)
-    if prompt in response:
-        response = response[len(prompt):]
-    return response.strip()
 def process_input(
     game_format,
     prompt_template,
     custom_prompt,
     use_custom_prompt,
     system_prompt,
-    max_length,
-    temperature,
     top_p
 ):
-    """Process the input and generate a response from the model."""
     # Get the selected game data and format it
     game_data = PREDEFINED_GAMES[game_format]
-    formatted_game_data = FORMAT_FUNCTIONS[game_format](game_data)
-    # Determine which prompt to use
-    prompt_text = custom_prompt if use_custom_prompt else PROMPT_TEMPLATES[prompt_template]
-    # Create the final prompt with optional system prompt
-    if system_prompt:
-        final_prompt = f"{system_prompt}\n\n{formatted_game_data}\n\n{prompt_text}"
-    else:
-        final_prompt = f"{formatted_game_data}\n\n{prompt_text}"
     # Generate response from the model
     response = generate_response(
-        final_prompt,
         max_length=max_length,
         temperature=temperature,
         top_p=top_p
     )
-    return final_prompt, response
-# Create the Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("# Qwen2 0.5B Game Analysis Tester")
-    gr.Markdown("Test how the Qwen2 0.5B model responds to different game data formats and prompts")
     with gr.Row():
         with gr.Column():
             # Game data selection
@@ -194,81 +193,76 @@ with gr.Blocks() as demo:
                 value="rps_simple",
                 label="Game Data Format"
             )
             # System prompt (optional)
             system_prompt = gr.Textbox(
                 label="System Prompt (Optional)",
-                placeholder="e.g., You are an expert game analyzer. Your task is to analyze game patterns and provide insights.",
-                lines=2
             )
             # Prompt selection
             with gr.Row():
                 prompt_template = gr.Dropdown(
                     choices=list(PROMPT_TEMPLATES.keys()),
-                    value="basic_analysis",
                     label="Prompt Template"
                 )
                 use_custom_prompt = gr.Checkbox(
                     label="Use Custom Prompt",
                     value=False
                 )
             custom_prompt = gr.Textbox(
-                label="Custom Prompt (if enabled above)",
-                placeholder="Enter your custom prompt here",
-                lines=2
             )
             # Generation parameters
             with gr.Row():
                 max_length = gr.Slider(
-                    minimum=50,
-                    maximum=512,
-                    value=256,
-                    step=1,
-                    label="Max Response Length"
                 )
                 temperature = gr.Slider(
-                    minimum=0.1,
-                    maximum=1.5,
-                    value=0.7,
-                    step=0.1,
-                    label="Temperature"
                 )
                 top_p = gr.Slider(
-                    minimum=0.1,
-                    maximum=1.0,
-                    value=0.9,
-                    step=0.1,
-                    label="Top P"
                 )
             # Generate button
-            submit_btn = gr.Button("Generate Response")
         with gr.Column():
             # Display final prompt and model response
             final_prompt_display = gr.Textbox(
-                label="Final Prompt Sent to Model",
-                lines=12
             )
             response_display = gr.Textbox(
-                label="Model Response",
-                lines=12
             )
             # Tips for using the interface
             gr.Markdown("""
             ## Testing Tips
-            - The **Game Data Format** determines how game information is presented to the model
-            - The **System Prompt** can be used to provide context or role instructions
-            - **Prompt Templates** offer pre-made queries, or you can use a custom prompt
-            - Experiment with **Temperature** (higher = more creative/random, lower = more focused)
-            - Document successful prompts for fine-tuning datasets
             """)
     # Handle button click
     submit_btn.click(
         process_input,
@@ -282,8 +276,10 @@ with gr.Blocks() as demo:
             temperature,
             top_p
         ],
-        outputs=[final_prompt_display, response_display]
     )
-# Launch the demo
-demo.launch()

 import json
 from transformers import AutoModelForCausalLM, AutoTokenizer
# --- Configuration ---
# Hub identifier of the checkpoint used for both the tokenizer and the model.
MODEL_ID = "Qwen/Qwen2-1.5B-Instruct"
# --- Load Model and Tokenizer ---
# Loading happens once at import time; the resulting `tokenizer` and `model`
# are module-level globals used by generate_response().
print(f"Loading model: {MODEL_ID}")
# trust_remote_code is deliberately not passed to either from_pretrained call.
# torch_dtype="auto" leaves the dtype choice to the library instead of forcing
# float16; device_map="auto" likewise leaves device placement to the library.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    device_map="auto"
)
print("Model loaded successfully.")
+# --- Predefined Data (Keep user's structure) ---
 PREDEFINED_GAMES = {
     "rps_simple": {
         "description": "Rock-Paper-Scissors (Simple Format)",
             "encoding": {"rock": 0, "paper": 1, "scissors": 2},
             "result_encoding": {"ai_win": 0, "player_win": 1, "tie": 2},
             "rounds": [
+                {"round": 1, "player": 0, "ai": 2, "result": 1}, {"round": 2, "player": 1, "ai": 1, "result": 2},
+                {"round": 3, "player": 2, "ai": 0, "result": 0}, {"round": 4, "player": 0, "ai": 0, "result": 2},
+                {"round": 5, "player": 1, "ai": 0, "result": 1}, {"round": 6, "player": 2, "ai": 2, "result": 2},
+                {"round": 7, "player": 0, "ai": 1, "result": 0}, {"round": 8, "player": 1, "ai": 2, "result": 0},
+                {"round": 9, "player": 2, "ai": 1, "result": 1}, {"round": 10, "player": 0, "ai": 2, "result": 1}
             ],
             "summary": {"player_wins": 4, "ai_wins": 3, "ties": 3}
         }
     }
 }
# --- Predefined Prompts ---
# Canned questions about the game history. The keys are offered as the choices
# of the "Prompt Template" dropdown; the selected value becomes the question
# text in the user message unless a custom prompt is used instead.
PROMPT_TEMPLATES = {
    "detailed_analysis_recommendation": "Analyze the game history provided. Identify patterns in the player's moves. Based on your analysis, explain the reasoning and recommend the best move for the AI (or player if specified) in the next round.",
    "player_pattern_focus": "Focus specifically on the player's move patterns. Do they favor a specific move? Do they follow sequences? Do they react predictably after winning or losing?",
    "brief_recommendation": "Based on the history, what single move (Rock, Paper, or Scissors) should be played next and give a one-sentence justification?",
    "structured_output_request": "Provide a structured analysis with these sections: 1) Obvious player patterns, 2) Potential opponent counter-strategies, 3) Final move recommendation with reasoning."
}
+# --- Formatting Functions (Keep user's functions) ---
 def format_rps_simple(game_data):
     """Format the RPS data in a simple way that's easy for small models to understand"""
     game = game_data["data"]
     move_names = {0: "Rock", 1: "Paper", 2: "Scissors"}
     result_names = {0: "AI wins", 1: "Player wins", 2: "Tie"}
     player_moves = {"Rock": 0, "Paper": 0, "Scissors": 0}
     formatted_data = "Game: Rock-Paper-Scissors\n"
     formatted_data += "Format explanation: [Round#, Player move, AI move, Result]\n"
     formatted_data += "Move codes: 0=Rock, 1=Paper, 2=Scissors\n"
     formatted_data += "Result codes: 0=AI wins, 1=Player wins, 2=Tie\n\n"
     formatted_data += "Game Data:\n"
     for round_data in game["rounds"]:
+        r_num, p_move, ai_move, result = round_data["round"], round_data["player"], round_data["ai"], round_data["result"]
         player_moves[move_names[p_move]] += 1
         formatted_data += f"[{r_num}, {p_move}, {ai_move}, {result}] # R{r_num}: Player {move_names[p_move]}, AI {move_names[ai_move]}, {result_names[result]}\n"
     formatted_data += "\nSummary:\n"
     formatted_data += f"Player wins: {game['summary']['player_wins']}\n"
     formatted_data += f"AI wins: {game['summary']['ai_wins']}\n"
     formatted_data += f"Ties: {game['summary']['ties']}\n\n"
     formatted_data += "Player move frequencies:\n"
+    total_rounds = len(game["rounds"])
     for move, count in player_moves.items():
+         percentage = round((count / total_rounds) * 100) if total_rounds > 0 else 0
+         formatted_data += f"{move}: {count} times ({percentage}%)\n" # Corrected percentage calc
     return formatted_data
 def format_rps_numeric(game_data):
     """Format the RPS data in a highly compressed numeric format"""
     game = game_data["data"]
     formatted_data = "RPS Game Data (compressed format)\n"
     formatted_data += f"Rules: {game['rules']}\n\n"
     rounds_str = ",".join([str(r) for r in game['rounds']])
     formatted_data += f"Rounds: {rounds_str}\n\n"
     formatted_data += f"Score: Player={game['score']['P']} AI={game['score']['AI']} Ties={game['score']['Tie']}\n"
     return formatted_data
# Dispatch table: game-format key -> function that renders that format's game
# data as prompt text. process_input() looks up the formatter by the selected
# format key.
FORMAT_FUNCTIONS = {
    "rps_simple": format_rps_simple,
    "rps_numeric": format_rps_numeric
}
+# --- Generation Function (Updated for Chat Template) ---
def generate_response(messages, max_length=512, temperature=0.7, top_p=0.9):
    """Generate a reply from the module-level model using its chat template.

    Args:
        messages: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts; rendered to a prompt with ``tokenizer.apply_chat_template``.
        max_length: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The decoded completion (prompt tokens excluded, special tokens
        skipped, surrounding whitespace stripped). If anything raises during
        templating or generation, an ``"An error occurred: ..."`` string is
        returned instead so the UI can display it.
    """
    try:
        # Render the conversation; add_generation_prompt=True appends the
        # assistant-turn header so the model answers rather than continues
        # the user turn.
        prompt_text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        model_inputs = tokenizer([prompt_text], return_tensors="pt").to(model.device)

        generation_kwargs = {
            # UI sliders may deliver floats; the token budget must be an int.
            "max_new_tokens": int(max_length),
            "temperature": temperature,
            "top_p": top_p,
            "do_sample": True,
            "pad_token_id": tokenizer.eos_token_id,
        }

        print("Generating response...")
        with torch.no_grad():
            # Unpack the whole encoding so the attention mask is passed along
            # with input_ids; with pad_token_id == eos_token_id the mask cannot
            # be reliably inferred from the ids alone.
            generated_ids = model.generate(**model_inputs, **generation_kwargs)

        # generate() returns prompt + completion; keep only the new tokens.
        input_ids_len = model_inputs["input_ids"].shape[-1]
        output_ids = generated_ids[0, input_ids_len:]
        response = tokenizer.decode(output_ids, skip_special_tokens=True)
        print("Generation complete.")
        return response.strip()
    except Exception as e:
        # UI boundary: report the failure in the response box instead of
        # crashing the request handler.
        print(f"Error during generation: {e}")
        return f"An error occurred: {str(e)}"
+# --- Input Processing Function (Updated for Chat Template) ---
 def process_input(
     game_format,
     prompt_template,
     custom_prompt,
     use_custom_prompt,
     system_prompt,
+    max_length,
+    temperature,
     top_p
 ):
+    """Process the input, format using chat template, and generate response."""
     # Get the selected game data and format it
     game_data = PREDEFINED_GAMES[game_format]
+    formatted_game_data = FORMAT_FUNCTIONS[game_format](game_data) #
+    # Determine which prompt question to use
+    user_question = custom_prompt if use_custom_prompt else PROMPT_TEMPLATES[prompt_template] #
+    # Construct the user message content
+    user_content = f"Game History:\n{formatted_game_data}\n\nQuestion:\n{user_question}"
+    # Create the messages list for the chat template
+    messages = []
+    if system_prompt and system_prompt.strip(): # Add system prompt if provided
+        messages.append({"role": "system", "content": system_prompt})
+    messages.append({"role": "user", "content": user_content})
     # Generate response from the model
     response = generate_response(
+        messages,
         max_length=max_length,
         temperature=temperature,
         top_p=top_p
     )
+    # For display purposes, show the "user" part of the prompt
+    # (The system prompt isn't usually shown in the final input display)
+    display_prompt = f"System Prompt (if used):\n{system_prompt}\n\n------\n\nUser Content:\n{user_content}"
+    return display_prompt, response
+# --- Gradio Interface (Minor updates) ---
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown(f"# {MODEL_ID} - RPS Game Analysis Tester") # Updated Title
+    gr.Markdown("Test how the model responds to different RPS game data formats and prompts using its chat template.")
     with gr.Row():
         with gr.Column():
             # Game data selection
                 value="rps_simple",
                 label="Game Data Format"
             )
             # System prompt (optional)
+            # Added a more relevant placeholder based on the user's goal
             system_prompt = gr.Textbox(
                 label="System Prompt (Optional)",
+                placeholder="e.g., You are an expert RPS analyst. Analyze the provided game history, identify patterns, explain your reasoning clearly, and recommend the next move. Structure your output with observations, reasoning, and a final recommendation.",
+                lines=4 # Increased lines slightly
             )
             # Prompt selection
             with gr.Row():
                 prompt_template = gr.Dropdown(
                     choices=list(PROMPT_TEMPLATES.keys()),
+                    value="detailed_analysis_recommendation", # Updated default
                     label="Prompt Template"
                 )
                 use_custom_prompt = gr.Checkbox(
                     label="Use Custom Prompt",
                     value=False
                 )
             custom_prompt = gr.Textbox(
+                label="Custom Prompt (if Use Custom Prompt is checked)",
+                placeholder="Enter your custom prompt/question here",
+                lines=3 # Increased lines slightly
             )
             # Generation parameters
             with gr.Row():
                 max_length = gr.Slider(
+                    minimum=50,
+                    maximum=1024, # Increased max
+                    value=512,    # Increased default
+                    step=16,      # Step size power of 2
+                    label="Max New Tokens" # Renamed label
                 )
                 temperature = gr.Slider(
+                    minimum=0.1, maximum=1.5, value=0.7, step=0.05, label="Temperature" # Step size finer
                 )
                 top_p = gr.Slider(
+                    minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top P" # Step size finer
                 )
             # Generate button
+            submit_btn = gr.Button("Generate Response", variant="primary") # Added variant
         with gr.Column():
             # Display final prompt and model response
+            # Renamed label for clarity
             final_prompt_display = gr.Textbox(
+                label="Formatted Input Sent to Model (via Chat Template)",
+                lines=15 # Increased lines
             )
             response_display = gr.Textbox(
+                label="Model Response",
+                lines=15, # Increased lines
+                show_copy_button=True # Added copy button
             )
             # Tips for using the interface
             gr.Markdown("""
             ## Testing Tips
+            - **Game Data Format**: Selects how the history is structured. 'rps_simple' is often easier for models to parse.
+            - **System Prompt**: Crucial for setting the AI's role and desired output style (like your example image). Be descriptive!
+            - **Prompt Template / Custom Prompt**: Asks the specific question based on the history and system instructions.
+            - **Generation Params**: Tune `Temperature` and `Top P` to control creativity vs. focus. Adjust `Max New Tokens` for response length.
+            - **Chat Template**: This version now correctly uses the model's chat template for better instruction following.
             """)
     # Handle button click
     submit_btn.click(
         process_input,
             temperature,
             top_p
         ],
+        outputs=[final_prompt_display, response_display],
+        api_name="generate_rps_analysis" # Added api_name
     )
# --- Launch the demo ---
# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch the UI.
if __name__ == "__main__":
    demo.launch()