rui3000 committed
Commit d54daef · verified · 1 Parent(s): 39e684e

Update app.py

Files changed (1)
  1. app.py  +17 -19
app.py CHANGED
@@ -1,19 +1,16 @@
  import gradio as gr
- import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer
  import json
+ from transformers import AutoModelForCausalLM, AutoTokenizer
 
- # Define model name (use a small model)
- MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Small enough for HF Spaces free tier
+ # Define model name - use a very small model
+ MODEL_NAME = "EleutherAI/pythia-70m"  # Extremely small model, no quantization needed
 
- # Load model and tokenizer (will happen when the Space starts)
  print(f"Loading model {MODEL_NAME}...")
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
  model = AutoModelForCausalLM.from_pretrained(
      MODEL_NAME,
-     torch_dtype=torch.float16,
-     device_map="auto",
-     load_in_8bit=True  # Use 8-bit to reduce memory usage
+     low_cpu_mem_usage=True,  # CPU-friendly settings
+     device_map="cpu"  # Force CPU usage
  )
  print("Model loaded successfully!")
 
@@ -42,7 +39,7 @@ Game State:
  - Opponent move history: {', '.join(opponent_history)}
 
  Based on the opponent's pattern of moves, what should the player choose next (Rock, Paper, or Scissors)?
- Explain your reasoning, then end with a clear recommendation.
+ Explain your reasoning, then provide a clear recommendation.
  """
      return prompt
  except Exception as e:
@@ -54,16 +51,17 @@ def generate_advice(game_data):
      # Format the prompt
      prompt = format_rps_game_prompt(game_data)
 
-     # Generate response from LLM
-     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-     with torch.no_grad():
-         outputs = model.generate(
-             **inputs,
-             max_new_tokens=200,
-             temperature=0.7,
-             do_sample=True,
-             top_p=0.9
-         )
+     # Generate response from LLM (with CPU-only settings)
+     inputs = tokenizer(prompt, return_tensors="pt")
+
+     # Set max_new_tokens to avoid excessive generation
+     outputs = model.generate(
+         inputs["input_ids"],
+         max_new_tokens=100,  # Limit token generation
+         do_sample=True,
+         temperature=0.7,
+         top_p=0.9
+     )
      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
      # Remove the prompt from the response
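
For reference, a minimal standalone sketch of the flow app.py lands on after this commit. The model name and generation parameters are taken from the new file; the example prompt is hypothetical, and the final prompt-stripping step is an assumption, since the diff cuts off at the "# Remove the prompt from the response" comment.

# Minimal sketch of the post-commit flow: CPU-only pythia-70m, no quantization.
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "EleutherAI/pythia-70m"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    low_cpu_mem_usage=True,  # stream weights in to keep peak RAM low
    device_map="cpu",        # force CPU, matching the free Spaces tier
)

# Hypothetical prompt standing in for format_rps_game_prompt() output.
prompt = "Opponent move history: Rock, Rock, Paper. What should the player choose next?"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(
    inputs["input_ids"],
    max_new_tokens=100,  # same sampling settings as the new app.py
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Assumed implementation of the "remove the prompt" step: the decoded
# output begins with the prompt text, so slice it off to keep only the advice.
advice = response[len(prompt):].strip()
print(advice)

The trade-off in the commit is plain: pythia-70m fits comfortably in CPU RAM without load_in_8bit (which needs bitsandbytes and, in practice, a GPU), at the cost of much weaker reasoning than TinyLlama-1.1B.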