rui3000 committed
Commit d54daef · verified · 1 Parent(s): 39e684e

Update app.py

Files changed (1)
  1. app.py  +17 -19
app.py CHANGED
@@ -1,19 +1,16 @@
  import gradio as gr
- import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer
  import json
+ from transformers import AutoModelForCausalLM, AutoTokenizer
 
- # Define model name (use a small model)
- MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Small enough for HF Spaces free tier
+ # Define model name - use a very small model
+ MODEL_NAME = "EleutherAI/pythia-70m"  # Extremely small model, no quantization needed
 
- # Load model and tokenizer (will happen when the Space starts)
  print(f"Loading model {MODEL_NAME}...")
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
  model = AutoModelForCausalLM.from_pretrained(
      MODEL_NAME,
-     torch_dtype=torch.float16,
-     device_map="auto",
-     load_in_8bit=True  # Use 8-bit to reduce memory usage
+     low_cpu_mem_usage=True,  # CPU-friendly settings
+     device_map="cpu"  # Force CPU usage
  )
  print("Model loaded successfully!")
 
@@ -42,7 +39,7 @@ Game State:
  - Opponent move history: {', '.join(opponent_history)}
 
  Based on the opponent's pattern of moves, what should the player choose next (Rock, Paper, or Scissors)?
- Explain your reasoning, then end with a clear recommendation.
+ Explain your reasoning, then provide a clear recommendation.
  """
      return prompt
  except Exception as e:
@@ -54,16 +51,17 @@ def generate_advice(game_data):
      # Format the prompt
      prompt = format_rps_game_prompt(game_data)
 
-     # Generate response from LLM
-     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-     with torch.no_grad():
-         outputs = model.generate(
-             **inputs,
-             max_new_tokens=200,
-             temperature=0.7,
-             do_sample=True,
-             top_p=0.9
-         )
+     # Generate response from LLM (with CPU-only settings)
+     inputs = tokenizer(prompt, return_tensors="pt")
+
+     # Set max_new_tokens to avoid excessive generation
+     outputs = model.generate(
+         inputs["input_ids"],
+         max_new_tokens=100,  # Limit token generation
+         do_sample=True,
+         temperature=0.7,
+         top_p=0.9
+     )
      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
      # Remove the prompt from the response
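
For reference, a minimal standalone sketch of the flow app.py lands on after this commit. The model name and generation parameters are taken from the new file; the example prompt is hypothetical, and the final prompt-stripping step is an assumption, since the diff cuts off at the "# Remove the prompt from the response" comment.

# Minimal sketch of the post-commit flow: CPU-only pythia-70m, no quantization.
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "EleutherAI/pythia-70m"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    low_cpu_mem_usage=True,  # stream weights in to keep peak RAM low
    device_map="cpu",        # force CPU, matching the free Spaces tier
)

# Hypothetical prompt standing in for format_rps_game_prompt() output.
prompt = "Opponent move history: Rock, Rock, Paper. What should the player choose next?"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(
    inputs["input_ids"],
    max_new_tokens=100,  # same sampling settings as the new app.py
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Assumed implementation of the "remove the prompt" step: the decoded
# output begins with the prompt text, so slice it off to keep only the advice.
advice = response[len(prompt):].strip()
print(advice)

The trade-off in the commit is plain: pythia-70m fits comfortably in CPU RAM without load_in_8bit (which needs bitsandbytes and, in practice, a GPU), at the cost of much weaker reasoning than TinyLlama-1.1B.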