Wh1plashR committed
Commit 84f6b1a · verified · 1 Parent(s): 155f444

using qwen2.5

Files changed (1): app.py +24 -15
app.py CHANGED
@@ -1,22 +1,32 @@
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from huggingface_hub import hf_hub_download
 
-# ── Model & tokenizer ───────────────────────────────────────────────────────────
-model_name = "microsoft/Phi-3-mini-4k-instruct"  # 3.8 B-param open instruct model
-
-# seed for reproducibility
-torch.random.manual_seed(0)
-
-# load tokenizer (no gating, open access)
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-
-# load as causal LM, quantize to int8 to halve memory & speed up inference
-model = AutoModelForCausalLM.from_pretrained(model_name)
-# compile once for ~30% further speed-up
+# ── Download the GGUF file (only once) ──────────────────────────────────────────
+gguf_path = hf_hub_download(
+    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
+    filename="qwen2.5-0.5b-instruct-q5_k_m.gguf"
+)
+
+# ── Load tokenizer & model via gguf_file ────────────────────────────────────────
+# Transformers will dequantize the GGUF into fp32 for you
+tokenizer = AutoTokenizer.from_pretrained(
+    "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
+    gguf_file=gguf_path
+)
+
+model = AutoModelForCausalLM.from_pretrained(
+    "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
+    gguf_file=gguf_path,
+    device_map="auto",          # spread layers across GPU/CPU
+    torch_dtype=torch.float16   # run in fp16 for speed if the GPU supports it
+)
+
+# Compile the model for ~20-30% extra speed (PyTorch 2.0+)
 model = torch.compile(model)
 
-# ── Prompt template ──────────────────────────────────────────────────────────────
+# ── Prompt template ─────────────────────────────────────────────────────────────
 prompt_prefix = """
 You are an energy-saving expert tasked to help households reduce their monthly electricity bills.
 Given the user's appliance usage information (device name, wattage, hours used per day, days used per week):
@@ -27,20 +37,19 @@ Format with bullet points.
 Here is the summary:
 """
 
-# ── Generation function ──────────────────────────────────────────────────────────
+# ── Generation function ─────────────────────────────────────────────────────────
 def generate_recommendation(appliance_info: str) -> str:
     prompt = prompt_prefix + appliance_info + "\n\nRecommendations:"
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=150,
+            max_new_tokens=120,
             use_cache=True,
             do_sample=False,
             temperature=0.0
         )
     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # return only the recommendations section
     return text.split("Recommendations:")[-1].strip()
 
 # ── Gradio interface ────────────────────────────────────────────────────────────
@@ -48,8 +57,8 @@ iface = gr.Interface(
     fn=generate_recommendation,
     inputs=gr.Textbox(lines=8, placeholder="e.g. Refrigerator: 150 W, 8 h/day, 7 days/week\n..."),
     outputs="text",
-    title="Energy-Saving Tips (Phi-3-Mini-4K-Instruct)",
-    description="Paste your per-appliance summary to get targeted energy-saving recommendations."
+    title="Energy-Saving Tips (Qwen2.5-0.5B-Instruct-GGUF)",
+    description="Provide your appliance usage summary to get targeted, gguf-powered energy-saving recommendations."
 )
 
 if __name__ == "__main__":
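
Note (not part of this commit): the new version keeps `temperature=0.0` next to `do_sample=False`, carried over from the Phi-3 code. Recent transformers releases warn that `temperature` is ignored when sampling is disabled, so the flag can simply be dropped. A minimal sketch of the cleaned-up call, reusing `model` and `inputs` from app.py above:

    # Greedy decoding needs no temperature; passing one with do_sample=False
    # only triggers a "generation flags not valid" warning in transformers.
    outputs = model.generate(
        **inputs,
        max_new_tokens=120,
        use_cache=True,
        do_sample=False   # greedy decoding; temperature/top_p left unset
    )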
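
A second possible follow-up: Qwen2.5-0.5B-Instruct is a chat-tuned model, so concatenating `prompt_prefix + appliance_info` as raw text bypasses the ChatML template it was trained on. The sketch below is one way to route the same prompt through `tokenizer.apply_chat_template` instead; it reuses `tokenizer`, `model`, and `prompt_prefix` from app.py, and is an assumption about a future change, not something in this commit:

    def generate_recommendation(appliance_info: str) -> str:
        # Wrap the prompt in the model's chat template instead of raw completion.
        messages = [
            {"role": "system", "content": prompt_prefix},
            {"role": "user", "content": appliance_info},
        ]
        input_ids = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,   # append the assistant-turn marker
            return_tensors="pt"
        ).to(model.device)
        with torch.no_grad():
            outputs = model.generate(input_ids, max_new_tokens=120, do_sample=False)
        # Decode only the newly generated tokens, not the echoed prompt.
        return tokenizer.decode(
            outputs[0][input_ids.shape[-1]:], skip_special_tokens=True
        ).strip()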