Wh1plashR committed
Commit 293845a · verified · 1 Parent(s): 9f81ce6

Update app.py

Files changed (1):
  1. app.py +22 -18
app.py CHANGED
@@ -1,30 +1,34 @@
+import os
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from huggingface_hub import hf_hub_download
+from huggingface_hub import snapshot_download

-# ── Download the GGUF file (only once) ──────────────────────────────────────────
-gguf_path = hf_hub_download(
-    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
-    filename="qwen2.5-0.5b-instruct-q5_k_m.gguf"
-)
+# ── 1) pull down the entire repo (config.json, tokenizer files, GGUF, etc.) ─────
+repo_id = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
+local_dir = snapshot_download(repo_id=repo_id)
+
+# the GGUF filename inside that folder (must match exactly)
+gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"

-# ── Load tokenizer & model via gguf_file ────────────────────────────────────────
-# Transformers will dequantize the GGUF into fp32 for you
+# sanity check
+gguf_path = os.path.join(local_dir, gguf_filename)
+assert os.path.isfile(gguf_path), f"GGUF not found at {gguf_path}"
+
+# ── 2) load tokenizer from the local snapshot ────────────────────────────────────
 tokenizer = AutoTokenizer.from_pretrained(
-    "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
-    gguf_file=gguf_path
+    local_dir,
+    trust_remote_code=True,  # allow any custom code in the repo
 )

+# ── 3) load the GGUF‑quantized model from that same folder ───────────────────────
 model = AutoModelForCausalLM.from_pretrained(
-    "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
-    gguf_file=gguf_path,
-    device_map="auto",          # spread layers across GPU/CPU
-    torch_dtype=torch.float16   # run in fp16 for speed if GPU supports it
+    local_dir,
+    gguf_file=gguf_filename,  # relative name of the file in local_dir
+    device_map="auto",
+    trust_remote_code=True,
 )
-
-# Compile the model for ~20–30% extra speed (PyTorch 2.0+)
-model = torch.compile(model)
+model = torch.compile(model)  # Torch 2.0+ speed‑up

 # ── Prompt template ─────────────────────────────────────────────────────────────
 prompt_prefix = """
@@ -58,7 +62,7 @@ iface = gr.Interface(
     inputs=gr.Textbox(lines=8, placeholder="e.g. Refrigerator: 150 W, 8 h/day, 7 days/week\n..."),
     outputs="text",
     title="Energy‑Saving Tips (Qwen2.5‑0.5B‑Instruct‑GGUF)",
-    description="Provide your appliance usage summary to get targeted, gguf‑powered energy‑saving recommendations."
+    description="Provide your per‑appliance summary to get targeted, gguf‑powered energy‑saving recommendations."
 )

 if __name__ == "__main__":
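
Note on step 1: snapshot_download as written pulls every file in the repo, and GGUF repos often ship several multi-gigabyte quantization variants. If download size matters, huggingface_hub accepts an allow_patterns filter; a minimal sketch reusing repo_id and gguf_filename from the diff above (the exact pattern list is an assumption about which metadata files are worth keeping):

from huggingface_hub import snapshot_download

repo_id = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"

# Fetch only the chosen quantization plus small metadata files,
# instead of every .gguf variant in the repo.
local_dir = snapshot_download(
    repo_id=repo_id,
    allow_patterns=[gguf_filename, "*.json", "*.md"],
)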
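Note on step 2: if the snapshot turns out to contain only .gguf weights and no standalone tokenizer files (tokenizer.json, config.json), AutoTokenizer.from_pretrained(local_dir) will raise. Recent transformers releases can reconstruct the tokenizer from GGUF metadata via the same gguf_file argument the model load already uses; a hedged fallback sketch, not part of this commit:

from transformers import AutoTokenizer

try:
    tokenizer = AutoTokenizer.from_pretrained(local_dir)
except (OSError, ValueError):
    # No standalone tokenizer files in the snapshot: parse the
    # tokenizer out of the GGUF metadata instead.
    tokenizer = AutoTokenizer.from_pretrained(local_dir, gguf_file=gguf_filename)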
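Note on step 3: torch.compile exists only on PyTorch 2.x, the first request pays a one-off compilation pause, and the old code's "roughly 20–30% extra speed" claim is workload-dependent. A defensive variant, if the Space may run on older runtimes:

# torch.compile requires PyTorch 2.0+; fall back to the eager model otherwise
if hasattr(torch, "compile"):
    model = torch.compile(model)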
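Neither hunk shows the fn= callback that gr.Interface wires to the textbox. For orientation, a minimal sketch of what such a handler typically looks like; generate_tips, the 256-token budget, and the prompt-echo trimming are illustrative assumptions, not code from this commit:

def generate_tips(usage_summary: str) -> str:
    # prompt_prefix is the template defined earlier in app.py
    prompt = prompt_prefix + usage_summary
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=256)
    # Keep only the newly generated tokens, dropping the echoed prompt
    new_tokens = output[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)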