Wh1plashR committed
Commit 293845a · verified · 1 Parent(s): 9f81ce6

Update app.py

Files changed (1):
  1. app.py +22 -18
app.py CHANGED
@@ -1,30 +1,34 @@
+import os
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from huggingface_hub import hf_hub_download
+from huggingface_hub import snapshot_download

-# ── Download the GGUF file (only once) ──────────────────────────────────────────
-gguf_path = hf_hub_download(
-    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
-    filename="qwen2.5-0.5b-instruct-q5_k_m.gguf"
-)
+# ── 1) pull down the entire repo (config.json, tokenizer files, GGUF, etc.) ─────
+repo_id = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
+local_dir = snapshot_download(repo_id=repo_id)
+
+# the GGUF filename inside that folder (must match exactly)
+gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"

-# ── Load tokenizer & model via gguf_file ────────────────────────────────────────
-# Transformers will dequantize the GGUF into fp32 for you
+# sanity check
+gguf_path = os.path.join(local_dir, gguf_filename)
+assert os.path.isfile(gguf_path), f"GGUF not found at {gguf_path}"
+
+# ── 2) load tokenizer from the local snapshot ────────────────────────────────────
 tokenizer = AutoTokenizer.from_pretrained(
-    "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
-    gguf_file=gguf_path
+    local_dir,
+    trust_remote_code=True,  # allow any custom code in the repo
 )

+# ── 3) load the GGUF‑quantized model from that same folder ───────────────────────
 model = AutoModelForCausalLM.from_pretrained(
-    "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
-    gguf_file=gguf_path,
-    device_map="auto",          # spread layers across GPU/CPU
-    torch_dtype=torch.float16   # run in fp16 for speed if GPU supports it
+    local_dir,
+    gguf_file=gguf_filename,  # relative name of the file in local_dir
+    device_map="auto",
+    trust_remote_code=True,
 )
-
-# Compile the model for ~20–30% extra speed (PyTorch 2.0+)
-model = torch.compile(model)
+model = torch.compile(model)  # Torch 2.0+ speed‑up

 # ── Prompt template ─────────────────────────────────────────────────────────────
 prompt_prefix = """
@@ -58,7 +62,7 @@ iface = gr.Interface(
     inputs=gr.Textbox(lines=8, placeholder="e.g. Refrigerator: 150 W, 8 h/day, 7 days/week\n..."),
     outputs="text",
     title="Energy‑Saving Tips (Qwen2.5‑0.5B‑Instruct‑GGUF)",
-    description="Provide your appliance usage summary to get targeted, gguf‑powered energy‑saving recommendations."
+    description="Provide your per‑appliance summary to get targeted, gguf‑powered energy‑saving recommendations."
 )

 if __name__ == "__main__":
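
Note on step 1: snapshot_download as written pulls every file in the repo, and GGUF repos often ship several multi-gigabyte quantization variants. If download size matters, huggingface_hub accepts an allow_patterns filter; a minimal sketch reusing repo_id and gguf_filename from the diff above (the exact pattern list is an assumption about which metadata files are worth keeping):

from huggingface_hub import snapshot_download

repo_id = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"

# Fetch only the chosen quantization plus small metadata files,
# instead of every .gguf variant in the repo.
local_dir = snapshot_download(
    repo_id=repo_id,
    allow_patterns=[gguf_filename, "*.json", "*.md"],
)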
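Note on step 2: if the snapshot turns out to contain only .gguf weights and no standalone tokenizer files (tokenizer.json, config.json), AutoTokenizer.from_pretrained(local_dir) will raise. Recent transformers releases can reconstruct the tokenizer from GGUF metadata via the same gguf_file argument the model load already uses; a hedged fallback sketch, not part of this commit:

from transformers import AutoTokenizer

try:
    tokenizer = AutoTokenizer.from_pretrained(local_dir)
except (OSError, ValueError):
    # No standalone tokenizer files in the snapshot: parse the
    # tokenizer out of the GGUF metadata instead.
    tokenizer = AutoTokenizer.from_pretrained(local_dir, gguf_file=gguf_filename)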
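Note on step 3: torch.compile exists only on PyTorch 2.x, the first request pays a one-off compilation pause, and the old code's "roughly 20–30% extra speed" claim is workload-dependent. A defensive variant, if the Space may run on older runtimes:

# torch.compile requires PyTorch 2.0+; fall back to the eager model otherwise
if hasattr(torch, "compile"):
    model = torch.compile(model)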
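Neither hunk shows the fn= callback that gr.Interface wires to the textbox. For orientation, a minimal sketch of what such a handler typically looks like; generate_tips, the 256-token budget, and the prompt-echo trimming are illustrative assumptions, not code from this commit:

def generate_tips(usage_summary: str) -> str:
    # prompt_prefix is the template defined earlier in app.py
    prompt = prompt_prefix + usage_summary
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=256)
    # Keep only the newly generated tokens, dropping the echoed prompt
    new_tokens = output[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)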