Update app.py
app.py CHANGED
@@ -1,30 +1,34 @@
+import os
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from huggingface_hub import
+from huggingface_hub import snapshot_download

-# ──
-
-
-
-)
+# ── 1) pull down the entire repo (config.json, tokenizer files, GGUF, etc.) ─────
+repo_id = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
+local_dir = snapshot_download(repo_id=repo_id)
+
+# the GGUF filename inside that folder (must match exactly)
+gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"

-#
-
+# sanity check
+gguf_path = os.path.join(local_dir, gguf_filename)
+assert os.path.isfile(gguf_path), f"GGUF not found at {gguf_path}"
+
+# ── 2) load tokenizer from the local snapshot ────────────────────────────────
 tokenizer = AutoTokenizer.from_pretrained(
-
-
+    local_dir,
+    trust_remote_code=True,  # allow any custom code in the repo
 )

+# ── 3) load the GGUF-quantized model from that same folder ───────────────────
 model = AutoModelForCausalLM.from_pretrained(
-
-    gguf_file=
-    device_map="auto",
-
+    local_dir,
+    gguf_file=gguf_filename,  # relative name of the file in local_dir
+    device_map="auto",
+    trust_remote_code=True,
 )
-
-# Compile the model for ~20–30% extra speed (PyTorch 2.0+)
-model = torch.compile(model)
+model = torch.compile(model)  # Torch 2.0+ speed-up

 # ── Prompt template ──────────────────────────────────────────────────────────
 prompt_prefix = """
@@ -58,7 +62,7 @@ iface = gr.Interface(
     inputs=gr.Textbox(lines=8, placeholder="e.g. Refrigerator: 150 W, 8 h/day, 7 days/week\n..."),
     outputs="text",
     title="Energy-Saving Tips (Qwen2.5-0.5B-Instruct-GGUF)",
-    description="Provide your appliance
+    description="Provide your per-appliance summary to get targeted, gguf-powered energy-saving recommendations."
 )

 if __name__ == "__main__":
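The torch.compile call added at new line 31 assumes PyTorch 2.0+ and a compile backend that works on the Space's hardware; the "~20-30%" figure from the old comment is the usual rough expectation, not a guarantee. A defensive variant (a sketch, not part of this commit) would fall back to eager execution:

try:
    model = torch.compile(model)  # PyTorch 2.0+; backend support varies by hardware
except Exception as exc:
    # Hypothetical guard: the uncompiled model is functionally identical, just slower.
    print(f"torch.compile unavailable, running eager: {exc}")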
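To exercise the new download-then-load path end to end without launching Gradio, a local smoke test along these lines should work. It reuses repo_id and gguf_filename from the diff; the sample prompt and generation settings are illustrative assumptions, not part of the commit:

import os
import torch
from huggingface_hub import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"

# Same sequence as app.py: snapshot first, then load both pieces from local_dir.
local_dir = snapshot_download(repo_id=repo_id)
assert os.path.isfile(os.path.join(local_dir, gguf_filename)), "GGUF missing from snapshot"

tokenizer = AutoTokenizer.from_pretrained(local_dir, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    local_dir,
    gguf_file=gguf_filename,
    device_map="auto",
    trust_remote_code=True,
)

# One short generation to confirm the GGUF weights actually loaded.
messages = [{"role": "user", "content": "Refrigerator: 150 W, 8 h/day. One energy-saving tip?"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
with torch.no_grad():
    output = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))

Worth noting: transformers dequantizes the GGUF weights into regular tensors at load time, so this path trades llama.cpp-style quantized inference for compatibility with the standard generate() API.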