import os
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import snapshot_download, hf_hub_download
# Download model files and load the tokenizer and model
def setup_model():
    instruct_repo = "Qwen/Qwen2.5-0.5B-Instruct"
    local_dir = snapshot_download(repo_id=instruct_repo)

    # Fetch the quantized GGUF weights into the same directory so
    # transformers can load them alongside the tokenizer files.
    gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"
    hf_hub_download(
        repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
        filename=gguf_filename,
        local_dir=local_dir,
    )

    tokenizer = AutoTokenizer.from_pretrained(local_dir, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        local_dir,
        gguf_file=gguf_filename,
        trust_remote_code=True,
    )
    # Compile the model for potentially faster inference.
    return tokenizer, torch.compile(model)
tokenizer, model = setup_model()
# System prompt prepended to every request
prompt_prefix = (
    "You are the best energy-saving advisor. "
    "Given appliances (name, wattage, hours/day, days/week), identify the top "
    "consumers and give up to 5 actionable bullet-point recommendations "
    "(practical, empathetic), including appliance swaps and habit changes. "
    "For each, include the estimated monthly kWh saved and cost reduction. "
    "Keep the response under 120 tokens, bullets only."
    "\nSummary:\n"
)
# Generation function
def generate_recommendation(appliance_info: str) -> str:
    prompt = prompt_prefix + appliance_info + "\n\nRecommendations:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # Greedy decoding; sampling parameters such as temperature are
        # irrelevant when do_sample=False.
        outputs = model.generate(
            **inputs,
            max_new_tokens=120,
            do_sample=False,
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Return only the text after the final "Recommendations:" marker.
    return text.split("Recommendations:")[-1].strip()
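
# The gradio import suggests the file originally ended with a UI definition
# that is not part of this excerpt. The block below is a minimal sketch of
# such an interface, not the author's original code: the labels, placeholder
# text, and title are illustrative assumptions.
demo = gr.Interface(
    fn=generate_recommendation,
    inputs=gr.Textbox(
        lines=6,
        label="Appliance details",  # assumed label
        placeholder="e.g. Fridge: 150W, 24h/day, 7 days/week",  # assumed example
    ),
    outputs=gr.Textbox(label="Energy-saving recommendations"),  # assumed label
    title="Energy-Saving Advisor",  # assumed title
)

if __name__ == "__main__":
    demo.launch()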