import os

import gradio as gr
import torch
from huggingface_hub import snapshot_download, hf_hub_download
from transformers import AutoModelForCausalLM, AutoTokenizer
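
# Note: loading GGUF checkpoints through transformers additionally requires
# the `gguf` package (`pip install gguf`).
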
def setup_model():
    # Snapshot the Instruct repo so the tokenizer and config files are
    # available locally, then fetch the quantized GGUF weights into the
    # same directory.
    instruct_repo = "Qwen/Qwen2.5-0.5B-Instruct"
    local_dir = snapshot_download(repo_id=instruct_repo)

    gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"
    hf_hub_download(
        repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
        filename=gguf_filename,
        local_dir=local_dir,
        # local_dir_use_symlinks is deprecated and ignored by recent
        # huggingface_hub releases, which always write real files here.
    )

    tokenizer = AutoTokenizer.from_pretrained(local_dir, trust_remote_code=True)
    # transformers dequantizes the GGUF weights when loading the model.
    model = AutoModelForCausalLM.from_pretrained(
        local_dir,
        gguf_file=gguf_filename,
        trust_remote_code=True,
    )
    # torch.compile (PyTorch 2.x) speeds up repeated generate calls after a
    # warm-up compilation pass.
    return tokenizer, torch.compile(model)


# Load once at import time so every request reuses the same objects.
tokenizer, model = setup_model()

prompt_prefix = (
    "You are the best energy-saving advisor. "
    "Given appliances (name, wattage, hours/day, days/week), identify the "
    "top consumers and give up to 5 actionable bullet-point recommendations "
    "(practical, empathetic), including appliance swaps and habit changes. "
    "For each, include the estimated monthly kWh saved and cost reduction. "
    "Keep the response under 120 tokens, bullets only."
    "\nSummary:\n"
)

def generate_recommendation(appliance_info: str) -> str:
    prompt = prompt_prefix + appliance_info + "\n\nRecommendations:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # Greedy decoding: temperature has no effect when do_sample=False,
        # so it is omitted to avoid the transformers warning.
        outputs = model.generate(
            **inputs,
            max_new_tokens=120,
            do_sample=False,
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text generated after the final "Recommendations:" marker.
    return text.split("Recommendations:")[-1].strip()
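

# Minimal smoke test with a hypothetical appliance list (the Gradio UI that
# `gr` is imported for presumably follows later in the script):
if __name__ == "__main__":
    sample = (
        "Fridge: 150W, 24h/day, 7 days/week\n"
        "Air conditioner: 1200W, 6h/day, 5 days/week\n"
        "Washing machine: 500W, 1h/day, 3 days/week"
    )
    print(generate_recommendation(sample))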