import os
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import snapshot_download, hf_hub_download
# Download model files and load the tokenizer and model
def setup_model():
    instruct_repo = "Qwen/Qwen2.5-0.5B-Instruct"
    local_dir = snapshot_download(repo_id=instruct_repo)

    # Fetch the quantized GGUF weights into the same directory so
    # transformers can load them alongside the tokenizer files.
    gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"
    hf_hub_download(
        repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
        filename=gguf_filename,
        local_dir=local_dir,
    )

    tokenizer = AutoTokenizer.from_pretrained(local_dir, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        local_dir,
        gguf_file=gguf_filename,
        trust_remote_code=True,
    )
    # Compile the model for potentially faster inference.
    return tokenizer, torch.compile(model)
tokenizer, model = setup_model()
# System prompt prepended to every request
prompt_prefix = (
    "You are the best energy-saving advisor. "
    "Given appliances (name, wattage, hours/day, days/week), identify the top "
    "consumers and give up to 5 actionable bullet-point recommendations "
    "(practical, empathetic), including appliance swaps and habit changes. "
    "For each, include the estimated monthly kWh saved and cost reduction. "
    "Keep the response under 120 tokens, bullets only."
    "\nSummary:\n"
)
# Generation function
def generate_recommendation(appliance_info: str) -> str:
    prompt = prompt_prefix + appliance_info + "\n\nRecommendations:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # Greedy decoding; sampling parameters such as temperature are
        # irrelevant when do_sample=False.
        outputs = model.generate(
            **inputs,
            max_new_tokens=120,
            do_sample=False,
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Return only the text after the final "Recommendations:" marker.
    return text.split("Recommendations:")[-1].strip()
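
# The gradio import suggests the file originally ended with a UI definition
# that is not part of this excerpt. The block below is a minimal sketch of
# such an interface, not the author's original code: the labels, placeholder
# text, and title are illustrative assumptions.
demo = gr.Interface(
    fn=generate_recommendation,
    inputs=gr.Textbox(
        lines=6,
        label="Appliance details",  # assumed label
        placeholder="e.g. Fridge: 150W, 24h/day, 7 days/week",  # assumed example
    ),
    outputs=gr.Textbox(label="Energy-saving recommendations"),  # assumed label
    title="Energy-Saving Advisor",  # assumed title
)

if __name__ == "__main__":
    demo.launch()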