Wh1plashR committed
Commit be2c3d1 · verified · 1 Parent(s): cf42c9a

Optimised code

Files changed (1)
  1. app.py +14 -10
app.py CHANGED
@@ -1,11 +1,16 @@
  import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer
+ from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch

  # Load the pre-trained model and tokenizer
- model_name = "microsoft/phi-2"
- tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForCausalLM.from_pretrained(model_name)
+
+ tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
+ model = AutoModelForCausalLM.from_pretrained(
+     "microsoft/phi-2",
+     load_in_8bit=True,
+     device_map="auto"
+ )
+ model = torch.compile(model)

  promptPre = f"""You are an energy-saving expert tasked to help households reduce their monthly electricity bills.
  Given the user's appliance usage information (including device name, wattage, hours used per day, and days used per week),
@@ -24,12 +29,11 @@ Be empathetic, practical, and encouraging. Focus on achievable actions for the u
  Here is the user's input:
  """

- def generate_recommendation(appliance_info):
-     prompt = f"Input: promptPre + {appliance_info}\nOutput:"
-     inputs = tokenizer(prompt, return_tensors="pt")
-     outputs = model.generate(**inputs, max_new_tokens=200)
-     recommendation = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     return recommendation.split("Output:")[-1].strip()
+ def generate_recommendation(summary):
+     inputs = tokenizer(summary, return_tensors="pt").to(model.device)
+     with torch.no_grad():
+         out = model.generate(**inputs, max_new_tokens=100, use_cache=True)
+     return tokenizer.decode(out[0], skip_special_tokens=True)

  # Define the Gradio interface
  iface = gr.Interface(
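
Note for anyone running the updated app.py locally: load_in_8bit=True requires the bitsandbytes package and a CUDA GPU, and recent transformers releases expect 8-bit loading to go through BitsAndBytesConfig rather than the bare flag. Below is a minimal sketch of the same load-and-generate path under those assumptions; torch.compile is left out for brevity, and the appliance summary string is purely illustrative:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Same checkpoint as the commit; 8-bit weights via the config object
# instead of the deprecated bare load_in_8bit flag (assumes bitsandbytes
# is installed and a CUDA device is available).
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)

def generate_recommendation(summary):
    # Move the tokenized prompt to the model's device and generate
    # without tracking gradients; decoding is greedy by default.
    inputs = tokenizer(summary, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=100, use_cache=True)
    return tokenizer.decode(out[0], skip_special_tokens=True)

# Hypothetical appliance summary, for illustration only.
print(generate_recommendation("Fridge: 150 W, 24 h/day, 7 days/week"))

If the promptPre preamble defined earlier in the file is still wanted, it can simply be prepended to summary before tokenizing.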