sagar007 committed on
Commit
08b6fa7
·
verified ·
1 Parent(s): 3880154

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -8,9 +8,9 @@ model_id = 'akjindal53244/Llama-3.1-Storm-8B'
8
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
9
  model = LlamaForCausalLM.from_pretrained(
10
  model_id,
11
- torch_dtype=torch.bfloat16,
12
  device_map="auto",
13
- use_flash_attention_2=True
14
  )
15
 
16
  # Function to format the prompt
@@ -22,7 +22,7 @@ def format_prompt(messages):
22
  return prompt
23
 
24
  # Function to generate response
25
- @spaces.GPU(duration=120) # Adjust duration as needed
26
  def generate_response(message, history):
27
  messages = [{"role": "system", "content": "You are a helpful assistant."}]
28
  for human, assistant in history:
 
8
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
9
  model = LlamaForCausalLM.from_pretrained(
10
  model_id,
11
+ torch_dtype=torch.float32,
12
  device_map="auto",
13
+ low_cpu_mem_usage=True
14
  )
15
 
16
  # Function to format the prompt
 
22
  return prompt
23
 
24
  # Function to generate response
25
+ @spaces.GPU(duration=300) # Increased duration due to potential slower processing
26
  def generate_response(message, history):
27
  messages = [{"role": "system", "content": "You are a helpful assistant."}]
28
  for human, assistant in history: