sagar007 committed on
Commit
08b6fa7
·
verified ·
1 Parent(s): 3880154

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -8,9 +8,9 @@ model_id = 'akjindal53244/Llama-3.1-Storm-8B'
8
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
9
  model = LlamaForCausalLM.from_pretrained(
10
  model_id,
11
- torch_dtype=torch.bfloat16,
12
  device_map="auto",
13
- use_flash_attention_2=True
14
  )
15
 
16
  # Function to format the prompt
@@ -22,7 +22,7 @@ def format_prompt(messages):
22
  return prompt
23
 
24
  # Function to generate response
25
- @spaces.GPU(duration=120) # Adjust duration as needed
26
  def generate_response(message, history):
27
  messages = [{"role": "system", "content": "You are a helpful assistant."}]
28
  for human, assistant in history:
 
8
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
9
  model = LlamaForCausalLM.from_pretrained(
10
  model_id,
11
+ torch_dtype=torch.float32,
12
  device_map="auto",
13
+ low_cpu_mem_usage=True
14
  )
15
 
16
  # Function to format the prompt
 
22
  return prompt
23
 
24
  # Function to generate response
25
+ @spaces.GPU(duration=300) # Increased duration due to potential slower processing
26
  def generate_response(message, history):
27
  messages = [{"role": "system", "content": "You are a helpful assistant."}]
28
  for human, assistant in history: