Didier committed on
Commit
6f080e6
·
verified ·
1 Parent(s): a5dc4f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -16,7 +16,7 @@ from transformers import TextIteratorStreamer
16
  from threading import Thread
17
  import torch
18
 
19
- device = 'cuda'
20
  model_id = "google/gemma-3-4b-it"
21
  processor = AutoProcessor.from_pretrained(model_id, use_fast=True, padding_side="left")
22
  model = Gemma3ForConditionalGeneration.from_pretrained(
@@ -24,6 +24,7 @@ model = Gemma3ForConditionalGeneration.from_pretrained(
24
  torch_dtype=torch.bfloat16
25
  ).to(device).eval()
26
 
 
27
  def process(message, history):
28
  """Generate the model response in streaming mode given message and history
29
  """
 
16
  from threading import Thread
17
  import torch
18
 
19
+ device = 'auto'
20
  model_id = "google/gemma-3-4b-it"
21
  processor = AutoProcessor.from_pretrained(model_id, use_fast=True, padding_side="left")
22
  model = Gemma3ForConditionalGeneration.from_pretrained(
 
24
  torch_dtype=torch.bfloat16
25
  ).to(device).eval()
26
 
27
+ @torch.inference_mode()
28
  def process(message, history):
29
  """Generate the model response in streaming mode given message and history
30
  """