bobber committed
Commit c2779df · verified · 1 parent: 5c1ba29

Update app.py

Files changed (1): app.py (+3 / -4)
app.py CHANGED
@@ -41,8 +41,7 @@ model = Llama4ForConditionalGeneration.from_pretrained(
     # quantization_config=bnb_config,
     device_map="auto",
 )
-# processor = AutoProcessor.from_pretrained(model_name, cache_dir = cache_dir)
-tokenizer = AutoProcessor.from_pretrained(model_name
+processor = AutoProcessor.from_pretrained(model_name, cache_dir = cache_dir)
     # , gguf_file=filename
     # , subfolder=subfolder
 )
@@ -81,7 +80,7 @@ def generate(prompt, history):
 
     # response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
     # return response
-    inputs = tokenizer.apply_chat_template(
+    inputs = processor.apply_chat_template(
         messages,
         add_generation_prompt=True,
         tokenize=True,
@@ -92,7 +91,7 @@ def generate(prompt, history):
         **inputs,
         max_new_tokens=512,
     )
-    response = tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[-1]:])[0]
+    response = processor.batch_decode(outputs[:, inputs["input_ids"].shape[-1]:])[0]
 
 
 chat_interface = gr.ChatInterface(
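
In short, the commit renames `tokenizer` to `processor` throughout: the `AutoProcessor` is now loaded with the previously commented-out `cache_dir` argument, and both `apply_chat_template` and the final `batch_decode` go through it. (As committed, the new call closes its own parenthesis, so the commented `gguf_file`/`subfolder` lines and the trailing `)` below it are left dangling after a complete statement.)

Below is a minimal sketch of the flow the file converges on after this commit. The `model_name` and `cache_dir` values here are placeholders (the real ones are defined elsewhere in app.py), and it assumes the lines elided between hunks pass `return_dict=True`/`return_tensors="pt"`, since the diff indexes `inputs["input_ids"]`:

import gradio as gr
from transformers import AutoProcessor, Llama4ForConditionalGeneration

model_name = "meta-llama/Llama-4-Scout-17B-16E-Instruct"  # assumption: real id is set in app.py
cache_dir = "./cache"                                     # assumption

model = Llama4ForConditionalGeneration.from_pretrained(
    model_name,
    device_map="auto",  # shard across available devices
)
processor = AutoProcessor.from_pretrained(model_name, cache_dir=cache_dir)

def generate(prompt, history):
    # gr.ChatInterface calls fn(message, history); history handling is elided in the diff.
    messages = [{"role": "user", "content": prompt}]
    inputs = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,     # assumption: required for inputs["input_ids"] below
        return_tensors="pt",  # assumption
    ).to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=512)
    # Slice off the prompt tokens so only the newly generated text is decoded.
    return processor.batch_decode(outputs[:, inputs["input_ids"].shape[-1]:])[0]

chat_interface = gr.ChatInterface(generate)

Slicing `outputs` at `inputs["input_ids"].shape[-1]` is what keeps the echoed prompt out of the chat reply; only the tokens generated after the templated prompt are decoded.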