Update app.py
app.py CHANGED
@@ -15,7 +15,7 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 model_id = "mistralai/Mistral-7B-Instruct-v0.1"
 model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, trust_remote_code=True, device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-tokenizer.use_default_system_prompt = False
+#tokenizer.use_default_system_prompt = False
 
 
 #@spaces.GPU
@@ -41,7 +41,7 @@ def generate(
         prompt += f" {bot_response}</s> "
     prompt += f"[INST] {message} [/INST]"
 
-    input_ids = tokenizer(
+    input_ids = tokenizer(prompt, return_tensors="pt")['input_ids']
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
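
For reference, below is a minimal sketch of what the corrected line at 44 is expected to do, together with the trimming logic from the surrounding context lines. model_id and MAX_INPUT_TOKEN_LENGTH are taken from the diff; the prompt string is a made-up stand-in for the conversation that generate() assembles, and the gr.Warning call is omitted so the sketch stays self-contained.

# Sketch of the corrected tokenization and trimming step (not the full app).
import os

from transformers import AutoTokenizer

MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
model_id = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Hypothetical stand-in for the prompt built from chat history in Mistral's [INST] format.
prompt = "<s>[INST] Hello, how are you? [/INST]"

# The fixed line: the tokenizer returns a BatchEncoding, and indexing
# 'input_ids' yields a (1, sequence_length) tensor because return_tensors="pt".
input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"]

# Keep only the most recent MAX_INPUT_TOKEN_LENGTH tokens when the
# conversation grows too long, as in the context lines of the diff.
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
    input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]

print(input_ids.shape)  # e.g. torch.Size([1, 13])

Trimming from the left keeps the most recent turns of the conversation, which is what the Gradio warning in the original code describes.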