Update app.py
app.py CHANGED
@@ -12,8 +12,8 @@ DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

 #if torch.cuda.is_available():
-model_id = "
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, trust_remote_code=True,
+model_id = "mistralai/Mistral-7B-Instruct-v0.1"
+model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, trust_remote_code=True, device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 tokenizer.use_default_system_prompt = False

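For reference, the new loading code in the hunk above maps onto the stock transformers API. A minimal standalone sketch of the same load follows; note that device_map="auto" requires the accelerate package, and trust_remote_code=True from the diff is omitted here because this model ships no custom modeling code (both points are assumptions about the runtime environment, not part of the commit).

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mistralai/Mistral-7B-Instruct-v0.1"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # half precision, intended for GPU inference
    device_map="auto",          # weight placement handled by accelerate
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.use_default_system_prompt = False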
@@ -29,14 +29,19 @@ def generate(
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
-    conversation = []
+    '''conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
     for user, assistant in chat_history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
-    conversation.append({"role": "user", "content": message})
+    conversation.append({"role": "user", "content": message})'''
+    prompt = "<s>"
+    for user_prompt, bot_response in chat_history:
+        prompt += f"[INST] {user_prompt} [/INST]"
+        prompt += f" {bot_response}</s> "
+    prompt += f"[INST] {message} [/INST]"

-    input_ids = tokenizer
+    input_ids = tokenizer(conversation, return_tensors="pt")['input_ids']
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
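One thing worth flagging in the second hunk: the dictionary-based conversation is now wrapped in a triple-quoted string, yet the new input_ids line still tokenizes conversation, which would raise a NameError when generate() runs. Presumably the freshly built prompt string is what should be encoded. A hedged sketch of that line, under the assumption that prompt is the intended input (add_special_tokens=False is likewise an assumption, to avoid prepending a second BOS token since the prompt string already starts with a literal "<s>"):

    # assumption: tokenize the new prompt string, not the commented-out conversation list
    input_ids = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)["input_ids"]
    input_ids = input_ids.to(model.device)  # assumption: move tensors to the model's device before generation

Recent transformers releases also expose tokenizer.apply_chat_template(), which should produce an equivalent [INST]-formatted prompt for Mistral directly from the commented-out conversation list, making the manual string building unnecessary.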