Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,7 @@ from threading import Thread
|
|
9 |
|
10 |
print(f"Starting to load the model to memory")
|
11 |
m = AutoModelForCausalLM.from_pretrained(
|
12 |
-
"stabilityai/stablelm-2-zephyr-1_6b", torch_dtype=torch.
|
13 |
tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)
|
14 |
generator = pipeline('text-generation', model=m, tokenizer=tok)
|
15 |
print(f"Sucessfully loaded the model to the memory")
|
@@ -23,14 +23,11 @@ def user(message, history):
|
|
23 |
|
24 |
def chat(history):
|
25 |
chat = []
|
26 |
-
print(history)
|
27 |
for item in history:
|
28 |
chat.append({"role": "user", "content": item[0]})
|
29 |
if item[1] is not None:
|
30 |
chat.append({"role": "assistant", "content": item[1]})
|
31 |
-
print(chat)
|
32 |
messages = tok.apply_chat_template(chat, tokenize=False)
|
33 |
-
print(messages)
|
34 |
# Tokenize the messages string
|
35 |
model_inputs = tok([messages], return_tensors="pt")
|
36 |
streamer = TextIteratorStreamer(
|
@@ -84,4 +81,4 @@ with gr.Blocks() as demo:
|
|
84 |
clear.click(lambda: None, None, [chatbot], queue=False)
|
85 |
|
86 |
demo.queue(max_size=32, concurrency_count=2)
|
87 |
-
demo.launch()
|
|
|
9 |
|
10 |
print(f"Starting to load the model to memory")
|
11 |
m = AutoModelForCausalLM.from_pretrained(
|
12 |
+
"stabilityai/stablelm-2-zephyr-1_6b", torch_dtype=torch.float32, trust_remote_code=True)
|
13 |
tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)
|
14 |
generator = pipeline('text-generation', model=m, tokenizer=tok)
|
15 |
print(f"Sucessfully loaded the model to the memory")
|
|
|
23 |
|
24 |
def chat(history):
|
25 |
chat = []
|
|
|
26 |
for item in history:
|
27 |
chat.append({"role": "user", "content": item[0]})
|
28 |
if item[1] is not None:
|
29 |
chat.append({"role": "assistant", "content": item[1]})
|
|
|
30 |
messages = tok.apply_chat_template(chat, tokenize=False)
|
|
|
31 |
# Tokenize the messages string
|
32 |
model_inputs = tok([messages], return_tensors="pt")
|
33 |
streamer = TextIteratorStreamer(
|
|
|
81 |
clear.click(lambda: None, None, [chatbot], queue=False)
|
82 |
|
83 |
demo.queue(max_size=32, concurrency_count=2)
|
84 |
+
demo.launch()
|