Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# 0. Install custom transformers and
|
2 |
import os
|
3 |
os.system("pip install git+https://github.com/shumingma/transformers.git")
|
4 |
|
@@ -15,7 +15,7 @@ from transformers import (
|
|
15 |
import gradio as gr
|
16 |
import spaces
|
17 |
|
18 |
-
# 1. System prompt
|
19 |
SYSTEM_PROMPT = """
|
20 |
You are a friendly café assistant for Café Eleven. Your job is to:
|
21 |
1. Greet the customer warmly
|
@@ -27,10 +27,9 @@ You are a friendly café assistant for Café Eleven. Your job is to:
|
|
27 |
Always be polite and helpful!
|
28 |
"""
|
29 |
|
30 |
-
# 2. Model info
|
31 |
MODEL_ID = "microsoft/bitnet-b1.58-2B-4T"
|
32 |
|
33 |
-
#
|
34 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
35 |
model = AutoModelForCausalLM.from_pretrained(
|
36 |
MODEL_ID,
|
@@ -40,7 +39,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
40 |
|
41 |
print(f"Model loaded on device: {model.device}")
|
42 |
|
43 |
-
#
|
44 |
@spaces.GPU
|
45 |
def respond(
|
46 |
message: str,
|
@@ -50,9 +49,6 @@ def respond(
|
|
50 |
temperature: float,
|
51 |
top_p: float,
|
52 |
):
|
53 |
-
"""
|
54 |
-
Generate a chat response using streaming with TextIteratorStreamer.
|
55 |
-
"""
|
56 |
messages = [{"role": "system", "content": system_message}]
|
57 |
for user_msg, bot_msg in history:
|
58 |
if user_msg:
|
@@ -85,7 +81,7 @@ def respond(
|
|
85 |
response += new_text
|
86 |
yield response
|
87 |
|
88 |
-
#
|
89 |
demo = gr.ChatInterface(
|
90 |
fn=respond,
|
91 |
title="Café Eleven Assistant",
|
@@ -135,6 +131,6 @@ demo = gr.ChatInterface(
|
|
135 |
],
|
136 |
)
|
137 |
|
138 |
-
#
|
139 |
if __name__ == "__main__":
|
140 |
-
demo.launch(
|
|
|
1 |
+
# 0. Install custom transformers and import packages
|
2 |
import os
|
3 |
os.system("pip install git+https://github.com/shumingma/transformers.git")
|
4 |
|
|
|
15 |
import gradio as gr
|
16 |
import spaces
|
17 |
|
18 |
+
# 1. System prompt for Café Eleven
|
19 |
SYSTEM_PROMPT = """
|
20 |
You are a friendly café assistant for Café Eleven. Your job is to:
|
21 |
1. Greet the customer warmly
|
|
|
27 |
Always be polite and helpful!
|
28 |
"""
|
29 |
|
|
|
30 |
MODEL_ID = "microsoft/bitnet-b1.58-2B-4T"
|
31 |
|
32 |
+
# 2. Load tokenizer and model
|
33 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
34 |
model = AutoModelForCausalLM.from_pretrained(
|
35 |
MODEL_ID,
|
|
|
39 |
|
40 |
print(f"Model loaded on device: {model.device}")
|
41 |
|
42 |
+
# 3. Response generation with streaming
|
43 |
@spaces.GPU
|
44 |
def respond(
|
45 |
message: str,
|
|
|
49 |
temperature: float,
|
50 |
top_p: float,
|
51 |
):
|
|
|
|
|
|
|
52 |
messages = [{"role": "system", "content": system_message}]
|
53 |
for user_msg, bot_msg in history:
|
54 |
if user_msg:
|
|
|
81 |
response += new_text
|
82 |
yield response
|
83 |
|
84 |
+
# 4. Gradio ChatInterface setup
|
85 |
demo = gr.ChatInterface(
|
86 |
fn=respond,
|
87 |
title="Café Eleven Assistant",
|
|
|
131 |
],
|
132 |
)
|
133 |
|
134 |
+
# 5. Launch normally
|
135 |
if __name__ == "__main__":
|
136 |
+
demo.launch()
|