merterbak committed
Commit fa7e3c5 · verified · 1 Parent(s): 5b57421

Create app.py

Files changed (1)
  1. app.py +199 -0
app.py ADDED
@@ -0,0 +1,199 @@
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import torch
from threading import Thread

phi4_model_path = "microsoft/phi-4"
phi4_mini_model_path = "microsoft/Phi-4-mini-instruct"

device = "cuda:0" if torch.cuda.is_available() else "cpu"

phi4_model = AutoModelForCausalLM.from_pretrained(phi4_model_path, torch_dtype="auto").to(device)
phi4_tokenizer = AutoTokenizer.from_pretrained(phi4_model_path)
phi4_mini_model = AutoModelForCausalLM.from_pretrained(phi4_mini_model_path, torch_dtype="auto").to(device)
phi4_mini_tokenizer = AutoTokenizer.from_pretrained(phi4_mini_model_path)
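# Context (editor's annotation, not in the committed file): both checkpoints are
# loaded eagerly so the model dropdown can switch without a reload. phi-4 has
# 14B parameters and Phi-4-mini-instruct roughly 3.8B, so at the checkpoints'
# native 16-bit precision (torch_dtype="auto" keeps the stored dtype) the pair
# needs on the order of 35 GB of GPU memory; a memory-constrained port would
# likely load each model lazily on first use instead.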

@spaces.GPU(duration=60)
def generate_response(user_message, model_name, max_tokens, temperature, top_k, top_p, repetition_penalty, history_state):
    if not user_message.strip():
        # Empty input: yield the history back unchanged. (This function is a
        # generator, so a plain ``return value`` would be silently discarded.)
        yield history_state, history_state
        return

    # Select the model and its chat-format tags
    if model_name == "Phi-4":
        model = phi4_model
        tokenizer = phi4_tokenizer
        start_tag = "<|im_start|>"
        sep_tag = "<|im_sep|>"
        end_tag = "<|im_end|>"
    elif model_name == "Phi-4-mini-instruct":
        model = phi4_mini_model
        tokenizer = phi4_mini_tokenizer
        start_tag = ""
        sep_tag = ""
        end_tag = "<|end|>"
    else:
        raise ValueError("Invalid model selected")

    # Recommended prompt settings by Microsoft
    system_message = "You are a friendly and knowledgeable assistant, here to help with any questions or tasks."
    if model_name == "Phi-4":
        prompt = f"{start_tag}system{sep_tag}{system_message}{end_tag}"
        for message in history_state:
            if message["role"] == "user":
                prompt += f"{start_tag}user{sep_tag}{message['content']}{end_tag}"
            elif message["role"] == "assistant" and message["content"]:
                prompt += f"{start_tag}assistant{sep_tag}{message['content']}{end_tag}"
        prompt += f"{start_tag}user{sep_tag}{user_message}{end_tag}{start_tag}assistant{sep_tag}"
    else:
        prompt = f"<|system|>{system_message}{end_tag}"
        for message in history_state:
            if message["role"] == "user":
                prompt += f"<|user|>{message['content']}{end_tag}"
            elif message["role"] == "assistant" and message["content"]:
                prompt += f"<|assistant|>{message['content']}{end_tag}"
        prompt += f"<|user|>{user_message}{end_tag}<|assistant|>"
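    # For illustration (annotation, not in the committed file): after one prior
    # exchange, the Phi-4 branch above yields a single string of the form
    # (line breaks added here for readability only):
    #
    #   <|im_start|>system<|im_sep|>You are a friendly ...<|im_end|>
    #   <|im_start|>user<|im_sep|>Hi!<|im_end|>
    #   <|im_start|>assistant<|im_sep|>Hello!<|im_end|>
    #   <|im_start|>user<|im_sep|>How are you?<|im_end|>
    #   <|im_start|>assistant<|im_sep|>
    #
    # while the Phi-4-mini-instruct branch yields
    #
    #   <|system|>...<|end|><|user|>...<|end|><|assistant|>...<|end|><|user|>...<|end|><|assistant|>
    #
    # The trailing assistant header cues the model to generate its reply.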

    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Fall back to greedy decoding only when every sampling knob is at its
    # neutral setting; otherwise sample. (Under greedy decoding, Transformers
    # ignores temperature/top-k/top-p.)
    do_sample = not (temperature == 1.0 and top_k >= 100 and top_p == 1.0)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)

    # Sampling parameters
    generation_kwargs = {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "max_new_tokens": int(max_tokens),
        "do_sample": do_sample,
        "temperature": temperature,
        "top_k": int(top_k),
        "top_p": top_p,
        "repetition_penalty": repetition_penalty,
        "streamer": streamer,
    }

    # Run generation on a background thread so tokens can be streamed as they arrive
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Stream the response
    assistant_response = ""
    new_history = history_state + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": ""}
    ]
    format_tags = ("<|im_start|>", "<|im_sep|>", "<|im_end|>", "<|end|>",
                   "<|system|>", "<|user|>", "<|assistant|>")
    for new_token in streamer:
        # Strip any chat-format tags the streamer may emit
        cleaned_token = new_token
        for tag in format_tags:
            cleaned_token = cleaned_token.replace(tag, "")
        assistant_response += cleaned_token
        new_history[-1]["content"] = assistant_response.strip()
        yield new_history, new_history

    yield new_history, new_history
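# Annotation (not in the committed file): an equivalent, less error-prone way
# to build these prompts would be the tokenizer's bundled chat template, e.g.
#   messages = [{"role": "system", "content": system_message},
#               *history_state,
#               {"role": "user", "content": user_message}]
#   prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# assuming the checkpoint ships a chat template (recent Phi releases do); the
# hand-built strings above do make the two formats explicit, though.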

example_messages = {
    "Learn about physics": "Explain Newton’s laws of motion.",
    "Discover space facts": "What are some interesting facts about black holes?",
    "Write a factorial function": "Write a Python function to calculate the factorial of a number."
}

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# Phi-4 Chatbot Demo
Welcome to the Phi-4 Chatbot Demo! You can chat with Microsoft's Phi-4 or Phi-4-mini-instruct models. Adjust the settings on the left to customize the model's responses.
"""
    )

    history_state = gr.State([])

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Settings")
            model_dropdown = gr.Dropdown(
                choices=["Phi-4", "Phi-4-mini-instruct"],
                label="Select Model",
                value="Phi-4"
            )
            max_tokens_slider = gr.Slider(
                minimum=64,
                maximum=4096,
                step=50,
                value=512,
                label="Max Tokens"
            )
            with gr.Accordion("Advanced Settings", open=False):
                temperature_slider = gr.Slider(
                    minimum=0.1,
                    maximum=2.0,
                    value=1.0,
                    label="Temperature"
                )
                top_k_slider = gr.Slider(
                    minimum=1,
                    maximum=100,
                    step=1,
                    value=50,
                    label="Top-k"
                )
                top_p_slider = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.9,
                    label="Top-p"
                )
                repetition_penalty_slider = gr.Slider(
                    minimum=1.0,
                    maximum=2.0,
                    value=1.0,
                    label="Repetition Penalty"
                )

        with gr.Column(scale=4):
            chatbot = gr.Chatbot(label="Chat", type="messages")
            with gr.Row():
                user_input = gr.Textbox(
                    label="Your message",
                    placeholder="Type your message here...",
                    scale=3
                )
                submit_button = gr.Button("Send", variant="primary", scale=1)
                clear_button = gr.Button("Clear", scale=1)
            gr.Markdown("**Try these examples:**")
            with gr.Row():
                example1_button = gr.Button("Learn about physics")
                example2_button = gr.Button("Discover space facts")
                example3_button = gr.Button("Write a factorial function")

    submit_button.click(
        fn=generate_response,
        inputs=[user_input, model_dropdown, max_tokens_slider, temperature_slider, top_k_slider, top_p_slider, repetition_penalty_slider, history_state],
        outputs=[chatbot, history_state]
    ).then(
        fn=lambda: gr.update(value=""),
        inputs=None,
        outputs=user_input
    )

    clear_button.click(
        fn=lambda: ([], []),
        inputs=None,
        outputs=[chatbot, history_state]
    )

    example1_button.click(
        fn=lambda: gr.update(value=example_messages["Learn about physics"]),
        inputs=None,
        outputs=user_input
    )
    example2_button.click(
        fn=lambda: gr.update(value=example_messages["Discover space facts"]),
        inputs=None,
        outputs=user_input
    )
    example3_button.click(
        fn=lambda: gr.update(value=example_messages["Write a factorial function"]),
        inputs=None,
        outputs=user_input
    )

demo.launch(ssr_mode=False)
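
Note on running this commit: the Space pins its dependencies elsewhere (this commit adds only app.py); running it needs gradio, spaces, torch, and a recent transformers. The @spaces.GPU(duration=60) decorator requests a ZeroGPU slot of up to 60 seconds per call on Hugging Face Spaces and is intended to be a no-op elsewhere, so the script should also run on a local GPU with enough memory. A minimal smoke test that bypasses the UI (hypothetical, not part of the commit; run with the demo.launch line commented out, consuming generate_response as the generator it is):

    history = []
    for chat, history in generate_response(
        "Explain Newton's laws of motion.",  # user_message
        "Phi-4-mini-instruct",               # model_name
        128,                                 # max_tokens
        1.0, 50, 1.0, 1.0,                   # temperature, top_k, top_p, repetition_penalty
        history,                             # history_state
    ):
        pass
    print(chat[-1]["content"])               # final assistant reply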