kimhyunwoo commited on
Commit
b9185e7
·
verified ·
1 Parent(s): 0c5d476

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -279
app.py CHANGED
@@ -1,4 +1,7 @@
1
  import os
 
 
 
2
  import threading
3
  import torch
4
  import torch._dynamo
@@ -12,55 +15,17 @@ from transformers import (
12
  import gradio as gr
13
  import spaces
14
 
15
- # ํ•„์š”ํ•œ ๊ฒฝ์šฐ Bitnet ์ง€์›์„ ์œ„ํ•œ transformers ์„ค์น˜
16
- # Hugging Face Spaces์—์„œ๋Š” Dockerfile ๋“ฑ์„ ํ†ตํ•ด ๋ฏธ๋ฆฌ ์„ค์น˜ํ•˜๋Š” ๊ฒƒ์ด ๋” ์ผ๋ฐ˜์ ์ž…๋‹ˆ๋‹ค.
17
- # ๋กœ์ปฌ์—์„œ ํ…Œ์ŠคํŠธ ์‹œ์—๋Š” ํ•„์š”ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
18
- # print("Installing required transformers branch...")
19
- # try:
20
- # os.system("pip install git+https://github.com/shumingma/transformers.git -q")
21
- # print("transformers branch installed.")
22
- # except Exception as e:
23
- # print(f"Error installing transformers branch: {e}")
24
- # print("Proceeding with potentially default transformers version.")
25
-
26
- # os.system("pip install accelerate bitsandbytes -q") # bitsandbytes, accelerate๋„ ํ•„์š”ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
27
-
28
-
29
  model_id = "microsoft/bitnet-b1.58-2B-4T"
30
 
31
- # ๋ชจ๋ธ ๋ฐ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
32
- print(f"Loading model: {model_id}")
33
- try:
34
- tokenizer = AutoTokenizer.from_pretrained(model_id)
35
- # device_map="auto"๋Š” ์—ฌ๋Ÿฌ GPU ๋˜๋Š” CPU๋กœ ๋ชจ๋ธ์„ ์ž๋™์œผ๋กœ ๋ถ„์‚ฐ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
36
- # bfloat16์€ ๋ชจ๋ธ ๊ฐ€์ค‘์น˜์— ์‚ฌ์šฉ๋˜๋Š” ๋ฐ์ดํ„ฐ ํƒ€์ž…์ž…๋‹ˆ๋‹ค.
37
- model = AutoModelForCausalLM.from_pretrained(
38
- model_id,
39
- torch_dtype=torch.bfloat16,
40
- device_map="auto",
41
- # load_in_8bit=True # Bitnet์€ 1.58bit์ด๋ฏ€๋กœ 8bit ๋กœ๋”ฉ์ด ์˜๋ฏธ ์—†์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
42
- )
43
- print(f"Model loaded successfully on device: {model.device}")
44
- except Exception as e:
45
- print(f"Error loading model: {e}")
46
- # ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ ์‹œ ๋”๋ฏธ ๋ชจ๋ธ ์‚ฌ์šฉ ๋˜๋Š” ์˜ค๋ฅ˜ ์ฒ˜๋ฆฌ
47
- class DummyModel:
48
- def generate(self, **kwargs):
49
- # ๋”๋ฏธ ์‘๋‹ต ์ƒ์„ฑ
50
- input_ids = kwargs.get('input_ids')
51
- streamer = kwargs.get('streamer')
52
- if streamer:
53
- # ๊ฐ„๋‹จํ•œ ๋”๋ฏธ ์‘๋‹ต ์ŠคํŠธ๋ฆฌ๋ฐ
54
- dummy_response = "๋ชจ๋ธ ๋กœ๋”ฉ์— ์‹คํŒจํ•˜์—ฌ ๋”๋ฏธ ์‘๋‹ต์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค. ์„ค์ •/๊ฒฝ๋กœ๋ฅผ ํ™•์ธํ•˜์„ธ์š”."
55
- for char in dummy_response:
56
- streamer.put(char)
57
- streamer.end()
58
- model = DummyModel()
59
- tokenizer = AutoTokenizer.from_pretrained("gpt2") # ๋”๋ฏธ ํ† ํฌ๋‚˜์ด์ €
60
- print("Using dummy model due to loading failure.")
61
-
62
 
63
- @spaces.GPU # Hugging Face Spaces์—์„œ GPU ์‚ฌ์šฉ์„ ์ง€์ •ํ•ฉ๋‹ˆ๋‹ค.
64
  def respond(
65
  message: str,
66
  history: list[tuple[str, str]],
@@ -81,11 +46,6 @@ def respond(
81
  Yields:
82
  The growing response text as new tokens are generated.
83
  """
84
- # ๋”๋ฏธ ๋ชจ๋ธ ์‚ฌ์šฉ ์‹œ ์ŠคํŠธ๋ฆฌ๋ฐ ์˜ค๋ฅ˜ ๋ฐฉ์ง€
85
- if isinstance(model, DummyModel):
86
- yield "๋ชจ๋ธ ๋กœ๋”ฉ์— ์‹คํŒจํ•˜์—ฌ ์‘๋‹ต์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
87
- return
88
-
89
  messages = [{"role": "system", "content": system_message}]
90
  for user_msg, bot_msg in history:
91
  if user_msg:
@@ -94,236 +54,45 @@ def respond(
94
  messages.append({"role": "assistant", "content": bot_msg})
95
  messages.append({"role": "user", "content": message})
96
 
97
- try:
98
- prompt = tokenizer.apply_chat_template(
99
- messages, tokenize=False, add_generation_prompt=True
100
- )
101
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
102
-
103
- streamer = TextIteratorStreamer(
104
- tokenizer, skip_prompt=True, skip_special_tokens=True
105
- )
106
- generate_kwargs = dict(
107
- **inputs,
108
- streamer=streamer,
109
- max_new_tokens=max_tokens,
110
- temperature=temperature,
111
- top_p=top_p,
112
- do_sample=True,
113
- # Bitnet ๋ชจ๋ธ์— ํ•„์š”ํ•œ ์ถ”๊ฐ€ ์ธ์ž ์„ค์ • (๋ชจ๋ธ ๋ฌธ์„œ ํ™•์ธ ํ•„์š”)
114
- # ์˜ˆ๋ฅผ ๋“ค์–ด, quantize_config ๋“ฑ
115
- )
116
-
117
- # ์“ฐ๋ ˆ๋“œ์—์„œ ๋ชจ๋ธ ์ƒ์„ฑ ์‹คํ–‰
118
- thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
119
- thread.start()
120
-
121
- # ์ŠคํŠธ๋ฆฌ๋จธ๋กœ๋ถ€ํ„ฐ ํ…์ŠคํŠธ๋ฅผ ์ฝ์–ด์™€ yield
122
- response = ""
123
- for new_text in streamer:
124
- # yield ํ•˜๊ธฐ ์ „์— ๋ถˆํ•„์š”ํ•œ ๊ณต๋ฐฑ/ํ† ํฐ ์ œ๊ฑฐ ๋˜๋Š” ์ฒ˜๋ฆฌ ๊ฐ€๋Šฅ
125
- response += new_text
126
- yield response
127
-
128
- except Exception as e:
129
- print(f"Error during response generation: {e}")
130
- yield f"์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}"
131
-
132
-
133
- # --- ๋””์ž์ธ ๊ฐœ์„ ์„ ์œ„ํ•œ CSS ์ฝ”๋“œ ---
134
- css_styles = """
135
- /* ์ „์ฒด ํŽ˜์ด์ง€ ๋ฐฐ๊ฒฝ ๋ฐ ๊ธฐ๋ณธ ํฐํŠธ ์„ค์ • */
136
- body {
137
- font-family: 'Segoe UI', 'Roboto', 'Arial', sans-serif;
138
- line-height: 1.6;
139
- margin: 0;
140
- padding: 20px; /* ์•ฑ ์ฃผ๋ณ€ ์—ฌ๋ฐฑ ์ถ”๊ฐ€ */
141
- background-color: #f4f7f6; /* ๋ถ€๋“œ๋Ÿฌ์šด ๋ฐฐ๊ฒฝ์ƒ‰ */
142
- }
143
-
144
- /* ๋ฉ”์ธ ์•ฑ ์ปจํ…Œ์ด๋„ˆ ์Šคํƒ€์ผ */
145
- .gradio-container {
146
- max-width: 900px; /* ์ค‘์•™ ์ •๋ ฌ ๋ฐ ์ตœ๋Œ€ ๋„ˆ๋น„ ์ œํ•œ */
147
- margin: 20px auto;
148
- border-radius: 12px; /* ๋‘ฅ๊ทผ ๋ชจ์„œ๋ฆฌ */
149
- overflow: hidden; /* ์ž์‹ ์š”์†Œ๋“ค์ด ๋ชจ์„œ๋ฆฌ๋ฅผ ๋„˜์ง€ ์•Š๋„๋ก */
150
- box-shadow: 0 8px 16px rgba(0, 0, 0, 0.1); /* ๊ทธ๋ฆผ์ž ํšจ๊ณผ */
151
- background-color: #ffffff; /* ์•ฑ ๋‚ด์šฉ ์˜์—ญ ๋ฐฐ๊ฒฝ์ƒ‰ */
152
- }
153
-
154
- /* ํƒ€์ดํ‹€ ๋ฐ ์„ค๋ช… ์˜์—ญ (ChatInterface์˜ ๊ธฐ๋ณธ ํƒ€์ดํ‹€/์„ค๋ช…) */
155
- /* ์ด ์˜์—ญ์€ ChatInterface ๊ตฌ์กฐ์— ๋”ฐ๋ผ ์ •ํ™•ํ•œ ํด๋ž˜์Šค ์ด๋ฆ„์ด ๋‹ค๋ฅผ ์ˆ˜ ์žˆ์œผ๋‚˜,
156
- .gradio-container ๋‚ด๋ถ€์˜ ์ฒซ ๋ธ”๋ก์ด๋‚˜ H1/P ํƒœ๊ทธ๋ฅผ ํƒ€๊ฒŸํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
157
- ํ…Œ๋งˆ์™€ ํ•จ๊ป˜ ์‚ฌ์šฉํ•˜๋ฉด ๋Œ€๋ถ€๋ถ„ ์ž˜ ์ฒ˜๋ฆฌ๋ฉ๋‹ˆ๋‹ค. ์—ฌ๊ธฐ์„œ๋Š” ์ถ”๊ฐ€์ ์ธ ํŒจ๋”ฉ ๋“ฑ๋งŒ ๊ณ ๋ ค */
158
- .gradio-container > .gradio-block:first-child {
159
- padding: 20px 20px 10px 20px; /* ์ƒ๋‹จ ํŒจ๋”ฉ ์กฐ์ • */
160
- }
161
-
162
- /* ์ฑ„ํŒ… ๋ฐ•์Šค ์˜์—ญ ์Šคํƒ€์ผ */
163
- .gradio-chatbox {
164
- /* ํ…Œ๋งˆ์— ์˜ํ•ด ์Šคํƒ€์ผ๋ง๋˜์ง€๋งŒ, ์ถ”๊ฐ€์ ์ธ ๋‚ด๋ถ€ ํŒจ๋”ฉ ๋“ฑ ์กฐ์ • ๊ฐ€๋Šฅ */
165
- padding: 15px;
166
- background-color: #fefefe; /* ์ฑ„ํŒ… ์˜์—ญ ๋ฐฐ๊ฒฝ์ƒ‰ */
167
- border-radius: 8px; /* ์ฑ„ํŒ… ์˜์—ญ ๋‚ด๋ถ€ ๋ชจ์„œ๋ฆฌ */
168
- border: 1px solid #e0e0e0; /* ๊ฒฝ๊ณ„์„  */
169
- }
170
-
171
- /* ์ฑ„ํŒ… ๋ฉ”์‹œ์ง€ ์Šคํƒ€์ผ */
172
- .gradio-chatmessage {
173
- margin-bottom: 12px;
174
- padding: 10px 15px;
175
- border-radius: 20px; /* ๋‘ฅ๊ทผ ๋ฉ”์‹œ์ง€ ๋ชจ์„œ๋ฆฌ */
176
- max-width: 75%; /* ๋ฉ”์‹œ์ง€ ๋„ˆ๋น„ ์ œํ•œ */
177
- word-wrap: break-word; /* ๊ธด ๋‹จ์–ด ์ค„๋ฐ”๊ฟˆ */
178
- white-space: pre-wrap; /* ๊ณต๋ฐฑ ๋ฐ ์ค„๋ฐ”๊ฟˆ ์œ ์ง€ */
179
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05); /* ๋ฉ”์‹œ์ง€์— ์•ฝ๊ฐ„์˜ ๊ทธ๋ฆผ์ž */
180
- }
181
-
182
- /* ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์Šคํƒ€์ผ */
183
- .gradio-chatmessage.user {
184
- background-color: #007bff; /* ํŒŒ๋ž€์ƒ‰ ๊ณ„์—ด */
185
- color: white;
186
- margin-left: auto; /* ์˜ค๋ฅธ์ชฝ ์ •๋ ฌ */
187
- border-bottom-right-radius: 2px; /* ์˜ค๋ฅธ์ชฝ ์•„๋ž˜ ๋ชจ์„œ๋ฆฌ ๊ฐ์ง€๊ฒŒ */
188
- }
189
-
190
- /* ๋ด‡ ๋ฉ”์‹œ์ง€ ์Šคํƒ€์ผ */
191
- .gradio-chatmessage.bot {
192
- background-color: #e9ecef; /* ๋ฐ์€ ํšŒ์ƒ‰ */
193
- color: #333; /* ์–ด๋‘์šด ํ…์ŠคํŠธ */
194
- margin-right: auto; /* ์™ผ์ชฝ ์ •๋ ฌ */
195
- border-bottom-left-radius: 2px; /* ์™ผ์ชฝ ์•„๋ž˜ ๋ชจ์„œ๋ฆฌ ๊ฐ์ง€๊ฒŒ */
196
- }
197
-
198
- /* ์ž…๋ ฅ์ฐฝ ๋ฐ ๋ฒ„ํŠผ ์˜์—ญ ์Šคํƒ€์ผ */
199
- .gradio-input-box {
200
- padding: 15px;
201
- border-top: 1px solid #eee; /* ์œ„์ชฝ ๊ฒฝ๊ณ„์„  */
202
- background-color: #f8f9fa; /* ์ž…๋ ฅ ์˜์—ญ ๋ฐฐ๊ฒฝ์ƒ‰ */
203
- }
204
- /* ์ž…๋ ฅ ํ…์ŠคํŠธ ์—์–ด๋ฆฌ์–ด ์Šคํƒ€์ผ */
205
- .gradio-input-box textarea {
206
- border-radius: 8px;
207
- padding: 10px;
208
- border: 1px solid #ccc;
209
- resize: none !important; /* ์ž…๋ ฅ์ฐฝ ํฌ๊ธฐ ์กฐ์ ˆ ๋น„ํ™œ์„ฑํ™” (์„ ํƒ ์‚ฌํ•ญ) */
210
- min-height: 50px; /* ์ตœ์†Œ ๋†’์ด */
211
- max-height: 150px; /* ์ตœ๋Œ€ ๋†’์ด */
212
- overflow-y: auto; /* ๋‚ด์šฉ ๋„˜์น  ๊ฒฝ์šฐ ์Šคํฌ๋กค */
213
- }
214
- /* ์Šคํฌ๋กค๋ฐ” ์Šคํƒ€์ผ (์„ ํƒ ์‚ฌํ•ญ) */
215
- .gradio-input-box textarea::-webkit-scrollbar {
216
- width: 8px;
217
- }
218
- .gradio-input-box textarea::-webkit-scrollbar-thumb {
219
- background-color: #ccc;
220
- border-radius: 4px;
221
- }
222
- .gradio-input-box textarea::-webkit-scrollbar-track {
223
- background-color: #f1f1f1;
224
- }
225
-
226
-
227
- /* ๋ฒ„ํŠผ ์Šคํƒ€์ผ */
228
- .gradio-button {
229
- border-radius: 8px;
230
- padding: 10px 20px;
231
- font-weight: bold;
232
- transition: background-color 0.2s ease, opacity 0.2s ease; /* ํ˜ธ๋ฒ„ ์• ๋‹ˆ๋ฉ”์ด์…˜ */
233
- border: none; /* ๊ธฐ๋ณธ ํ…Œ๋‘๋ฆฌ ์ œ๊ฑฐ */
234
- cursor: pointer;
235
- }
236
-
237
- .gradio-button:not(.clear-button) { /* Send ๋ฒ„ํŠผ */
238
- background-color: #28a745; /* ์ดˆ๋ก์ƒ‰ */
239
- color: white;
240
- }
241
- .gradio-button:not(.clear-button):hover {
242
- background-color: #218838;
243
- }
244
- .gradio-button:disabled { /* ๋น„ํ™œ์„ฑํ™”๋œ ๋ฒ„ํŠผ */
245
- opacity: 0.6;
246
- cursor: not-allowed;
247
- }
248
-
249
-
250
- .gradio-button.clear-button { /* Clear ๋ฒ„ํŠผ */
251
- background-color: #dc3545; /* ๋นจ๊ฐ„์ƒ‰ */
252
- color: white;
253
- }
254
- .gradio-button.clear-button:hover {
255
- background-color: #c82333;
256
- }
257
-
258
- /* Additional inputs (์ถ”๊ฐ€ ์„ค์ •) ์˜์—ญ ์Šคํƒ€์ผ */
259
- /* ์ด ์˜์—ญ์€ ๋ณดํ†ต ์•„์ฝ”๋””์–ธ ํ˜•ํƒœ๋กœ ๋˜์–ด ์žˆ์œผ๋ฉฐ, .gradio-accordion ํด๋ž˜์Šค๋ฅผ ๊ฐ€์ง‘๋‹ˆ๋‹ค. */
260
- .gradio-accordion {
261
- border-radius: 12px; /* ์™ธ๋ถ€ ์ปจํ…Œ์ด๋„ˆ์™€ ๋™์ผํ•œ ๋ชจ์„œ๋ฆฌ */
262
- margin-top: 15px; /* ์ฑ„ํŒ… ์˜์—ญ๊ณผ์˜ ๊ฐ„๊ฒฉ */
263
- border: 1px solid #ddd; /* ๊ฒฝ๊ณ„์„  */
264
- box-shadow: none; /* ๋‚ด๋ถ€ ๊ทธ๋ฆผ์ž ์ œ๊ฑฐ */
265
- }
266
- /* ์•„์ฝ”๋””์–ธ ํ—ค๋” (๋ผ๋ฒจ) ์Šคํƒ€์ผ */
267
- .gradio-accordion .label {
268
- font-weight: bold;
269
- color: #007bff; /* ํŒŒ๋ž€์ƒ‰ ๊ณ„์—ด */
270
- padding: 15px; /* ํ—ค๋” ํŒจ๋”ฉ */
271
- background-color: #e9ecef; /* ํ—ค๋” ๋ฐฐ๊ฒฝ์ƒ‰ */
272
- border-bottom: 1px solid #ddd; /* ํ—ค๋” ์•„๋ž˜ ๊ฒฝ๊ณ„์„  */
273
- border-top-left-radius: 11px; /* ์ƒ๋‹จ ๋ชจ์„œ๋ฆฌ */
274
- border-top-right-radius: 11px;
275
- }
276
- /* ์•„์ฝ”๋””์–ธ ๋‚ด์šฉ ์˜์—ญ ์Šคํƒ€์ผ */
277
- .gradio-accordion .wrap {
278
- padding: 15px; /* ๋‚ด์šฉ ํŒจ๋”ฉ */
279
- background-color: #fefefe; /* ๋‚ด์šฉ ๋ฐฐ๊ฒฝ์ƒ‰ */
280
- border-bottom-left-radius: 11px; /* ํ•˜๋‹จ ๋ชจ์„œ๋ฆฌ */
281
- border-bottom-right-radius: 11px;
282
- }
283
- /* ์ถ”๊ฐ€ ์„ค์ • ๋‚ด ๊ฐœ๋ณ„ ์ž…๋ ฅ ์ปดํฌ๋„ŒํŠธ ์Šค๏ฟฝ๏ฟฝ๏ฟฝ์ผ (์Šฌ๋ผ์ด๋”, ํ…์ŠคํŠธ๋ฐ•์Šค ๋“ฑ) */
284
- .gradio-slider, .gradio-textbox, .gradio-number {
285
- margin-bottom: 10px; /* ๊ฐ ์ž…๋ ฅ ์š”์†Œ ์•„๋ž˜ ๊ฐ„๊ฒฉ */
286
- padding: 8px; /* ๋‚ด๋ถ€ ํŒจ๋”ฉ */
287
- border: 1px solid #e0e0e0; /* ๊ฒฝ๊ณ„์„  */
288
- border-radius: 8px; /* ๋‘ฅ๊ทผ ๋ชจ์„œ๋ฆฌ */
289
- background-color: #fff; /* ๋ฐฐ๊ฒฝ์ƒ‰ */
290
- }
291
- /* ์ž…๋ ฅ ํ•„๋“œ ๋ผ๋ฒจ ์Šคํƒ€์ผ */
292
- .gradio-label {
293
- font-weight: normal; /* ๋ผ๋ฒจ ํฐํŠธ ๊ตต๊ธฐ */
294
- margin-bottom: 5px; /* ๋ผ๋ฒจ๊ณผ ์ž…๋ ฅ ํ•„๋“œ ๊ฐ„ ๊ฐ„๊ฒฉ */
295
- color: #555; /* ๋ผ๋ฒจ ์ƒ‰์ƒ */
296
- display: block; /* ๋ผ๋ฒจ์„ ๋ธ”๋ก ์š”์†Œ๋กœ ๋งŒ๋“ค์–ด ์œ„๋กœ ์˜ฌ๋ฆผ */
297
- }
298
- /* ์Šฌ๋ผ์ด๋” ํŠธ๋ž™ ๋ฐ ํ•ธ๋“ค ์Šคํƒ€์ผ (๋” ์„ธ๋ฐ€ํ•œ ์กฐ์ • ๊ฐ€๋Šฅ) */
299
- /* ์˜ˆ: .gradio-slider input[type="range"]::-webkit-slider-thumb {} */
300
-
301
 
302
- /* ๋งˆํฌ๋‹ค์šด/HTML ์ปดํฌ๋„ŒํŠธ ๋‚ด ์Šคํƒ€์ผ */
303
- .gradio-markdown, .gradio-html {
304
- padding: 10px 0; /* ์ƒํ•˜ ํŒจ๋”ฉ */
305
- }
306
- """
307
- # --- ๋””์ž์ธ ๊ฐœ์„ ์„ ์œ„ํ•œ CSS ์ฝ”๋“œ ๋ ---
 
 
 
 
 
 
 
308
 
 
 
 
 
309
 
310
- # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์„ค์ •
311
  demo = gr.ChatInterface(
312
  fn=respond,
313
- # ํƒ€์ดํ‹€ ๋ฐ ์„ค๋ช…์— HTML ํƒœ๊ทธ ์‚ฌ์šฉ ์˜ˆ์‹œ (<br> ํƒœ๊ทธ ์‚ฌ์šฉ)
314
- title="<h1 style='text-align: center; color: #007bff;'>Bitnet-b1.58-2B-4T Chatbot</h1>",
315
- description="<p style='text-align: center; color: #555;'>This chat application is powered by Microsoft's SOTA Bitnet-b1.58-2B-4T and designed for natural and fast conversations.</p>",
316
  examples=[
317
  [
318
- "Hello! How are you?",
319
- "You are a helpful AI assistant for everyday tasks.",
320
  512,
321
  0.7,
322
  0.95,
323
  ],
324
  [
325
- "Can you code a snake game in Python?",
326
- "You are a helpful AI assistant for coding.",
327
  2048,
328
  0.7,
329
  0.95,
@@ -332,8 +101,7 @@ demo = gr.ChatInterface(
332
  additional_inputs=[
333
  gr.Textbox(
334
  value="You are a helpful AI assistant.",
335
- label="System message",
336
- lines=3 # ์‹œ์Šคํ…œ ๋ฉ”์‹œ์ง€ ์ž…๋ ฅ์ฐฝ ๋†’์ด ์กฐ์ ˆ
337
  ),
338
  gr.Slider(
339
  minimum=1,
@@ -357,14 +125,7 @@ demo = gr.ChatInterface(
357
  label="Top-p (nucleus sampling)"
358
  ),
359
  ],
360
- # ํ…Œ๋งˆ ์ ์šฉ (์—ฌ๋Ÿฌ ํ…Œ๋งˆ ์ค‘ ์„ ํƒ ๊ฐ€๋Šฅ: gr.themes.Soft(), gr.themes.Glass(), gr.themes.Default(), etc.)
361
- theme=gr.themes.Soft(),
362
- # ์ปค์Šคํ…€ CSS ์ ์šฉ
363
- css=css_styles,
364
  )
365
 
366
- # ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ์‹คํ–‰
367
  if __name__ == "__main__":
368
- # launch(share=True)๋Š” ํผ๋ธ”๋ฆญ URL ์ƒ์„ฑ (๋””๋ฒ„๊น…/๊ณต์œ  ๋ชฉ์ , ์ฃผ์˜ ํ•„์š”)
369
- demo.launch()
370
- # demo.launch(debug=True) # ๋””๋ฒ„๊น… ๋ชจ๋“œ ํ™œ์„ฑํ™”
 
1
  import os
2
+
3
+ os.system("pip install git+https://github.com/shumingma/transformers.git")
4
+
5
  import threading
6
  import torch
7
  import torch._dynamo
 
15
  import gradio as gr
16
  import spaces
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  model_id = "microsoft/bitnet-b1.58-2B-4T"
19
 
20
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
21
+ model = AutoModelForCausalLM.from_pretrained(
22
+ model_id,
23
+ torch_dtype=torch.bfloat16,
24
+ device_map="auto"
25
+ )
26
+ print(model.device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
+ @spaces.GPU
29
  def respond(
30
  message: str,
31
  history: list[tuple[str, str]],
 
46
  Yields:
47
  The growing response text as new tokens are generated.
48
  """
 
 
 
 
 
49
  messages = [{"role": "system", "content": system_message}]
50
  for user_msg, bot_msg in history:
51
  if user_msg:
 
54
  messages.append({"role": "assistant", "content": bot_msg})
55
  messages.append({"role": "user", "content": message})
56
 
57
+ prompt = tokenizer.apply_chat_template(
58
+ messages, tokenize=False, add_generation_prompt=True
59
+ )
60
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ streamer = TextIteratorStreamer(
63
+ tokenizer, skip_prompt=True, skip_special_tokens=True
64
+ )
65
+ generate_kwargs = dict(
66
+ **inputs,
67
+ streamer=streamer,
68
+ max_new_tokens=max_tokens,
69
+ temperature=temperature,
70
+ top_p=top_p,
71
+ do_sample=True,
72
+ )
73
+ thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
74
+ thread.start()
75
 
76
+ response = ""
77
+ for new_text in streamer:
78
+ response += new_text
79
+ yield response
80
 
 
81
  demo = gr.ChatInterface(
82
  fn=respond,
83
+ title="Bitnet-b1.58-2B-4T",
84
+ description="Bitnet-b1.58-2B-4T",
 
85
  examples=[
86
  [
87
+ "Hello!",
88
+ "You are a helpful AI.",
89
  512,
90
  0.7,
91
  0.95,
92
  ],
93
  [
94
+ "Can you code a snake game?",
95
+ "You are a helpful AI.",
96
  2048,
97
  0.7,
98
  0.95,
 
101
  additional_inputs=[
102
  gr.Textbox(
103
  value="You are a helpful AI assistant.",
104
+ label="System message"
 
105
  ),
106
  gr.Slider(
107
  minimum=1,
 
125
  label="Top-p (nucleus sampling)"
126
  ),
127
  ],
 
 
 
 
128
  )
129
 
 
130
  if __name__ == "__main__":
131
+ demo.launch()