seawolf2357 committed on
Commit
a9e7179
·
verified ·
1 Parent(s): a2b3420

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -57
app.py CHANGED
@@ -16,13 +16,15 @@ from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIter
16
 
17
  # CSV/TXT 분석
18
  import pandas as pd
19
-
20
- # PDF 텍스트 추출
21
  import PyPDF2
22
 
23
- MAX_CONTENT_CHARS = 8000 # 너무 큰 파일을 막기 위해 최대 표시 8000자
24
-
 
 
25
  model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 
26
  processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
27
  model = Gemma3ForConditionalGeneration.from_pretrained(
28
  model_id,
@@ -35,12 +37,10 @@ MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
35
 
36
 
37
  ##################################################
38
- # CSV, TXT, PDF 분석 함수
39
  ##################################################
40
  def analyze_csv_file(path: str) -> str:
41
- """
42
- CSV ํŒŒ์ผ์„ ์ „์ฒด ๋ฌธ์ž์—ด๋กœ ๋ณ€ํ™˜. ๋„ˆ๋ฌด ๊ธธ ๊ฒฝ์šฐ ์ผ๋ถ€๋งŒ ํ‘œ์‹œ.
43
- """
44
  try:
45
  df = pd.read_csv(path)
46
  df_str = df.to_string()
@@ -52,9 +52,7 @@ def analyze_csv_file(path: str) -> str:
52
 
53
 
54
  def analyze_txt_file(path: str) -> str:
55
- """
56
- TXT ํŒŒ์ผ ์ „๋ฌธ ์ฝ๊ธฐ. ๋„ˆ๋ฌด ๊ธธ๋ฉด ์ผ๋ถ€๋งŒ ํ‘œ์‹œ.
57
- """
58
  try:
59
  with open(path, "r", encoding="utf-8") as f:
60
  text = f.read()
@@ -66,9 +64,7 @@ def analyze_txt_file(path: str) -> str:
66
 
67
 
68
  def pdf_to_markdown(pdf_path: str) -> str:
69
- """
70
- PDF โ†’ Markdown. ํŽ˜์ด์ง€๋ณ„๋กœ ๊ฐ„๋‹จํžˆ ํ…์ŠคํŠธ ์ถ”์ถœ.
71
- """
72
  text_chunks = []
73
  try:
74
  with open(pdf_path, "rb") as f:
@@ -89,7 +85,7 @@ def pdf_to_markdown(pdf_path: str) -> str:
89
 
90
 
91
  ##################################################
92
- # 이미지/비디오 업로드 제한 검사
93
  ##################################################
94
  def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
95
  image_count = 0
@@ -106,8 +102,10 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]:
106
  image_count = 0
107
  video_count = 0
108
  for item in history:
 
109
  if item["role"] != "user" or isinstance(item["content"], str):
110
  continue
 
111
  if item["content"][0].endswith(".mp4"):
112
  video_count += 1
113
  else:
@@ -117,17 +115,13 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]:
117
 
118
  def validate_media_constraints(message: dict, history: list[dict]) -> bool:
119
  """
120
- - ๋น„๋””์˜ค 1๊ฐœ ์ดˆ๊ณผ ๋ถˆ๊ฐ€
121
- - ๋น„๋””์˜ค์™€ ์ด๋ฏธ์ง€ ํ˜ผํ•ฉ ๋ถˆ๊ฐ€
122
- - ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜ MAX_NUM_IMAGES ์ดˆ๊ณผ ๋ถˆ๊ฐ€
123
- - <image> ํƒœ๊ทธ๊ฐ€ ์žˆ์œผ๋ฉด ํƒœ๊ทธ ์ˆ˜์™€ ์‹ค์ œ ์ด๋ฏธ์ง€ ์ˆ˜ ์ผ์น˜
124
- - CSV, TXT, PDF ๋“ฑ์€ ์—ฌ๊ธฐ์„œ ์ œํ•œํ•˜์ง€ ์•Š์Œ
125
  """
126
  media_files = []
127
  for f in message["files"]:
128
- # ์ด๋ฏธ์ง€: png/jpg/jpeg/gif/webp
129
- # ๋น„๋””์˜ค: mp4
130
- # cf) PDF, CSV, TXT ๋“ฑ์€ ์ œ์™ธ
131
  if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
132
  media_files.append(f)
133
 
@@ -136,9 +130,11 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
136
  image_count = history_image_count + new_image_count
137
  video_count = history_video_count + new_video_count
138
 
 
139
  if video_count > 1:
140
  gr.Warning("Only one video is supported.")
141
  return False
 
142
  if video_count == 1:
143
  if image_count > 0:
144
  gr.Warning("Mixing images and videos is not allowed.")
@@ -146,9 +142,11 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
146
  if "<image>" in message["text"]:
147
  gr.Warning("Using <image> tags with video files is not supported.")
148
  return False
 
149
  if video_count == 0 and image_count > MAX_NUM_IMAGES:
150
  gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
151
  return False
 
152
  if "<image>" in message["text"] and message["text"].count("<image>") != new_image_count:
153
  gr.Warning("The number of <image> tags in the text does not match the number of images.")
154
  return False
@@ -157,16 +155,16 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
157
 
158
 
159
  ##################################################
160
- # 비디오 처리
161
  ##################################################
162
  def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
 
163
  vidcap = cv2.VideoCapture(video_path)
164
  fps = vidcap.get(cv2.CAP_PROP_FPS)
165
  total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
166
-
167
  frame_interval = int(fps / 3)
168
- frames = []
169
 
 
170
  for i in range(0, total_frames, frame_interval):
171
  vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
172
  success, image = vidcap.read()
@@ -175,7 +173,6 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
175
  pil_image = Image.fromarray(image)
176
  timestamp = round(i / fps, 2)
177
  frames.append((pil_image, timestamp))
178
-
179
  vidcap.release()
180
  return frames
181
 
@@ -183,8 +180,7 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
183
  def process_video(video_path: str) -> list[dict]:
184
  content = []
185
  frames = downsample_video(video_path)
186
- for frame in frames:
187
- pil_image, timestamp = frame
188
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
189
  pil_image.save(temp_file.name)
190
  content.append({"type": "text", "text": f"Frame {timestamp}:"})
@@ -194,7 +190,7 @@ def process_video(video_path: str) -> list[dict]:
194
 
195
 
196
  ##################################################
197
- # interleaved <image> 처리
198
  ##################################################
199
  def process_interleaved_images(message: dict) -> list[dict]:
200
  parts = re.split(r"(<image>)", message["text"])
@@ -207,55 +203,56 @@ def process_interleaved_images(message: dict) -> list[dict]:
207
  elif part.strip():
208
  content.append({"type": "text", "text": part.strip()})
209
  else:
210
- # ๊ณต๋ฐฑ์ด๊ฑฐ๋‚˜ \n ๊ฐ™์€ ๊ฒฝ์šฐ
211
  if isinstance(part, str) and part != "<image>":
212
  content.append({"type": "text", "text": part})
213
  return content
214
 
215
 
216
  ##################################################
217
- # PDF + CSV + TXT + ์ด๋ฏธ์ง€/๋น„๋””์˜ค
218
  ##################################################
219
  def process_new_user_message(message: dict) -> list[dict]:
220
  if not message["files"]:
221
  return [{"type": "text", "text": message["text"]}]
222
 
223
- # 1) ํŒŒ์ผ ๋ถ„๋ฅ˜
224
  video_files = [f for f in message["files"] if f.endswith(".mp4")]
225
  image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
226
  csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
227
  txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
228
  pdf_files = [f for f in message["files"] if f.lower().endswith(".pdf")]
229
 
230
- # 2) ์‚ฌ์šฉ์ž ์›๋ณธ text ์ถ”๊ฐ€
231
  content_list = [{"type": "text", "text": message["text"]}]
232
 
233
- # 3) CSV
234
  for csv_path in csv_files:
235
  csv_analysis = analyze_csv_file(csv_path)
 
236
  content_list.append({"type": "text", "text": csv_analysis})
237
 
238
- # 4) TXT
239
  for txt_path in txt_files:
240
  txt_analysis = analyze_txt_file(txt_path)
241
  content_list.append({"type": "text", "text": txt_analysis})
242
 
243
- # 5) PDF
244
  for pdf_path in pdf_files:
245
  pdf_markdown = pdf_to_markdown(pdf_path)
246
  content_list.append({"type": "text", "text": pdf_markdown})
247
 
248
- # 6) ๋น„๋””์˜ค (ํ•œ ๊ฐœ๋งŒ ํ—ˆ์šฉ)
249
  if video_files:
250
  content_list += process_video(video_files[0])
251
  return content_list
252
 
253
- # 7) ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ
254
  if "<image>" in message["text"]:
255
  # interleaved
256
  return process_interleaved_images(message)
257
  else:
258
- # ์ผ๋ฐ˜ ์—ฌ๋Ÿฌ ์žฅ
259
  for img_path in image_files:
260
  content_list.append({"type": "image", "url": img_path})
261
 
@@ -263,45 +260,45 @@ def process_new_user_message(message: dict) -> list[dict]:
263
 
264
 
265
  ##################################################
266
- # history -> LLM 메시지 변환
267
  ##################################################
268
  def process_history(history: list[dict]) -> list[dict]:
269
  messages = []
270
  current_user_content: list[dict] = []
271
  for item in history:
272
  if item["role"] == "assistant":
273
- # user_content๊ฐ€ ์Œ“์—ฌ์žˆ๋‹ค๋ฉด user ๋ฉ”์‹œ์ง€๋กœ ์ €์žฅ
274
  if current_user_content:
275
  messages.append({"role": "user", "content": current_user_content})
276
  current_user_content = []
277
- # ๊ทธ ๋’ค item์€ assistant
278
  messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
279
  else:
280
- # user
281
  content = item["content"]
282
  if isinstance(content, str):
283
  current_user_content.append({"type": "text", "text": content})
284
  else:
285
- # ์ด๋ฏธ์ง€๋‚˜ ๊ธฐํƒ€
286
  current_user_content.append({"type": "image", "url": content[0]})
287
  return messages
288
 
289
 
290
  ##################################################
291
- # 메인 추론 함수
292
  ##################################################
293
  @spaces.GPU(duration=120)
294
  def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
 
295
  if not validate_media_constraints(message, history):
296
  yield ""
297
  return
298
 
 
299
  messages = []
300
  if system_prompt:
301
  messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
302
  messages.extend(process_history(history))
303
  messages.append({"role": "user", "content": process_new_user_message(message)})
304
 
 
305
  inputs = processor.apply_chat_template(
306
  messages,
307
  add_generation_prompt=True,
@@ -325,9 +322,6 @@ def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tok
325
  yield output
326
 
327
 
328
- ##################################################
329
- # 예시들 (기존)
330
- ##################################################
331
  ##################################################
332
  # 예시들 (한글화 버전)
333
  ##################################################
@@ -462,14 +456,18 @@ examples = [
462
 
463
 
464
 
 
 
 
 
465
  demo = gr.ChatInterface(
466
  fn=run,
467
  type="messages",
468
  chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
469
- # .webp, .png, .jpg, .jpeg, .gif, .mp4, .csv, .txt, .pdf ๋ชจ๋‘ ํ—ˆ์šฉ
470
  textbox=gr.MultimodalTextbox(
471
  file_types=[
472
- ".webp", ".png", ".jpg", ".jpeg", ".gif",
473
  ".mp4", ".csv", ".txt", ".pdf"
474
  ],
475
  file_count="multiple",
@@ -479,15 +477,18 @@ demo = gr.ChatInterface(
479
  additional_inputs=[
480
  gr.Textbox(
481
  label="System Prompt",
482
- value=(
483
- "You are a deeply thoughtful AI. Consider problems thoroughly and derive "
484
- "correct solutions through systematic reasoning. Please answer in korean."
485
- )
 
 
 
 
486
  ),
487
- gr.Slider(label="Max New Tokens", minimum=100, maximum=8000, step=50, value=2000),
488
  ],
489
  stop_btn=False,
490
- title="Vidraft-Gemma-3-27B",
491
  examples=examples,
492
  run_examples_on_click=False,
493
  cache_examples=False,
@@ -497,3 +498,6 @@ demo = gr.ChatInterface(
497
 
498
  if __name__ == "__main__":
499
  demo.launch()
 
 
 
 
16
 
17
  # CSV/TXT 분석
18
  import pandas as pd
19
+ # PDF 텍스트 추출용
 
20
  import PyPDF2
21
 
22
+ ##################################################
23
+ # 상수 및 모델 로딩
24
+ ##################################################
25
+ MAX_CONTENT_CHARS = 8000 # ๋„ˆ๋ฌด ํฐ ํŒŒ์ผ ๋‚ด์šฉ์€ ์ด ์ •๋„๊นŒ์ง€๋งŒ ํ‘œ์‹œ
26
  model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
27
+
28
  processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
29
  model = Gemma3ForConditionalGeneration.from_pretrained(
30
  model_id,
 
37
 
38
 
39
  ##################################################
40
+ # 1) CSV, TXT, PDF 분석 함수
41
  ##################################################
42
  def analyze_csv_file(path: str) -> str:
43
+ """CSV ํŒŒ์ผ์„ ์ฝ์–ด ๋ฌธ์ž์—ดํ™”. ๋„ˆ๋ฌด ๊ธธ๋ฉด ์ผ๋ถ€๋งŒ ์ถœ๋ ฅ."""
 
 
44
  try:
45
  df = pd.read_csv(path)
46
  df_str = df.to_string()
 
52
 
53
 
54
  def analyze_txt_file(path: str) -> str:
55
+ """TXT ํŒŒ์ผ ์ „์ฒด๋ฅผ ์ฝ์–ด ๋ฌธ์ž์—ด ๋ฐ˜ํ™˜. ๋„ˆ๋ฌด ๊ธธ๋ฉด ์ž˜๋ผ๋ƒ„."""
 
 
56
  try:
57
  with open(path, "r", encoding="utf-8") as f:
58
  text = f.read()
 
64
 
65
 
66
  def pdf_to_markdown(pdf_path: str) -> str:
67
+ """PDF -> ํ…์ŠคํŠธ ์ถ”์ถœ -> Markdown ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜. ๋„ˆ๋ฌด ๊ธธ๋ฉด ์ž๋ฆ„."""
 
 
68
  text_chunks = []
69
  try:
70
  with open(pdf_path, "rb") as f:
 
85
 
86
 
87
  ##################################################
88
+ # 2) 이미지/비디오 개수 제한 검사
89
  ##################################################
90
  def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
91
  image_count = 0
 
102
  image_count = 0
103
  video_count = 0
104
  for item in history:
105
+ # assistant ๋ฉ”์‹œ์ง€์ด๊ฑฐ๋‚˜ content๊ฐ€ str์ด๋ฉด ์ œ์™ธ
106
  if item["role"] != "user" or isinstance(item["content"], str):
107
  continue
108
+ # ์ด๋ฏธ์ง€/๋น„๋””์˜ค ๊ฒฝ๋กœ๋กœ๋งŒ ์นด์šดํŠธ
109
  if item["content"][0].endswith(".mp4"):
110
  video_count += 1
111
  else:
 
115
 
116
  def validate_media_constraints(message: dict, history: list[dict]) -> bool:
117
  """
118
+ - ์ด๋ฏธ์ง€/๋น„๋””์˜ค๋งŒ ๋Œ€์ƒ์œผ๋กœ ๊ฐœ์ˆ˜ยทํ˜ผํ•ฉ ์ œํ•œ
119
+ - CSV, PDF, TXT ๋“ฑ์€ ๋Œ€์ƒ ์ œ์™ธ
120
+ - <image> ํƒœ๊ทธ์™€ ์‹ค์ œ ์ด๋ฏธ์ง€ ์ˆ˜๊ฐ€ ์ผ์น˜ํ•˜๋Š”์ง€ ๋“ฑ
 
 
121
  """
122
  media_files = []
123
  for f in message["files"]:
124
+ # ์ด๋ฏธ์ง€ ํ™•์žฅ์ž ๋˜๋Š” .mp4
 
 
125
  if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
126
  media_files.append(f)
127
 
 
130
  image_count = history_image_count + new_image_count
131
  video_count = history_video_count + new_video_count
132
 
133
+ # ๋น„๋””์˜ค 1๊ฐœ ์ดˆ๊ณผ ๋ถˆ๊ฐ€
134
  if video_count > 1:
135
  gr.Warning("Only one video is supported.")
136
  return False
137
+ # ๋น„๋””์˜ค + ์ด๋ฏธ์ง€ ํ˜ผํ•ฉ ๋ถˆ๊ฐ€
138
  if video_count == 1:
139
  if image_count > 0:
140
  gr.Warning("Mixing images and videos is not allowed.")
 
142
  if "<image>" in message["text"]:
143
  gr.Warning("Using <image> tags with video files is not supported.")
144
  return False
145
+ # ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜ ์ œํ•œ
146
  if video_count == 0 and image_count > MAX_NUM_IMAGES:
147
  gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
148
  return False
149
+ # <image> ํƒœ๊ทธ์™€ ์‹ค์ œ ์ด๋ฏธ์ง€ ์ˆ˜๊ฐ€ ์ผ์น˜?
150
  if "<image>" in message["text"] and message["text"].count("<image>") != new_image_count:
151
  gr.Warning("The number of <image> tags in the text does not match the number of images.")
152
  return False
 
155
 
156
 
157
  ##################################################
158
+ # 3) 비디오 처리
159
  ##################################################
160
  def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
161
+ """์˜์ƒ์—์„œ ์ผ์ • ๊ฐ„๊ฒฉ์œผ๋กœ ํ”„๋ ˆ์ž„์„ ์ถ”์ถœ, PIL ์ด๋ฏธ์ง€์™€ timestamp ๋ฐ˜ํ™˜."""
162
  vidcap = cv2.VideoCapture(video_path)
163
  fps = vidcap.get(cv2.CAP_PROP_FPS)
164
  total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
 
165
  frame_interval = int(fps / 3)
 
166
 
167
+ frames = []
168
  for i in range(0, total_frames, frame_interval):
169
  vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
170
  success, image = vidcap.read()
 
173
  pil_image = Image.fromarray(image)
174
  timestamp = round(i / fps, 2)
175
  frames.append((pil_image, timestamp))
 
176
  vidcap.release()
177
  return frames
178
 
 
180
  def process_video(video_path: str) -> list[dict]:
181
  content = []
182
  frames = downsample_video(video_path)
183
+ for pil_image, timestamp in frames:
 
184
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
185
  pil_image.save(temp_file.name)
186
  content.append({"type": "text", "text": f"Frame {timestamp}:"})
 
190
 
191
 
192
  ##################################################
193
+ # 4) interleaved <image> 처리
194
  ##################################################
195
  def process_interleaved_images(message: dict) -> list[dict]:
196
  parts = re.split(r"(<image>)", message["text"])
 
203
  elif part.strip():
204
  content.append({"type": "text", "text": part.strip()})
205
  else:
206
+ # ๊ณต๋ฐฑ๋งŒ ์žˆ๋Š” ๊ฒฝ์šฐ
207
  if isinstance(part, str) and part != "<image>":
208
  content.append({"type": "text", "text": part})
209
  return content
210
 
211
 
212
  ##################################################
213
+ # 5) CSV/PDF/TXT๋Š” ํ…์ŠคํŠธ๋กœ๋งŒ, ์ด๋ฏธ์ง€/๋น„๋””์˜ค๋Š” ๊ฒฝ๋กœ๋กœ
214
  ##################################################
215
  def process_new_user_message(message: dict) -> list[dict]:
216
  if not message["files"]:
217
  return [{"type": "text", "text": message["text"]}]
218
 
219
+ # ํ™•์žฅ์ž๋ณ„ ๋ถ„๋ฅ˜
220
  video_files = [f for f in message["files"] if f.endswith(".mp4")]
221
  image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
222
  csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
223
  txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
224
  pdf_files = [f for f in message["files"] if f.lower().endswith(".pdf")]
225
 
226
+ # user ํ…์ŠคํŠธ ๋จผ์ € ์ถ”๊ฐ€
227
  content_list = [{"type": "text", "text": message["text"]}]
228
 
229
+ # CSV
230
  for csv_path in csv_files:
231
  csv_analysis = analyze_csv_file(csv_path)
232
+ # ๋ถ„์„ ๋‚ด์šฉ๋งŒ ๋„ฃ์Œ (ํŒŒ์ผ ๊ฒฝ๋กœ๋ฅผ ํžˆ์Šคํ† ๋ฆฌ์— ์ถ”๊ฐ€ํ•˜์ง€ ์•Š์Œ)
233
  content_list.append({"type": "text", "text": csv_analysis})
234
 
235
+ # TXT
236
  for txt_path in txt_files:
237
  txt_analysis = analyze_txt_file(txt_path)
238
  content_list.append({"type": "text", "text": txt_analysis})
239
 
240
+ # PDF
241
  for pdf_path in pdf_files:
242
  pdf_markdown = pdf_to_markdown(pdf_path)
243
  content_list.append({"type": "text", "text": pdf_markdown})
244
 
245
+ # ๋น„๋””์˜ค
246
  if video_files:
247
  content_list += process_video(video_files[0])
248
  return content_list
249
 
250
+ # ์ด๋ฏธ์ง€
251
  if "<image>" in message["text"]:
252
  # interleaved
253
  return process_interleaved_images(message)
254
  else:
255
+ # ์—ฌ๋Ÿฌ ์žฅ ์ด๋ฏธ์ง€
256
  for img_path in image_files:
257
  content_list.append({"type": "image", "url": img_path})
258
 
 
260
 
261
 
262
  ##################################################
263
+ # 6) history -> LLM 메시지 변환
264
  ##################################################
265
  def process_history(history: list[dict]) -> list[dict]:
266
  messages = []
267
  current_user_content: list[dict] = []
268
  for item in history:
269
  if item["role"] == "assistant":
 
270
  if current_user_content:
271
  messages.append({"role": "user", "content": current_user_content})
272
  current_user_content = []
 
273
  messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
274
  else:
 
275
  content = item["content"]
276
  if isinstance(content, str):
277
  current_user_content.append({"type": "text", "text": content})
278
  else:
279
+ # ์ด๋ฏธ์ง€ or ๊ธฐํƒ€ ํŒŒ์ผ url
280
  current_user_content.append({"type": "image", "url": content[0]})
281
  return messages
282
 
283
 
284
  ##################################################
285
+ # 7) 메인 추론 함수
286
  ##################################################
287
  @spaces.GPU(duration=120)
288
  def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
289
+ # a) ์ด๋ฏธ์ง€/๋น„๋””์˜ค ์ œํ•œ ๊ฒ€์‚ฌ
290
  if not validate_media_constraints(message, history):
291
  yield ""
292
  return
293
 
294
+ # b) ๋Œ€ํ™” ๊ธฐ๋ก + ์ด๋ฒˆ ๋ฉ”์‹œ์ง€
295
  messages = []
296
  if system_prompt:
297
  messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
298
  messages.extend(process_history(history))
299
  messages.append({"role": "user", "content": process_new_user_message(message)})
300
 
301
+ # c) ๋ชจ๋ธ ์ถ”๋ก 
302
  inputs = processor.apply_chat_template(
303
  messages,
304
  add_generation_prompt=True,
 
322
  yield output
323
 
324
 
 
 
 
325
  ##################################################
326
  # 예시들 (한글화 버전)
327
  ##################################################
 
456
 
457
 
458
 
459
+
460
+ ##################################################
461
+ # 9) Gradio ChatInterface
462
+ ##################################################
463
  demo = gr.ChatInterface(
464
  fn=run,
465
  type="messages",
466
  chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
467
+ # ์ด๋ฏธ์ง€/๋™์˜์ƒ + CSV/TXT/PDF ํ—ˆ์šฉ (์ด๋ฏธ์ง€: webp ํฌํ•จ)
468
  textbox=gr.MultimodalTextbox(
469
  file_types=[
470
+ ".png", ".jpg", ".jpeg", ".gif", ".webp",
471
  ".mp4", ".csv", ".txt", ".pdf"
472
  ],
473
  file_count="multiple",
 
477
  additional_inputs=[
478
  gr.Textbox(
479
  label="System Prompt",
480
+ value="You are a deeply thoughtful AI. Consider problems thoroughly and derive correct solutions through systematic reasoning. Please answer in korean."
481
+ ),
482
+ gr.Slider(
483
+ label="Max New Tokens",
484
+ minimum=100,
485
+ maximum=8000,
486
+ step=50,
487
+ value=2000
488
  ),
 
489
  ],
490
  stop_btn=False,
491
+ title="Gemma 3 27B IT",
492
  examples=examples,
493
  run_examples_on_click=False,
494
  cache_examples=False,
 
498
 
499
  if __name__ == "__main__":
500
  demo.launch()
501
+
502
+
503
+