Gemma-3-R1984-4B

Running on Zero

App Files Files Community

seawolf2357 committed on Mar 16

Commit

00dba49

verified ·

1 Parent(s): 1670280

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -141

app.py CHANGED Viewed

@@ -17,7 +17,10 @@ from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIter
 # CSV/TXT 분석
 import pandas as pd
-MAX_CONTENT_CHARS = 8000  # 파일에서 읽은 내용이 너무 길 경우 이 정도에서 잘라냄
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
@@ -31,16 +34,18 @@ model = Gemma3ForConditionalGeneration.from_pretrained(
 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
 def analyze_csv_file(path: str) -> str:
     """
-    Read a CSV file and render it as a string. If it is too large, only part of it is kept.
     """
     try:
         df = pd.read_csv(path)
         df_str = df.to_string()
         # Cap the rendered table at MAX_CONTENT_CHARS characters and mark the cut.
         if len(df_str) > MAX_CONTENT_CHARS:
             df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
         return f"**[CSV File: {os.path.basename(path)}]**\n\n{df_str}"
     except Exception as e:
         # Any failure is reported as text in the return value instead of being raised.
         return f"Failed to read CSV ({os.path.basename(path)}): {str(e)}"
@@ -48,19 +53,44 @@ def analyze_csv_file(path: str) -> str:
 def analyze_txt_file(path: str) -> str:
     """
-    Read the full text of a TXT file, cutting it off if it is too long.
     """
     try:
         # The file is decoded as UTF-8; a decoding error falls into the except branch below.
         with open(path, "r", encoding="utf-8") as f:
             text = f.read()
         # Cap the text at MAX_CONTENT_CHARS characters and mark the cut.
         if len(text) > MAX_CONTENT_CHARS:
             text = text[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
         return f"**[TXT File: {os.path.basename(path)}]**\n\n{text}"
     except Exception as e:
         # Any failure is reported as text in the return value instead of being raised.
         return f"Failed to read TXT ({os.path.basename(path)}): {str(e)}"
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
     video_count = 0
@@ -88,14 +118,16 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]:
 def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     """
     - 비디오 1개 초과 불가
-    - 비디오/이미지 혼합 불가
     - 이미지 개수 MAX_NUM_IMAGES 초과 불가
-    - <image> 태그가 있으면 태그 수와 실제 이미지 개수 일치
-    - CSV, TXT, PDF 등은 여기서 제한하지 않음.
     """
     media_files = []
     for f in message["files"]:
-        # 이미지(여러 확장자)나 mp4만 체크
         if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
             media_files.append(f)
@@ -124,6 +156,9 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     return True
 def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
@@ -158,84 +193,103 @@ def process_video(video_path: str) -> list[dict]:
     return content
 def process_interleaved_images(message: dict) -> list[dict]:
     """Split message["text"] at "<image>" tags and pair each tag with an entry of message["files"].

     Returns a list of {"type": "image", "url": ...} and {"type": "text", "text": ...} dicts
     in the order they occur in the text.
     """
-    logger.debug(f"{message['files']=}")
     # The capturing group makes re.split keep every "<image>" tag as its own list element.
     parts = re.split(r"(<image>)", message["text"])
-    logger.debug(f"{parts=}")
     content = []
     image_index = 0
     for part in parts:
         if part == "<image>":
             # The n-th tag is mapped to the n-th entry of message["files"].
             content.append({"type": "image", "url": message["files"][image_index]})
-            logger.debug(f"file: {message['files'][image_index]}")
             image_index += 1
         elif part.strip():
             content.append({"type": "text", "text": part.strip()})
-        elif isinstance(part, str) and part != "<image>":
-            content.append({"type": "text", "text": part})
-    logger.debug(f"{content=}")
     return content
 def process_new_user_message(message: dict) -> list[dict]:
     """Turn a new message (message["text"] plus message["files"]) into a list of content dicts."""
     if not message["files"]:
         return [{"type": "text", "text": message["text"]}]
-    # Classify files by extension
     video_files = [f for f in message["files"] if f.endswith(".mp4")]
     image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
     csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
     txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
-    # The user's input text goes first
     content_list = [{"type": "text", "text": message["text"]}]
-    # Full CSV contents
     for csv_path in csv_files:
         csv_analysis = analyze_csv_file(csv_path)
         content_list.append({"type": "text", "text": csv_analysis})
-    # Full TXT contents
     for txt_path in txt_files:
         txt_analysis = analyze_txt_file(txt_path)
         content_list.append({"type": "text", "text": txt_analysis})
-    # Video handling
     if video_files:
         content_list += process_video(video_files[0])
         return content_list
-    # Interleaved images
     if "<image>" in message["text"]:
         return process_interleaved_images(message)
-    # Plain images
-    if image_files:
         for img_path in image_files:
             content_list.append({"type": "image", "url": img_path})
     return content_list
 def process_history(history: list[dict]) -> list[dict]:
     """Convert chat history items into role/content messages.

     Consecutive non-assistant items are collected into one "user" message,
     which is emitted right before the next assistant item. String content
     becomes a text part; any other content becomes an image part whose URL
     is the content's first element. Non-assistant items that are not
     followed by an assistant item do not appear in the result.
     """
     converted: list[dict] = []
     pending_parts: list[dict] = []
     for entry in history:
         if entry["role"] != "assistant":
             payload = entry["content"]
             part = (
                 {"type": "text", "text": payload}
                 if isinstance(payload, str)
                 else {"type": "image", "url": payload[0]}
             )
             pending_parts.append(part)
             continue
         # An assistant item closes whatever user parts were collected so far.
         if pending_parts:
             converted.append({"role": "user", "content": pending_parts})
             pending_parts = []
         converted.append({"role": "assistant", "content": [{"type": "text", "text": entry["content"]}]})
     return converted
 @spaces.GPU(duration=120)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message, history):
@@ -257,140 +311,37 @@ def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tok
     ).to(device=model.device, dtype=torch.bfloat16)
     streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
-    generate_kwargs = dict(
         inputs,
         streamer=streamer,
         max_new_tokens=max_new_tokens,
     )
-    t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     output = ""
-    for delta in streamer:
-        output += delta
         yield output
 examples = [
     [
         {
-            "text": "I need to be in Japan for 10 days, going to Tokyo, Kyoto and Osaka. Think about number of attractions in each of them and allocate number of days to each city. Make public transport recommendations.",
-            "files": [],
-        }
-    ],
-    [
-        {
-            "text": "Write the matplotlib code to generate the same bar chart.",
-            "files": ["assets/additional-examples/barchart.png"],
-        }
-    ],
-    [
-        {
-            "text": "What is odd about this video?",
-            "files": ["assets/additional-examples/tmp.mp4"],
-        }
-    ],
-    [
-        {
-            "text": "I already have this supplement <image> and I want to buy this one <image>. Any warnings I should know about?",
-            "files": ["assets/additional-examples/pill1.png", "assets/additional-examples/pill2.png"],
-        }
-    ],
-    [
-        {
-            "text": "Write a poem inspired by the visual elements of the images.",
-            "files": ["assets/sample-images/06-1.png", "assets/sample-images/06-2.png"],
-        }
-    ],
-    [
-        {
-            "text": "Compose a short musical piece inspired by the visual elements of the images.",
-            "files": [
-                "assets/sample-images/07-1.png",
-                "assets/sample-images/07-2.png",
-                "assets/sample-images/07-3.png",
-                "assets/sample-images/07-4.png",
-            ],
-        }
-    ],
-    [
-        {
-            "text": "Write a short story about what might have happened in this house.",
-            "files": ["assets/sample-images/08.png"],
-        }
-    ],
-    [
-        {
-            "text": "Create a short story based on the sequence of images.",
-            "files": [
-                "assets/sample-images/09-1.png",
-                "assets/sample-images/09-2.png",
-                "assets/sample-images/09-3.png",
-                "assets/sample-images/09-4.png",
-                "assets/sample-images/09-5.png",
-            ],
-        }
-    ],
-    [
-        {
-            "text": "Describe the creatures that would live in this world.",
-            "files": ["assets/sample-images/10.png"],
-        }
-    ],
-    [
-        {
-            "text": "Read text in the image.",
-            "files": ["assets/additional-examples/1.png"],
-        }
-    ],
-    [
-        {
-            "text": "When is this ticket dated and how much did it cost?",
-            "files": ["assets/additional-examples/2.png"],
-        }
-    ],
-    [
-        {
-            "text": "Read the text in the image into markdown.",
-            "files": ["assets/additional-examples/3.png"],
-        }
-    ],
-    [
-        {
-            "text": "Evaluate this integral.",
-            "files": ["assets/additional-examples/4.png"],
-        }
-    ],
-    [
-        {
-            "text": "caption this image",
-            "files": ["assets/sample-images/01.png"],
-        }
-    ],
-    [
-        {
-            "text": "What's the sign says?",
-            "files": ["assets/sample-images/02.png"],
-        }
-    ],
-    [
-        {
-            "text": "Compare and contrast the two images.",
-            "files": ["assets/sample-images/03.png"],
-        }
-    ],
-    [
-        {
-            "text": "List all the objects in the image and their colors.",
-            "files": ["assets/sample-images/04.png"],
         }
     ],
     [
         {
-            "text": "Describe the atmosphere of the scene.",
-            "files": ["assets/sample-images/05.png"],
         }
     ],
 ]
@@ -411,7 +362,10 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Textbox(
             label="System Prompt",
-            value="You are a deeply thoughtful AI. Consider problems thoroughly and derive correct solutions through systematic reasoning. Please answer in korean."
         ),
         gr.Slider(label="Max New Tokens", minimum=100, maximum=8000, step=50, value=2000),
     ],

 # CSV/TXT 분석
 import pandas as pd
+# PDF 텍스트 추출
+import PyPDF2
+MAX_CONTENT_CHARS = 8000  # 너무 큰 파일을 막기 위해 최대 표시 8000자
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
+##################################################
+# CSV, TXT, PDF 분석 함수
+##################################################
 def analyze_csv_file(path: str) -> str:
     """Render a CSV file as text, keeping at most MAX_CONTENT_CHARS characters.

     Returns the rendered table under a "[CSV File: <name>]" header. If anything
     goes wrong, a "Failed to read CSV" message is returned instead of raising.
     """
     try:
         rendered = pd.read_csv(path).to_string()
         too_long = len(rendered) > MAX_CONTENT_CHARS
         # Over-long tables are cut and visibly marked as truncated.
         body = (rendered[:MAX_CONTENT_CHARS] + "\n...(truncated)...") if too_long else rendered
         return f"**[CSV File: {os.path.basename(path)}]**\n\n{body}"
     except Exception as exc:
         return f"Failed to read CSV ({os.path.basename(path)}): {exc!s}"
 def analyze_txt_file(path: str) -> str:
     """Read a UTF-8 text file, keeping at most MAX_CONTENT_CHARS characters.

     Returns the text under a "[TXT File: <name>]" header. If anything goes
     wrong, a "Failed to read TXT" message is returned instead of raising.
     """
     try:
         with open(path, "r", encoding="utf-8") as handle:
             contents = handle.read()
         limit = MAX_CONTENT_CHARS
         # Over-long files are cut and visibly marked as truncated.
         clipped = contents if len(contents) <= limit else contents[:limit] + "\n...(truncated)..."
         return f"**[TXT File: {os.path.basename(path)}]**\n\n{clipped}"
     except Exception as exc:
         return f"Failed to read TXT ({os.path.basename(path)}): {exc!s}"
+def pdf_to_markdown(pdf_path: str) -> str:
+    """
+    PDF → Markdown. Simple page-by-page text extraction.
+
+    Each page with non-empty text becomes a "## Page N" section. The joined
+    result is capped at MAX_CONTENT_CHARS characters. If reading fails, a
+    "Failed to read PDF" message is returned instead of raising.
+    """
+    text_chunks = []
+    try:
+        with open(pdf_path, "rb") as f:
+            reader = PyPDF2.PdfReader(f)
+            for page_num, page in enumerate(reader.pages, start=1):
+                # A falsy extract_text() result is treated as an empty page.
+                page_text = page.extract_text() or ""
+                page_text = page_text.strip()
+                # Pages with no text after stripping are skipped entirely.
+                if page_text:
+                    text_chunks.append(f"## Page {page_num}\n\n{page_text}\n")
+    except Exception as e:
+        return f"Failed to read PDF ({os.path.basename(pdf_path)}): {str(e)}"
+    full_text = "\n".join(text_chunks)
+    if len(full_text) > MAX_CONTENT_CHARS:
+        full_text = full_text[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
+    return f"**[PDF File: {os.path.basename(pdf_path)}]**\n\n{full_text}"
+##################################################
+# 이미지/비디오 업로드 제한 검사
+##################################################
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
     video_count = 0
 def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     """
     - 비디오 1개 초과 불가
+    - 비디오와 이미지 혼합 불가
     - 이미지 개수 MAX_NUM_IMAGES 초과 불가
+    - <image> 태그가 있으면 태그 수와 실제 이미지 수 일치
+    - CSV, TXT, PDF 등은 여기서 제한하지 않음
     """
     media_files = []
     for f in message["files"]:
+        # 이미지: png/jpg/jpeg/gif/webp
+        # 비디오: mp4
+        # cf) PDF, CSV, TXT 등은 제외
         if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
             media_files.append(f)
     return True
+##################################################
+# 비디오 처리
+##################################################
 def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
     return content
+##################################################
+# interleaved <image> 처리
+##################################################
 def process_interleaved_images(message: dict) -> list[dict]:
     """Interleave uploaded files with the text at each ``<image>`` tag.

     The text is split at every ``<image>`` tag. The n-th tag is replaced by an
     image part pointing at the n-th entry of ``message["files"]``. Text between
     tags is stripped; a whitespace-only separator (for example a newline
     between two tags) is kept verbatim. Empty fragments, which occur when the
     text starts or ends with a tag or two tags are adjacent, produce no part.

     Args:
         message: Mapping with a "text" string and a "files" list.

     Returns:
         Image and text parts in the order they occur in the text.

     Raises:
         IndexError: If the text has more ``<image>`` tags than files.
     """
     content: list[dict] = []
     image_index = 0
     # The capturing group makes re.split keep each tag as its own element.
     for part in re.split(r"(<image>)", message["text"]):
         if part == "<image>":
             content.append({"type": "image", "url": message["files"][image_index]})
             image_index += 1
         elif part.strip():
             content.append({"type": "text", "text": part.strip()})
         elif part:
             # Whitespace-only separator such as "\n": keep it as written.
             content.append({"type": "text", "text": part})
     return content
+##################################################
+# PDF + CSV + TXT + 이미지/비디오
+##################################################
 def process_new_user_message(message: dict) -> list[dict]:
     """Build the content parts for a newly submitted chat message.

     Uploads are classified by file extension. CSV, TXT and PDF files are
     rendered to text parts. If an ``.mp4`` file is present, only the first
     video is processed and images are not added. Otherwise images are either
     interleaved at the ``<image>`` tags in the text or appended after it.

     In the interleaved case only the image files are handed to
     ``process_interleaved_images``, so a document upload can never be picked
     as an image, and the rendered document text is appended after the
     interleaved parts instead of being dropped.

     Args:
         message: Mapping with a "text" string and a "files" list of paths.

     Returns:
         A list of ``{"type": "text", ...}`` and ``{"type": "image", ...}`` parts.
     """
     files = message["files"]
     text = message["text"]
     if not files:
         return [{"type": "text", "text": text}]
     # 1) Classify the uploads by extension.
     video_files = [f for f in files if f.endswith(".mp4")]
     image_files = [f for f in files if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
     csv_files = [f for f in files if f.lower().endswith(".csv")]
     txt_files = [f for f in files if f.lower().endswith(".txt")]
     pdf_files = [f for f in files if f.lower().endswith(".pdf")]
     # 2) Render every document upload to a text part: CSV, then TXT, then PDF.
     document_parts = [{"type": "text", "text": analyze_csv_file(p)} for p in csv_files]
     document_parts += [{"type": "text", "text": analyze_txt_file(p)} for p in txt_files]
     document_parts += [{"type": "text", "text": pdf_to_markdown(p)} for p in pdf_files]
     # 3) Video: only the first one is used, and it takes precedence over images.
     if video_files:
         return [{"type": "text", "text": text}, *document_parts, *process_video(video_files[0])]
     # 4) Interleaved images: tags must index into the image files only.
     if "<image>" in text:
         interleaved = process_interleaved_images({**message, "files": image_files})
         return interleaved + document_parts
     # 5) Plain images are appended after the text and the documents.
     image_parts = [{"type": "image", "url": p} for p in image_files]
     return [{"type": "text", "text": text}, *document_parts, *image_parts]
+##################################################
+# history -> LLM 메시지 변환
+##################################################
 def process_history(history: list[dict]) -> list[dict]:
     """Convert chat history items into role/content messages.

     Non-assistant items accumulate as parts of one pending "user" message. The
     pending message is emitted when the next assistant item is reached. Items
     left pending at the end of the history are not emitted.
     """

     def _to_part(payload) -> dict:
         # String content is text; anything else is treated as an image whose
         # URL is the first element of the content.
         if isinstance(payload, str):
             return {"type": "text", "text": payload}
         return {"type": "image", "url": payload[0]}

     result: list[dict] = []
     user_parts: list[dict] = []
     for turn in history:
         is_assistant = turn["role"] == "assistant"
         if not is_assistant:
             user_parts.append(_to_part(turn["content"]))
             continue
         # Flush the collected user parts before recording the assistant reply.
         if user_parts:
             result.append({"role": "user", "content": user_parts})
             user_parts = []
         reply = {"type": "text", "text": turn["content"]}
         result.append({"role": "assistant", "content": [reply]})
     return result
+##################################################
+# 메인 추론 함수
+##################################################
 @spaces.GPU(duration=120)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message, history):
     ).to(device=model.device, dtype=torch.bfloat16)
     streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
+    gen_kwargs = dict(
         inputs,
         streamer=streamer,
         max_new_tokens=max_new_tokens,
     )
+    t = Thread(target=model.generate, kwargs=gen_kwargs)
     t.start()
     output = ""
+    for new_text in streamer:
+        output += new_text
         yield output
+##################################################
+# 예시들 (기존)
+##################################################
 examples = [
     [
         {
+            "text": "Test with PDF",
+            "files": ["assets/sample.pdf"],
         }
     ],
     [
         {
+            "text": "Simple text with CSV upload.",
+            "files": ["assets/sample.csv"],
         }
     ],
+    # ...keep the original examples...
 ]
     additional_inputs=[
         gr.Textbox(
             label="System Prompt",
+            value=(
+                "You are a deeply thoughtful AI. Consider problems thoroughly and derive "
+                "correct solutions through systematic reasoning. Please answer in korean."
+            )
         ),
         gr.Slider(label="Max New Tokens", minimum=100, maximum=8000, step=50, value=2000),
     ],