RAGOndevice

Running on Zero

App Files Files Community

openfree committed on Mar 5

Commit

e6c14df

verified ·

1 Parent(s): 3e45a0e

Update app.py

Browse files

Files changed (1) hide show

app.py +133 -153

app.py CHANGED Viewed

@@ -1,15 +1,31 @@
 import os
-# Dynamo 완전 비활성화
 os.environ["TORCH_DYNAMO_DISABLE"] = "1"
 import torch
-# 성능 최적화를 위한 설정 (TensorFloat32 연산 활성화)
 torch.set_float32_matmul_precision('high')
 import torch._inductor
 torch._inductor.config.triton.cudagraphs = False
 import torch._dynamo
 import gradio as gr
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from threading import Thread
@@ -24,16 +40,14 @@ from datetime import datetime
 import pyarrow.parquet as pq
 import pypdf
 import io
-import pyarrow.parquet as pq
-from tabulate import tabulate
 import platform
 import subprocess
 import pytesseract
 from pdf2image import convert_from_path
-import queue  # 추가: queue.Empty 예외 처리를 위해
-import time  # 추가: 스트리밍 타이밍을 위해
-# -------------------- 추가: PDF to Markdown 변환 관련 import --------------------
 try:
     import re
     import requests
@@ -50,9 +64,6 @@ except ModuleNotFoundError as e:
     )
 # ---------------------------------------------------------------------------
-# 1) Dynamo suppress_errors 옵션 사용 (오류 시 eager로 fallback)
-torch._dynamo.config.suppress_errors = True
 # 전역 변수
 current_file_context = None
@@ -62,21 +73,21 @@ MODEL_ID = "CohereForAI/c4ai-command-r7b-12-2024"
 MODELS = os.environ.get("MODELS")
 MODEL_NAME = MODEL_ID.split("/")[-1]
-model = None  # 전역 변수로 선언
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-# 위키피디아 데이터셋 로드
 wiki_dataset = load_dataset("lcw99/wikipedia-korean-20240501-1million-qna")
 print("Wikipedia dataset loaded:", wiki_dataset)
-# TF-IDF 벡터라이저 초기화 및 학습
 print("TF-IDF 벡터화 시작...")
 questions = wiki_dataset['train']['question'][:10000]  # 처음 10000개만 사용
 vectorizer = TfidfVectorizer(max_features=1000)
 question_vectors = vectorizer.fit_transform(questions)
 print("TF-IDF 벡터화 완료")
 class ChatHistory:
     def __init__(self):
         self.history = []
@@ -132,19 +143,18 @@ class ChatHistory:
             print(f"히스토리 로드 실패: {e}")
             self.history = []
-# 전역 ChatHistory 인스턴스 생성
 chat_history = ChatHistory()
 def find_relevant_context(query, top_k=3):
     # 쿼리 벡터화
     query_vector = vectorizer.transform([query])
-    # 코사인 유사도 계산
     similarities = (query_vector * question_vectors.T).toarray()[0]
-    # 가장 유사한 질문들의 인덱스
     top_indices = np.argsort(similarities)[-top_k:][::-1]
-    # 관련 컨텍스트 추출
     relevant_contexts = []
     for idx in top_indices:
         if similarities[idx] > 0:
@@ -155,16 +165,14 @@ def find_relevant_context(query, top_k=3):
             })
     return relevant_contexts
 def init_msg():
     """Return the placeholder message used while an uploaded file is analyzed."""
     placeholder = "파일을 분석하고 있습니다..."
     return placeholder
 # -------------------- PDF 파일을 Markdown으로 변환하는 유틸 함수들 --------------------
 def extract_text_from_pdf(reader: PdfReader) -> str:
     """
     PyPDF를 사용해 모든 페이지 텍스트를 추출.
-    만약 텍스트가 없으면 빈 문자열 반환.
     """
     full_text = ""
     for idx, page in enumerate(reader.pages):
@@ -173,20 +181,17 @@ def extract_text_from_pdf(reader: PdfReader) -> str:
             full_text += f"---- Page {idx+1} ----\n" + text + "\n\n"
     return full_text.strip()
 def convert_pdf_to_markdown(pdf_file: str):
     """
-    PDF 파일을 읽고 텍스트를 추출한 뒤,
-    이미지가 많고 텍스트가 적은 경우에는 OCR을 시도한다.
-    최종적으로 Markdown 형식으로 변환 가능한 텍스트를 반환한다.
-    메타데이터도 함께 반환.
     """
     try:
         reader = PdfReader(pdf_file)
     except Exception as e:
         return f"PDF 파일을 읽는 중 오류 발생: {e}", None, None
-    # Extract metadata
     raw_meta = reader.metadata
     metadata = {
         "author": raw_meta.author if raw_meta else None,
@@ -196,19 +201,16 @@ def convert_pdf_to_markdown(pdf_file: str):
         "title": raw_meta.title if raw_meta else None,
     }
-    # Extract text
     full_text = extract_text_from_pdf(reader)
-    # 이미지가 많고 텍스트가 너무 짧으면 OCR 시도
-    image_count = 0
-    for page in reader.pages:
-        image_count += len(page.images)
     if image_count > 0 and len(full_text) < 1000:
         try:
             out_pdf_file = pdf_file.replace(".pdf", "_ocr.pdf")
             ocrmypdf.ocr(pdf_file, out_pdf_file, force_ocr=True)
-            # Re-extract text from OCR-processed PDF
             reader_ocr = PdfReader(out_pdf_file)
             full_text = extract_text_from_pdf(reader_ocr)
         except Exception as e:
@@ -216,11 +218,9 @@ def convert_pdf_to_markdown(pdf_file: str):
     return full_text, metadata, pdf_file
-# ---------------------------------------------------------------------------
 def analyze_file_content(content, file_type):
-    """파일 내용을 간단히 분석한 후 구조 요약을 반환."""
     if file_type in ['parquet', 'csv']:
         try:
             lines = content.split('\n')
@@ -245,15 +245,13 @@ def analyze_file_content(content, file_type):
     words = len(content.split())
     return f"📝 Document Structure: {total_lines} lines, {paragraphs} paragraphs, approximately {words} words"
 def read_uploaded_file(file):
     """
-    업로드된 파일을 처리하여
-    1) 파일 타입별로 내용을 읽고
-    2) 분석 결과와 함께 반환
     """
     if file is None:
         return "", ""
     try:
         file_ext = os.path.splitext(file.name)[1].lower()
@@ -267,7 +265,8 @@ def read_uploaded_file(file):
                 content += f"1. Basic Information:\n"
                 content += f"- Total Rows: {len(df):,}\n"
                 content += f"- Total Columns: {len(df.columns)}\n"
-                content += f"- Memory Usage: {df.memory_usage(deep=True).sum() / 1024 / 1024:.2f} MB\n\n"
                 content += f"2. Column Information:\n"
                 for col in df.columns:
@@ -279,7 +278,8 @@ def read_uploaded_file(file):
                 content += f"\n\n4. Missing Values:\n"
                 null_counts = df.isnull().sum()
                 for col, count in null_counts[null_counts > 0].items():
-                    content += f"- {col}: {count:,} ({count/len(df)*100:.1f}%)\n"
                 numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns
                 if len(numeric_cols) > 0:
@@ -291,7 +291,7 @@ def read_uploaded_file(file):
             except Exception as e:
                 return f"Error reading Parquet file: {str(e)}", "error"
-        # PDF (Markdown 변환)
         if file_ext == '.pdf':
             try:
                 markdown_text, metadata, processed_pdf_path = convert_pdf_to_markdown(file.name)
@@ -305,7 +305,6 @@ def read_uploaded_file(file):
                 content += "## Extracted Text\n\n"
                 content += markdown_text
                 return content, "pdf"
             except Exception as e:
                 return f"Error reading PDF file: {str(e)}", "error"
@@ -320,7 +319,8 @@ def read_uploaded_file(file):
                     content += f"1. Basic Information:\n"
                     content += f"- Total Rows: {len(df):,}\n"
                     content += f"- Total Columns: {len(df.columns)}\n"
-                    content += f"- Memory Usage: {df.memory_usage(deep=True).sum() / 1024 / 1024:.2f} MB\n\n"
                     content += f"2. Column Information:\n"
                     for col in df.columns:
@@ -332,14 +332,17 @@ def read_uploaded_file(file):
                     content += f"\n\n4. Missing Values:\n"
                     null_counts = df.isnull().sum()
                     for col, count in null_counts[null_counts > 0].items():
-                        content += f"- {col}: {count:,} ({count/len(df)*100:.1f}%)\n"
                     return content, "csv"
                 except UnicodeDecodeError:
                     continue
-            raise UnicodeDecodeError(f"Unable to read file with supported encodings ({', '.join(encodings)})")
-        # 일반 텍스트 파일
         else:
             encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
             for encoding in encodings:
@@ -350,15 +353,19 @@ def read_uploaded_file(file):
                     lines = content.split('\n')
                     total_lines = len(lines)
                     non_empty_lines = len([line for line in lines if line.strip()])
-                    is_code = any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function'])
                     analysis = f"\n📝 File Analysis:\n"
                     if is_code:
-                        functions = len([line for line in lines if 'def ' in line])
-                        classes = len([line for line in lines if 'class ' in line])
-                        imports = len([line for line in lines if 'import ' in line or 'from ' in line])
                         analysis += f"- File Type: Code\n"
                         analysis += f"- Total Lines: {total_lines:,}\n"
                         analysis += f"- Functions: {functions}\n"
@@ -375,14 +382,18 @@ def read_uploaded_file(file):
                         analysis += f"- Character Count: {chars:,}\n"
                     return content + analysis, "text"
                 except UnicodeDecodeError:
                     continue
-            raise UnicodeDecodeError(f"Unable to read file with supported encodings ({', '.join(encodings)})")
     except Exception as e:
         return f"Error reading file: {str(e)}", "error"
 CSS = """
 /* 3D 스타일 CSS */
 :root {
@@ -539,22 +550,20 @@ body {
 """
 def clear_cuda_memory():
     """Release cached CUDA memory held by PyTorch's caching allocator.

     Returns without doing anything when CUDA is not available, so the
     helper can be called safely on hosts without a usable GPU.
     """
     # Check real availability: the attribute `empty_cache` always exists,
     # so testing for it does not tell us whether a CUDA device can be used.
     if not torch.cuda.is_available():
         return
     with torch.cuda.device('cuda'):
         torch.cuda.empty_cache()
 @spaces.GPU
 def load_model():
     try:
-        # 메모리 정리 먼저 수행
         clear_cuda_memory()
         loaded_model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID,
             torch_dtype=torch.bfloat16,
             device_map="auto",
-            # 낮은 메모리 사용을 위한 설정 추가
             low_cpu_mem_usage=True,
         )
         return loaded_model
@@ -562,22 +571,8 @@ def load_model():
         print(f"모델 로드 오류: {str(e)}")
         raise
-def _truncate_tokens_for_context(input_ids_str: str, desired_input_length: int) -> str:
-    """
-    입력 문자열이 desired_input_length 토큰을 넘으면, 앞부분(오래된 컨텍스트)을 잘라내는 함수.
-    """
-    tokens = input_ids_str.split()
-    if len(tokens) > desired_input_length:
-        tokens = tokens[-desired_input_length:]
-    return " ".join(tokens)
-# build_prompt 함수: 대화 내역을 문자열로 변환
 def build_prompt(conversation: list) -> str:
-    """
-    conversation은 각 항목이 {"role": "user" 또는 "assistant", "content": ...} 형태의 딕셔너리 목록입니다.
-    이를 단순 텍스트 프롬프트로 변환합니다.
-    """
     prompt = ""
     for msg in conversation:
         if msg["role"] == "user":
@@ -587,7 +582,7 @@ def build_prompt(conversation: list) -> str:
     prompt += "Assistant: "
     return prompt
 @spaces.GPU
 def stream_chat(
     message: str,
@@ -602,13 +597,14 @@ def stream_chat(
     global model, current_file_context
     try:
         if model is None:
             model = load_model()
-        print(f'message is - {message}')
-        print(f'history is - {history}')
-        # 파일 업로드 처리
         file_context = ""
         if uploaded_file and message == "파일을 분석하고 있습니다...":
             current_file_context = None
@@ -623,23 +619,16 @@ def stream_chat(
                     current_file_context = file_context
                     message = "업로드된 파일을 분석해주세요."
             except Exception as e:
-                print(f"파일 분석 오류: {str(e)}")
                 file_context = f"\n\n❌ 파일 분석 중 오류가 발생했습니다: {str(e)}"
         elif current_file_context:
             file_context = current_file_context
-        if torch.cuda.is_available():
-            print(f"CUDA 메모리 사용량: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
-        max_history_length = 10
-        if len(history) > max_history_length:
-            history = history[-max_history_length:]
-        # 위키피디아 컨텍스트 검색
         wiki_context = ""
         try:
             relevant_contexts = find_relevant_context(message)
-            if relevant_contexts:  # 결과가 있을 경우만 추가
                 wiki_context = "\n\n관련 위키피디아 정보:\n"
                 for ctx in relevant_contexts:
                     wiki_context += (
@@ -648,9 +637,13 @@ def stream_chat(
                         f"유사도: {ctx['similarity']:.3f}\n\n"
                     )
         except Exception as e:
-            print(f"컨텍스트 검색 오류: {str(e)}")
-        # 대화 내역 구성
         conversation = []
         for prompt, answer in history:
             conversation.extend([
@@ -658,7 +651,7 @@ def stream_chat(
                 {"role": "assistant", "content": answer}
             ])
-        # 최종 메시지 구성
         final_message = message
         if file_context:
             final_message = file_context + "\n현재 질문: " + message
@@ -666,53 +659,42 @@ def stream_chat(
             final_message = wiki_context + "\n현재 질문: " + message
         if file_context and wiki_context:
             final_message = file_context + wiki_context + "\n현재 질문: " + message
         conversation.append({"role": "user", "content": final_message})
-        # 프롬프트 구성 및 토큰화
         input_ids_str = build_prompt(conversation)
-        # 먼저 컨텍스트 길이 확인 및 제한
         max_context = 8192
         tokenized_input = tokenizer(input_ids_str, return_tensors="pt")
         input_length = tokenized_input["input_ids"].shape[1]
-        # 컨텍스트가 너무 길면 자르기
         if input_length > max_context - max_new_tokens:
-            print(f"입력이 너무 깁니다: {input_length} 토큰. 자르는 중...")
-            # 최소 생성 토큰 수 확보
             min_generation = min(256, max_new_tokens)
             new_desired_input_length = max_context - min_generation
-            # 입력 텍스트를 토큰 단위로 자르기
             tokens = tokenizer.encode(input_ids_str)
             if len(tokens) > new_desired_input_length:
                 tokens = tokens[-new_desired_input_length:]
                 input_ids_str = tokenizer.decode(tokens)
-            # 다시 토큰화
             tokenized_input = tokenizer(input_ids_str, return_tensors="pt")
             input_length = tokenized_input["input_ids"].shape[1]
-        print(f"최종 입력 길이: {input_length} 토큰")
-        # CUDA로 입력 이동
         inputs = tokenized_input.to("cuda")
-        # 남은 토큰 수 계산 및 max_new_tokens 조정
         remaining = max_context - input_length
         if remaining < max_new_tokens:
-            print(f"max_new_tokens 조정: {max_new_tokens} -> {remaining}")
             max_new_tokens = remaining
-        print(f"입력 텐서 생성 후 CUDA 메모리: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
         # 스트리머 설정
         streamer = TextIteratorStreamer(
             tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
         )
-        # 생성 매개변수 설정
         generate_kwargs = dict(
             **inputs,
             streamer=streamer,
@@ -727,63 +709,56 @@ def stream_chat(
             use_cache=True
         )
-        # 메모리 정리
         clear_cuda_memory()
-        # 별도 스레드에서 생성 실행
         thread = Thread(target=model.generate, kwargs=generate_kwargs)
         thread.start()
-        # 응답 스트리밍
         buffer = ""
         partial_message = ""
         last_yield_time = time.time()
         try:
             for new_text in streamer:
-                try:
-                    buffer += new_text
-                    partial_message += new_text
-                    # 일정 시간마다 또는 텍스트가 쌓일 때마다 결과 업데이트
-                    current_time = time.time()
-                    if current_time - last_yield_time > 0.1 or len(partial_message) > 20:
-                        yield "", history + [[message, buffer]]
-                        partial_message = ""
-                        last_yield_time = current_time
-                except Exception as inner_e:
-                    print(f"개별 토큰 처리 중 오류: {str(inner_e)}")
-                    continue
-            # 마지막 응답 확인
             if buffer:
                 yield "", history + [[message, buffer]]
-            # 대화 기록에 저장
             chat_history.add_conversation(message, buffer)
         except Exception as e:
-            print(f"스트리밍 중 오류 발생: {str(e)}")
-            if not buffer:  # 버퍼가 비어있으면 오류 메시지 표시
-                buffer = f"응답 생성 중 오류가 발생했습니다: {str(e)}"
             yield "", history + [[message, buffer]]
-        # 스레드가 여전히 실행 중이면 종료 대기
         if thread.is_alive():
             thread.join(timeout=5.0)
-        # 메모리 정리
         clear_cuda_memory()
     except Exception as e:
         import traceback
         error_details = traceback.format_exc()
         error_message = f"오류가 발생했습니다: {str(e)}\n{error_details}"
-        print(f"Stream chat 오류: {error_message}")
         clear_cuda_memory()
         yield "", history + [[message, error_message]]
 def create_demo():
     with gr.Blocks(css=CSS) as demo:
         with gr.Column(elem_classes="markdown-style"):
@@ -834,6 +809,7 @@ def create_demo():
                     scale=1
                 )
         with gr.Accordion("🎮 Advanced Settings", open=False):
             with gr.Row():
                 with gr.Column(scale=1):
@@ -859,6 +835,7 @@ def create_demo():
                         label="Repetition Penalty 🔄"
                     )
         gr.Examples(
             examples=[
                 ["Please analyze this code and suggest improvements:\ndef fibonacci(n):\n    if n <= 1: return n\n    return fibonacci(n-1) + fibonacci(n-2)"],
@@ -869,23 +846,25 @@ def create_demo():
             inputs=msg
         )
         def clear_conversation():
             """Reset the cached file context and return cleared UI values.

             Returns an empty chat history, ``None`` for the file upload,
             and the placeholder text for the message box, in that order.
             """
             global current_file_context
             current_file_context = None
             cleared_history = []
             cleared_upload = None
             prompt_text = "Start a new conversation..."
             return cleared_history, cleared_upload, prompt_text
         msg.submit(
             stream_chat,
             inputs=[msg, chatbot, file_upload, temperature, max_new_tokens, top_p, top_k, penalty],
             outputs=[msg, chatbot]
         )
         send.click(
             stream_chat,
             inputs=[msg, chatbot, file_upload, temperature, max_new_tokens, top_p, top_k, penalty],
             outputs=[msg, chatbot]
         )
         file_upload.change(
             fn=lambda: ("처리 중...", [["시스템", "파일을 분석 중입니다. 잠시만 기다려주세요..."]]),
             outputs=[msg, chatbot],
@@ -901,6 +880,7 @@ def create_demo():
             queue=True
         )
         clear.click(
             fn=clear_conversation,
             outputs=[chatbot, file_upload, msg],
@@ -909,7 +889,7 @@ def create_demo():
         return demo
 if __name__ == "__main__":
     demo = create_demo()
-    demo.launch()

 import os
+# 1) Dynamo 완전 비활성화
 os.environ["TORCH_DYNAMO_DISABLE"] = "1"
+# 2) Triton의 cudagraphs 최적화 비활성화
+os.environ["TRITON_DISABLE_CUDAGRAPHS"] = "1"
+# 3) 경고 무시 설정 (skipping cudagraphs 관련)
+import warnings
+warnings.filterwarnings("ignore", message="skipping cudagraphs due to mutated inputs")
+warnings.filterwarnings("ignore", message="Not enough SMs to use max_autotune_gemm mode")
 import torch
+# TensorFloat32 연산 활성화 (성능 최적화)
 torch.set_float32_matmul_precision('high')
+# TorchInductor cudagraphs 비활성화
 import torch._inductor
 torch._inductor.config.triton.cudagraphs = False
+# Dynamo suppress_errors 옵션 (오류 시 eager로 fallback)
 import torch._dynamo
+torch._dynamo.config.suppress_errors = True
 import gradio as gr
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from threading import Thread
 import pyarrow.parquet as pq
 import pypdf
 import io
 import platform
 import subprocess
 import pytesseract
 from pdf2image import convert_from_path
+import queue  # queue.Empty 예외 처리를 위해
+import time   # 스트리밍 타이밍을 위해
+# -------------------- PDF to Markdown 변환 관련 import --------------------
 try:
     import re
     import requests
     )
 # ---------------------------------------------------------------------------
 # 전역 변수
 current_file_context = None
 MODELS = os.environ.get("MODELS")
 MODEL_NAME = MODEL_ID.split("/")[-1]
+model = None  # 전역에서 관리
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+# (1) 위키피디아 데이터셋 로드
 wiki_dataset = load_dataset("lcw99/wikipedia-korean-20240501-1million-qna")
 print("Wikipedia dataset loaded:", wiki_dataset)
+# (2) TF-IDF 벡터라이저 초기화 및 학습
 print("TF-IDF 벡터화 시작...")
 questions = wiki_dataset['train']['question'][:10000]  # 처음 10000개만 사용
 vectorizer = TfidfVectorizer(max_features=1000)
 question_vectors = vectorizer.fit_transform(questions)
 print("TF-IDF 벡터화 완료")
+# ------------------------- ChatHistory 클래스 -------------------------
 class ChatHistory:
     def __init__(self):
         self.history = []
             print(f"히스토리 로드 실패: {e}")
             self.history = []
+# 전역 ChatHistory 인스턴스
 chat_history = ChatHistory()
+# ------------------------- 위키 문서 검색 (TF-IDF) -------------------------
 def find_relevant_context(query, top_k=3):
     # 쿼리 벡터화
     query_vector = vectorizer.transform([query])
+    # 코사인 유사도
     similarities = (query_vector * question_vectors.T).toarray()[0]
+    # 유사도 높은 질문 인덱스
     top_indices = np.argsort(similarities)[-top_k:][::-1]
     relevant_contexts = []
     for idx in top_indices:
         if similarities[idx] > 0:
             })
     return relevant_contexts
+# 파일 업로드 시 표시할 초기 메시지
 def init_msg():
     """Return the placeholder message used while an uploaded file is analyzed."""
     placeholder = "파일을 분석하고 있습니다..."
     return placeholder
 # -------------------- PDF 파일을 Markdown으로 변환하는 유틸 함수들 --------------------
 def extract_text_from_pdf(reader: PdfReader) -> str:
     """
     PyPDF를 사용해 모든 페이지 텍스트를 추출.
     """
     full_text = ""
     for idx, page in enumerate(reader.pages):
             full_text += f"---- Page {idx+1} ----\n" + text + "\n\n"
     return full_text.strip()
 def convert_pdf_to_markdown(pdf_file: str):
     """
+    PDF 파일에서 텍스트를 추출하고,
+    이미지가 많고 텍스트가 적으면 OCR 시도
     """
     try:
         reader = PdfReader(pdf_file)
     except Exception as e:
         return f"PDF 파일을 읽는 중 오류 발생: {e}", None, None
+    # 메타데이터 추출
     raw_meta = reader.metadata
     metadata = {
         "author": raw_meta.author if raw_meta else None,
         "title": raw_meta.title if raw_meta else None,
     }
+    # 텍스트 추출
     full_text = extract_text_from_pdf(reader)
+    # 이미지-텍스트 비율 판단 후 OCR 시도
+    image_count = sum(len(page.images) for page in reader.pages)
     if image_count > 0 and len(full_text) < 1000:
         try:
             out_pdf_file = pdf_file.replace(".pdf", "_ocr.pdf")
             ocrmypdf.ocr(pdf_file, out_pdf_file, force_ocr=True)
+            # OCR된 PDF 다시 읽기
             reader_ocr = PdfReader(out_pdf_file)
             full_text = extract_text_from_pdf(reader_ocr)
         except Exception as e:
     return full_text, metadata, pdf_file
+# ------------------------- 파일 분석 함수 -------------------------
 def analyze_file_content(content, file_type):
+    """간단한 구조 분석/요약."""
     if file_type in ['parquet', 'csv']:
         try:
             lines = content.split('\n')
     words = len(content.split())
     return f"📝 Document Structure: {total_lines} lines, {paragraphs} paragraphs, approximately {words} words"
 def read_uploaded_file(file):
     """
+    업로드된 파일 처리 -> 내용/타입
     """
     if file is None:
         return "", ""
     try:
         file_ext = os.path.splitext(file.name)[1].lower()
                 content += f"1. Basic Information:\n"
                 content += f"- Total Rows: {len(df):,}\n"
                 content += f"- Total Columns: {len(df.columns)}\n"
+                mem_usage = df.memory_usage(deep=True).sum() / 1024 / 1024
+                content += f"- Memory Usage: {mem_usage:.2f} MB\n\n"
                 content += f"2. Column Information:\n"
                 for col in df.columns:
                 content += f"\n\n4. Missing Values:\n"
                 null_counts = df.isnull().sum()
                 for col, count in null_counts[null_counts > 0].items():
+                    rate = count / len(df) * 100
+                    content += f"- {col}: {count:,} ({rate:.1f}%)\n"
                 numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns
                 if len(numeric_cols) > 0:
             except Exception as e:
                 return f"Error reading Parquet file: {str(e)}", "error"
+        # PDF
         if file_ext == '.pdf':
             try:
                 markdown_text, metadata, processed_pdf_path = convert_pdf_to_markdown(file.name)
                 content += "## Extracted Text\n\n"
                 content += markdown_text
                 return content, "pdf"
             except Exception as e:
                 return f"Error reading PDF file: {str(e)}", "error"
                     content += f"1. Basic Information:\n"
                     content += f"- Total Rows: {len(df):,}\n"
                     content += f"- Total Columns: {len(df.columns)}\n"
+                    mem_usage = df.memory_usage(deep=True).sum() / 1024 / 1024
+                    content += f"- Memory Usage: {mem_usage:.2f} MB\n\n"
                     content += f"2. Column Information:\n"
                     for col in df.columns:
                     content += f"\n\n4. Missing Values:\n"
                     null_counts = df.isnull().sum()
                     for col, count in null_counts[null_counts > 0].items():
+                        rate = count / len(df) * 100
+                        content += f"- {col}: {count:,} ({rate:.1f}%)\n"
                     return content, "csv"
                 except UnicodeDecodeError:
                     continue
+            raise UnicodeDecodeError(
+                f"Unable to read file with supported encodings ({', '.join(encodings)})"
+            )
+        # 텍스트 파일
         else:
             encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
             for encoding in encodings:
                     lines = content.split('\n')
                     total_lines = len(lines)
                     non_empty_lines = len([line for line in lines if line.strip()])
+                    is_code = any(
+                        keyword in content.lower()
+                        for keyword in ['def ', 'class ', 'import ', 'function']
+                    )
                     analysis = f"\n📝 File Analysis:\n"
                     if is_code:
+                        functions = sum('def ' in line for line in lines)
+                        classes = sum('class ' in line for line in lines)
+                        imports = sum(
+                            ('import ' in line) or ('from ' in line)
+                            for line in lines
+                        )
                         analysis += f"- File Type: Code\n"
                         analysis += f"- Total Lines: {total_lines:,}\n"
                         analysis += f"- Functions: {functions}\n"
                         analysis += f"- Character Count: {chars:,}\n"
                     return content + analysis, "text"
                 except UnicodeDecodeError:
                     continue
+            raise UnicodeDecodeError(
+                f"Unable to read file with supported encodings ({', '.join(encodings)})"
+            )
     except Exception as e:
         return f"Error reading file: {str(e)}", "error"
+# ------------------------- CSS -------------------------
 CSS = """
 /* 3D 스타일 CSS */
 :root {
 """
 def clear_cuda_memory():
     """Release cached CUDA memory held by PyTorch's caching allocator.

     Returns without doing anything when CUDA is not available, so the
     helper can be called safely on hosts without a usable GPU.
     """
     # Check real availability: the attribute `empty_cache` always exists,
     # so testing for it does not tell us whether a CUDA device can be used.
     if not torch.cuda.is_available():
         return
     with torch.cuda.device('cuda'):
         torch.cuda.empty_cache()
+# ------------------------- 모델 로딩 함수 -------------------------
 @spaces.GPU
 def load_model():
     try:
         clear_cuda_memory()
         loaded_model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID,
             torch_dtype=torch.bfloat16,
             device_map="auto",
             low_cpu_mem_usage=True,
         )
         return loaded_model
         print(f"모델 로드 오류: {str(e)}")
         raise
 def build_prompt(conversation: list) -> str:
+    """대화 내역을 단순 텍스트 프롬프트로 변환."""
     prompt = ""
     for msg in conversation:
         if msg["role"] == "user":
     prompt += "Assistant: "
     return prompt
+# ------------------------- 메시지 스트리밍 함수 -------------------------
 @spaces.GPU
 def stream_chat(
     message: str,
     global model, current_file_context
     try:
+        # 모델 미로드시 로딩
         if model is None:
             model = load_model()
+        print(f'[User input] message: {message}')
+        print(f'[History] {history}')
+        # (1) 파일 업로드 처리
         file_context = ""
         if uploaded_file and message == "파일을 분석하고 있습니다...":
             current_file_context = None
                     current_file_context = file_context
                     message = "업로드된 파일을 분석해주세요."
             except Exception as e:
+                print(f"[파일 분석 오류] {str(e)}")
                 file_context = f"\n\n❌ 파일 분석 중 오류가 발생했습니다: {str(e)}"
         elif current_file_context:
             file_context = current_file_context
+        # (2) TF-IDF 기반 관련 문서 탐색
         wiki_context = ""
         try:
             relevant_contexts = find_relevant_context(message)
+            if relevant_contexts:
                 wiki_context = "\n\n관련 위키피디아 정보:\n"
                 for ctx in relevant_contexts:
                     wiki_context += (
                         f"유사도: {ctx['similarity']:.3f}\n\n"
                     )
         except Exception as e:
+            print(f"[컨텍스트 검색 오류] {str(e)}")
+        # (3) 대화 이력 구성
+        max_history_length = 10
+        if len(history) > max_history_length:
+            history = history[-max_history_length:]
         conversation = []
         for prompt, answer in history:
             conversation.extend([
                 {"role": "assistant", "content": answer}
             ])
+        # (4) 최종 메시지 결정
         final_message = message
         if file_context:
             final_message = file_context + "\n현재 질문: " + message
             final_message = wiki_context + "\n현재 질문: " + message
         if file_context and wiki_context:
             final_message = file_context + wiki_context + "\n현재 질문: " + message
         conversation.append({"role": "user", "content": final_message})
+        # (5) 토큰화 및 프롬프트 구축
         input_ids_str = build_prompt(conversation)
         max_context = 8192
         tokenized_input = tokenizer(input_ids_str, return_tensors="pt")
         input_length = tokenized_input["input_ids"].shape[1]
+        # (6) 컨텍스트가 너무 길면 앞부분 토큰 자르기
         if input_length > max_context - max_new_tokens:
+            print(f"[경고] 입력이 너무 깁니다: {input_length} 토큰 -> 잘라냄.")
             min_generation = min(256, max_new_tokens)
             new_desired_input_length = max_context - min_generation
             tokens = tokenizer.encode(input_ids_str)
             if len(tokens) > new_desired_input_length:
                 tokens = tokens[-new_desired_input_length:]
                 input_ids_str = tokenizer.decode(tokens)
             tokenized_input = tokenizer(input_ids_str, return_tensors="pt")
             input_length = tokenized_input["input_ids"].shape[1]
+        print(f"[토큰 길이] {input_length}")
         inputs = tokenized_input.to("cuda")
+        # 남은 토큰 수로 max_new_tokens 조정
         remaining = max_context - input_length
         if remaining < max_new_tokens:
+            print(f"[max_new_tokens 조정] {max_new_tokens} -> {remaining}")
             max_new_tokens = remaining
         # 스트리머 설정
         streamer = TextIteratorStreamer(
             tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
         )
+        # (7) 생성 파라미터
         generate_kwargs = dict(
             **inputs,
             streamer=streamer,
             use_cache=True
         )
         clear_cuda_memory()
+        # (8) 별도 스레드에서 생성
         thread = Thread(target=model.generate, kwargs=generate_kwargs)
         thread.start()
+        # (9) 스트리밍 응답
         buffer = ""
         partial_message = ""
         last_yield_time = time.time()
         try:
             for new_text in streamer:
+                buffer += new_text
+                partial_message += new_text
+                # 일정 시간 또는 버퍼 길이 기준으로 yield
+                current_time = time.time()
+                if (current_time - last_yield_time > 0.1) or (len(partial_message) > 20):
+                    yield "", history + [[message, buffer]]
+                    partial_message = ""
+                    last_yield_time = current_time
+            # 마지막 완성된 응답
             if buffer:
                 yield "", history + [[message, buffer]]
+            # 대화 내용 저장
             chat_history.add_conversation(message, buffer)
         except Exception as e:
+            print(f"[스트리밍 중 오류] {str(e)}")
+            if not buffer:  # buffer가 비어있다면 오류메시지 대화창 표시
+                buffer = f"응답 생성 중 오류 발생: {str(e)}"
             yield "", history + [[message, buffer]]
         if thread.is_alive():
             thread.join(timeout=5.0)
         clear_cuda_memory()
     except Exception as e:
         import traceback
         error_details = traceback.format_exc()
         error_message = f"오류가 발생했습니다: {str(e)}\n{error_details}"
+        print(f"[Stream chat 오류] {error_message}")
         clear_cuda_memory()
         yield "", history + [[message, error_message]]
+# ------------------------- Gradio UI 구성 -------------------------
 def create_demo():
     with gr.Blocks(css=CSS) as demo:
         with gr.Column(elem_classes="markdown-style"):
                     scale=1
                 )
+        # 고급 설정
         with gr.Accordion("🎮 Advanced Settings", open=False):
             with gr.Row():
                 with gr.Column(scale=1):
                         label="Repetition Penalty 🔄"
                     )
+        # 예시
         gr.Examples(
             examples=[
                 ["Please analyze this code and suggest improvements:\ndef fibonacci(n):\n    if n <= 1: return n\n    return fibonacci(n-1) + fibonacci(n-2)"],
             inputs=msg
         )
+        # 대화 내용 초기화
         def clear_conversation():
             """Reset the cached file context and return cleared UI values.

             Returns an empty chat history, ``None`` for the file upload,
             and the placeholder text for the message box, in that order.
             """
             global current_file_context
             current_file_context = None
             cleared_history = []
             cleared_upload = None
             prompt_text = "Start a new conversation..."
             return cleared_history, cleared_upload, prompt_text
+        # 메시지 전송
         msg.submit(
             stream_chat,
             inputs=[msg, chatbot, file_upload, temperature, max_new_tokens, top_p, top_k, penalty],
             outputs=[msg, chatbot]
         )
         send.click(
             stream_chat,
             inputs=[msg, chatbot, file_upload, temperature, max_new_tokens, top_p, top_k, penalty],
             outputs=[msg, chatbot]
         )
+        # 파일 업로드 이벤트
         file_upload.change(
             fn=lambda: ("처리 중...", [["시스템", "파일을 분석 중입니다. 잠시만 기다려주세요..."]]),
             outputs=[msg, chatbot],
             queue=True
         )
+        # Clear 버튼
         clear.click(
             fn=clear_conversation,
             outputs=[chatbot, file_upload, msg],
         return demo
+# 메인 실행
 if __name__ == "__main__":
     demo = create_demo()
+    demo.launch()