Spaces:

JJS0321
/

Industrial_AI_Engineering_Week8_Assignment

Sleeping

App Files Files Community

JJS0321 committed 23 days ago

Commit

58a66e8

1 Parent(s): c273705

make app more beautiful

Browse files

Files changed (1) hide show

app.py +69 -29

app.py CHANGED Viewed

@@ -1,37 +1,29 @@
 import os
 import re
-import gradio as gr
-from transformers import DonutProcessor, VisionEncoderDecoderModel
 import torch
 import traceback
-# 1) Load pretrained Donut model and processor
 MODEL_NAME = "naver-clova-ix/donut-base-finetuned-cord-v2"
 processor = DonutProcessor.from_pretrained(MODEL_NAME)
 model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
-# 2) Set device and move model
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
-# 3) Inference function with debugging
 def ocr_donut(image):
     try:
         if image is None:
             return {"error": "No image provided."}
-        # Prepare prompt and inputs
         task_prompt = "<s_cord-v2>"
         decoder_input_ids = processor.tokenizer(
-            task_prompt,
-            add_special_tokens=False,
-            return_tensors="pt"
         ).input_ids.to(device)
-        # Convert to tensor
         pixel_values = processor(image.convert("RGB"), return_tensors="pt").pixel_values.to(device)
-        # Generate outputs
         outputs = model.generate(
             pixel_values,
             decoder_input_ids=decoder_input_ids,
@@ -43,29 +35,77 @@ def ocr_donut(image):
             return_dict_in_generate=True,
         )
-        # Decode and clean up
-        sequence = processor.batch_decode(outputs.sequences)[0]
-        sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
-        sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()
-        json_output = processor.token2json(sequence)
-        return {"result": json_output}
     except Exception:
         tb = traceback.format_exc()
         print(tb)
         return {"error": tb}
-# 4) Build Gradio interface
-demo = gr.Interface(
-    fn=ocr_donut,
-    inputs=gr.Image(type="pil", label="Upload Document Image"),
-    outputs=gr.JSON(label="Output"),
-    title="Donut OCR Gradio App",
-    description="Upload a document image and get structured JSON output. Errors will be shown for debugging."
-)
-# 5) Launch for Spaces
 demo.launch(
     server_name="0.0.0.0",
     server_port=int(os.environ.get("PORT", 7860)),

 import os
 import re
 import torch
 import traceback
+import gradio as gr
+from transformers import DonutProcessor, VisionEncoderDecoderModel
+# ─── Model loading ────────────────────────────────────────────────────
 MODEL_NAME = "naver-clova-ix/donut-base-finetuned-cord-v2"
 processor = DonutProcessor.from_pretrained(MODEL_NAME)
 model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
+# ─── OCR function ─────────────────────────────────────────────────────
 def ocr_donut(image):
     try:
         if image is None:
             return {"error": "No image provided."}
         task_prompt = "<s_cord-v2>"
         decoder_input_ids = processor.tokenizer(
+            task_prompt, add_special_tokens=False, return_tensors="pt"
         ).input_ids.to(device)
         pixel_values = processor(image.convert("RGB"), return_tensors="pt").pixel_values.to(device)
         outputs = model.generate(
             pixel_values,
             decoder_input_ids=decoder_input_ids,
             return_dict_in_generate=True,
         )
+        seq = processor.batch_decode(outputs.sequences)[0]
+        seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
+        seq = re.sub(r"<.*?>", "", seq, count=1).strip()
+        return {"result": processor.token2json(seq)}
     except Exception:
         tb = traceback.format_exc()
         print(tb)
         return {"error": tb}
+# ─── CSS styling ─────────────────────────────────────────────────────
+custom_css = """
+body { background: #f0f2f5; font-family: 'Segoe UI', Tahoma, sans-serif; }
+.gradio-container { max-width: 900px; margin: 40px auto; padding: 20px; }
+.header { text-align: center; margin-bottom: 30px; }
+.header h1 { font-size: 2.8rem; color: #333; margin: 0; }
+.header p { color: #666; margin-top: 8px; }
+.input-box, .output-box {
+    background: #fff;
+    border-radius: 8px;
+    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+    padding: 20px;
+}
+.input-box { margin-right: 10px; }
+.output-box { margin-left: 10px; }
+.gr-button {
+    background: #5a8dee !important;
+    color: #fff !important;
+    border-radius: 6px !important;
+    padding: 10px 20px !important;
+    font-size: 1rem !important;
+    margin-top: 10px !important;
+    transition: background 0.2s ease;
+}
+.gr-button:hover { background: #3f6fcc !important; }
+.footer {
+    text-align: center;
+    margin-top: 30px;
+    color: #999;
+    font-size: 0.85rem;
+}
+"""
+# ─── Blocks layout ───────────────────────────────────────────────────
+# Builds the UI: a header, an input column (image + button) next to an
+# output column (JSON result), and a footer. `custom_css` supplies the
+# styles for the `header`, `input-box`, `output-box` and `footer` classes.
+with gr.Blocks(css=custom_css, title="Donut OCR App") as demo:
+    # Header. gr.HTML is a leaf component, not a layout container, so it
+    # cannot be entered with `with`; attach the CSS class to the component
+    # itself instead of wrapping a second gr.HTML inside it.
+    gr.HTML(
+        """
+            <h1>📄 Donut OCR</h1>
+            <p>Industrial AI Engineering Week 8 Assignment</p>
+        """,
+        elem_classes="header",
+    )
+    # Input / output area
+    with gr.Row():
+        with gr.Column(elem_classes="input-box"):
+            image_input = gr.Image(type="pil", label="Upload Document Image")
+            run_btn = gr.Button("Run OCR", elem_id="run-btn")
+        with gr.Column(elem_classes="output-box"):
+            result_box = gr.JSON(label="Output")
+    # Wire the button click to the OCR function
+    run_btn.click(fn=ocr_donut, inputs=image_input, outputs=result_box)
+    # Footer (same pattern as the header: a single styled HTML component)
+    gr.HTML(
+        "<p>Powered by Naver Clova Donut • Built with 💜 by You</p>",
+        elem_classes="footer",
+    )
+# Launch for Spaces
 demo.launch(
     server_name="0.0.0.0",
     server_port=int(os.environ.get("PORT", 7860)),