Seokjin committed
Commit ef07723
1 Parent(s): 9cdefcc

Configure git user, fix paths and LFS settings, ignore checkpoints

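Note on the "configure git user" part of the message: that setting lives in repository metadata rather than in a tracked file, so it does not appear in the diff below. A sketch of the usual commands (the e-mail here is a placeholder):

    git config user.name "Seokjin"
    git config user.email "seokjin@example.com"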
.gitattributes CHANGED
@@ -1,35 +1,2 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
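For reference, these two rules are exactly what Git LFS would write itself; a sketch, assuming git-lfs is installed:

    git lfs track "*.bin"
    git lfs track "*.safetensors"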
.gitignore ADDED
@@ -0,0 +1 @@
+ .ipynb_checkpoints/
app.py ADDED
@@ -0,0 +1,330 @@
+ # -*- coding: utf-8 -*-
+ # --- Import required modules ---
+ import gradio as gr
+ from transformers import DonutProcessor, VisionEncoderDecoderModel
+ from PIL import Image
+ import torch
+ import re
+ import json
+ import os
+ import warnings
+
+ # --- Suppress warning messages ---
+ # "UserWarning: TypedStorage is deprecated" is a PyTorch warning that is safe to ignore.
+ warnings.filterwarnings("ignore", category=UserWarning, message="TypedStorage is deprecated")
+ # Future or other warnings if needed
+ warnings.filterwarnings("ignore", category=FutureWarning)
+
+ # --- Define model and processor paths ---
+ # Assumes the model files have been copied into the Hugging Face Spaces repository:
+ # the repository root contains a donut_sroie_finetuned folder with final_model inside it.
+ model_path_finetuned = "./donut_sroie_finetuned"
+ model_name_base = "naver-clova-ix/donut-base"  # the base model is loaded directly from the Hub
+
+ # --- Load the fine-tuned processor and model ---
+ # In the Spaces environment, try local_files_only=True first (assuming the files are present via Git LFS).
+ print(f"Loading Fine-tuned processor from: {model_path_finetuned}")
+ try:
+     # local_files_only=True forces use of the files inside the Space repository
+     processor = DonutProcessor.from_pretrained(model_path_finetuned, local_files_only=True)
+     print("Successfully loaded fine-tuned processor locally from Space repo.")
+ except Exception as e:
+     print(f"Error loading fine-tuned processor locally: {e}. Check if model files exist at the path.")
+     # A Hub fallback could be added here if needed (the model must be uploaded to the Hub first):
+     # try:
+     #     processor = DonutProcessor.from_pretrained("your-hf-username/your-model-repo-name")  # example Hub path
+     #     print("Loaded fine-tuned processor from Hub as fallback.")
+     # except Exception as e2:
+     #     print(f"FATAL: Could not load fine-tuned processor locally or from Hub: {e2}")
+     #     exit()
+     # For now, exit when local loading fails (uncomment the block above if a fallback is wanted)
+     print(f"FATAL: Could not load fine-tuned processor locally: {e}")
+     exit()
+
+
+ print(f"Loading Fine-tuned model from: {model_path_finetuned}")
+ try:
+     # local_files_only=True forces use of the files inside the Space repository
+     model_finetuned = VisionEncoderDecoderModel.from_pretrained(model_path_finetuned, local_files_only=True)
+     print("Successfully loaded fine-tuned model locally from Space repo.")
+ except Exception as e:
+     print(f"Error loading fine-tuned model locally: {e}. Check if model files exist at the path.")
+     # A Hub fallback could be added here if needed (the model must be uploaded to the Hub first):
+     # try:
+     #     model_finetuned = VisionEncoderDecoderModel.from_pretrained("your-hf-username/your-model-repo-name")  # example Hub path
+     #     print("Loaded fine-tuned model from Hub as fallback.")
+     # except Exception as e2:
+     #     print(f"FATAL: Could not load fine-tuned model locally or from Hub: {e2}")
+     #     exit()
+     # For now, exit when local loading fails (uncomment the block above if a fallback is wanted)
+     print(f"FATAL: Could not load fine-tuned model locally: {e}")
+     exit()
+
+
+ # --- Load the base processor and model (directly from the Hub) ---
+ print(f"Loading Base processor from: {model_name_base}")
+ try:
+     processor_base = DonutProcessor.from_pretrained(model_name_base)
+     print("Successfully loaded base processor.")
+ except Exception as e:
+     print(f"FATAL: Could not load base processor: {e}")
+     exit()
+
+ print(f"Loading Base model from: {model_name_base}")
+ try:
+     model_base = VisionEncoderDecoderModel.from_pretrained(model_name_base)
+     print("Successfully loaded base model.")
+ except Exception as e:
+     print(f"FATAL: Could not load base model: {e}")
+     exit()
+
+
+ # --- Device setup and model placement ---
+ # The Spaces environment provides either a CPU or an allocated GPU.
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print(f"\nUsing device: {device}")
+
+ # Move the models to the selected device
+ try:
+     model_finetuned.to(device)
+     model_base.to(device)
+     print("Models moved to device.")
+     # Set evaluation mode (required for inference)
+     model_finetuned.eval()
+     model_base.eval()
+     print("Models set to evaluation mode.")
+ except Exception as e:
+     print(f"Error moving models to device or setting eval mode: {e}")
+     exit()
+
+
+ # --- Helper function to clean a generated sequence (mainly for the fine-tuned model) ---
+ def clean_sequence(sequence, processor_to_use, prompt_token_str=None):
+     """Removes prompt, EOS, PAD tokens from a generated sequence."""
+     cleaned = sequence
+     try:
+         # Standard tokens first
+         eos_token = processor_to_use.tokenizer.eos_token if processor_to_use.tokenizer.eos_token else "</s>"  # default EOS
+         pad_token = processor_to_use.tokenizer.pad_token if processor_to_use.tokenizer.pad_token else "<pad>"  # default PAD
+         cleaned = cleaned.replace(eos_token, "").replace(pad_token, "").strip()
+
+         # Remove the BOS token if it exists and appears
+         if hasattr(processor_to_use.tokenizer, 'bos_token') and processor_to_use.tokenizer.bos_token:
+             cleaned = cleaned.replace(processor_to_use.tokenizer.bos_token, "").strip()
+
+         # Specific prompt removal (a case-insensitive start check can be more robust)
+         if prompt_token_str:
+             # A simple startswith check is enough if the prompt is always at the beginning
+             if cleaned.startswith(prompt_token_str):
+                 cleaned = cleaned[len(prompt_token_str):].strip()
+             # Regex version (more robust but slightly slower):
+             # cleaned = re.sub(f"^{re.escape(prompt_token_str)}", "", cleaned, flags=re.IGNORECASE).strip()
+
+     except Exception as e:
+         print(f"Warning: Error during sequence cleaning: {e}")
+         return sequence  # return the original if cleaning fails
+     return cleaned
+
+ # --- Helper function to parse the SROIE format ---
+ def token2json_simple(text):
+     """Parses <s_key>value</s_key> format into a dictionary."""
+     output = {}
+     # Regex to find <s_...>...</s_...> patterns, handling potential spaces and newlines in the value.
+     # It captures the key name (e.g., "company") and the value between the tags.
+     parts = re.findall(r"<s_(.*?)>([\s\S]*?)</s_\1>", text)
+     for key, value in parts:
+         # Strip leading/trailing whitespace from key and value
+         output[key.strip()] = value.strip()
+
+     # Add info if parsing failed but text was present
+     if not output and text and not text.isspace():
+         output["parsing_info"] = "Could not parse SROIE key-value pairs from the cleaned sequence."
+         output["cleaned_sequence_preview"] = text[:200] + "..."  # show a preview
+     elif not text or text.isspace():
+         output["parsing_info"] = "Empty sequence after cleaning, nothing to parse."
+
+     return output
+
+ # --- Combined image processing and inference function ---
+ # The decorator disables gradient computation (saves memory and speeds up inference)
+ @torch.no_grad()
+ def process_image_comparison(image_input):
+     if image_input is None:
+         no_image_msg = {"error": "Please upload an image."}
+         # Ensure JSON output for the Gradio component
+         return json.dumps(no_image_msg, indent=2, ensure_ascii=False), json.dumps(no_image_msg, indent=2, ensure_ascii=False)
+
+     try:
+         # Gradio's numpy input needs conversion
+         image = Image.fromarray(image_input).convert("RGB")
+     except Exception as e:
+         error_msg = {"error": f"Image conversion error: {e}"}
+         error_json_str = json.dumps(error_msg, indent=2, ensure_ascii=False)
+         return error_json_str, error_json_str
+
+     results_ft_json_str = "{}"
+     results_base_json_str = "{}"
+     sequence_ft_raw = "N/A"
+     sequence_base_raw = "N/A"
+
+     # === Fine-tuned model inference ===
+     try:
+         pixel_values_ft = processor(image, return_tensors="pt").pixel_values.to(device)
+         task_prompt_ft = "<s_sroie>"  # start prompt for the fine-tuned model
+         decoder_input_ids_ft = processor.tokenizer(
+             task_prompt_ft, add_special_tokens=False, return_tensors="pt"
+         ).input_ids.to(device)
+
+         # Generation parameters
+         generation_config_ft = {
+             "max_length": model_finetuned.config.decoder.max_position_embeddings,
+             "pad_token_id": processor.tokenizer.pad_token_id,
+             "eos_token_id": processor.tokenizer.eos_token_id,
+             "use_cache": True,
+             "bad_words_ids": [[processor.tokenizer.unk_token_id]] if processor.tokenizer.unk_token_id else None,
+             "return_dict_in_generate": True,
+             "decoder_input_ids": decoder_input_ids_ft  # provide the start prompt
+         }
+
+         outputs_ft = model_finetuned.generate(pixel_values_ft, **generation_config_ft)
+
+         sequence_ft_raw = processor.batch_decode(outputs_ft.sequences)[0]
+         # print(f"\nFine-tuned Raw Output: {sequence_ft_raw}")  # server log (for debugging)
+
+         # Clean the fine-tuned model output
+         sequence_ft_cleaned = clean_sequence(sequence_ft_raw, processor, prompt_token_str=task_prompt_ft)
+         # print(f"Fine-tuned Cleaned Output: {sequence_ft_cleaned}")  # server log (for debugging)
+
+         # Parse the cleaned output
+         result_json_ft = token2json_simple(sequence_ft_cleaned)
+         result_json_ft["raw_decoded_sequence_preview"] = sequence_ft_raw[:200] + "..."  # add a preview of the raw output
+
+         # Convert to the final JSON string
+         results_ft_json_str = json.dumps(result_json_ft, indent=2, ensure_ascii=False, sort_keys=False)
+
+     except Exception as e:
+         print(f"Error during fine-tuned model inference: {e}")
+         import traceback
+         traceback.print_exc()  # detailed error log on the server
+         results_ft_json_str = json.dumps({
+             "error": f"Fine-tuned model inference error: {e}",
+             "raw_decoded_sequence_before_error": sequence_ft_raw
+         }, indent=2, ensure_ascii=False)
+
+     # === Base model inference ===
+     try:
+         pixel_values_base = processor_base(image, return_tensors="pt").pixel_values.to(device)
+         # Prompt for the base model (e.g., <s_iitcdip> or another generic document prompt);
+         # <s_iitcdip> is used here, same as in the earlier code.
+         task_prompt_base = "<s_iitcdip>"
+         # The base model may not have this prompt token, so it needs checking (or a different prompt);
+         # proceed with it for now.
+         try:
+             decoder_input_ids_base = processor_base.tokenizer(
+                 task_prompt_base,
+                 add_special_tokens=False,
+                 return_tensors="pt",
+             ).input_ids.to(device)
+         except Exception as tokenizer_e:
+             print(f"Warning: Base processor cannot tokenize prompt '{task_prompt_base}'. Using default generation. Error: {tokenizer_e}")
+             decoder_input_ids_base = None  # generate without a prompt
+
+         # Generation parameters
+         generation_config_base = {
+             "max_length": model_base.config.decoder.max_position_embeddings,
+             "early_stopping": True,
+             "pad_token_id": processor_base.tokenizer.pad_token_id,
+             "eos_token_id": processor_base.tokenizer.eos_token_id,
+             "use_cache": True,
+             "num_beams": 1,  # greedy decoding
+             "bad_words_ids": [[processor_base.tokenizer.unk_token_id]] if processor_base.tokenizer.unk_token_id else None,
+             "return_dict_in_generate": True,
+         }
+         # Add the prompt only if it was encoded successfully
+         if decoder_input_ids_base is not None:
+             generation_config_base["decoder_input_ids"] = decoder_input_ids_base
+
+         outputs_base = model_base.generate(pixel_values_base, **generation_config_base)
+
+         sequence_base_raw = processor_base.batch_decode(outputs_base.sequences)[0]
+         # print(f"\nBase Raw Output: {sequence_base_raw}")  # server log (for debugging)
+
+         # Clean the base model output (using skip_special_tokens)
+         sequence_base_cleaned = processor_base.batch_decode(outputs_base.sequences, skip_special_tokens=True)[0]
+         # print(f"Base Cleaned Output (skip_special_tokens): {sequence_base_cleaned}")  # server log (for debugging)
+
+         # Build the result dictionary
+         result_json_base = {
+             "raw_decoded_sequence_preview": sequence_base_raw[:200] + "...",  # preview of the raw output
+             "output_skip_special_tokens": sequence_base_cleaned  # cleaned output
+         }
+         # Convert to the final JSON string
+         results_base_json_str = json.dumps(result_json_base, indent=2, ensure_ascii=False, sort_keys=False)
+
+     except Exception as e:
+         print(f"Error during base model inference: {e}")
+         import traceback
+         traceback.print_exc()  # detailed error log on the server
+         results_base_json_str = json.dumps({
+             "error": f"Base model inference error: {e}",
+             "raw_decoded_sequence_before_error": sequence_base_raw  # include the raw sequence if available
+         }, indent=2, ensure_ascii=False)
+
+     # Return both models' results as JSON strings
+     return results_ft_json_str, results_base_json_str
+
+
+ # --- Gradio interface definition ---
+ # CSS styles
+ custom_css = """
+ body { background-color: #f0f4f8; font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
+ #main_title { text-align: center; color: #1a5276; font-size: 2.3em; font-weight: 600; margin-top: 20px; margin-bottom: 5px; }
+ #sub_description { text-align: center; color: #566573; font-size: 1.0em; margin-bottom: 25px; }
+ .gradio-container { border-radius: 10px !important; box-shadow: 0 3px 10px rgba(0,0,0,0.08); padding: 25px !important; }
+ footer { display: none !important; } /* Hide Gradio footer */
+ #output-title-ft, #output-title-base { color: #1a5276; font-weight: 600; margin-bottom: 8px; font-size: 1.2em; border-bottom: 2px solid #aed6f1; padding-bottom: 4px; }
+ #output_row > div.gradio-column { border: 1px solid #d5dbdb; padding: 15px !important; border-radius: 8px; background-color: #ffffff; margin: 0 8px !important; box-shadow: 0 1px 3px rgba(0,0,0,0.04); }
+ #json_output_ft > div:nth-child(2), #json_output_base > div:nth-child(2) { max-height: 600px; overflow-y: auto !important; } /* JSON output scroll */
+ """
+
+ # Build the Gradio Blocks interface
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky")) as demo:
+     gr.Markdown("# Donut Model Comparison: Fine-tuned vs Base", elem_id="main_title")
+     gr.Markdown("Upload a receipt image to compare the extraction results of the fine-tuned model (SROIE parsing) and the base model.", elem_id="sub_description")
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             image_input = gr.Image(type="numpy", label="🧾 Upload a receipt image")
+             submit_btn = gr.Button("🚀 Start comparison", variant="primary", scale=0)
+             # --- The example-image block is commented out for now; paths can be a problem in the Spaces environment. ---
+             # It can be re-enabled if example images are uploaded to the Space repository with matching paths.
+             # example_img_dir = "example_images"  # e.g., create an example_images folder in the repo root and put images there
+             # example_paths = [os.path.join(example_img_dir, f) for f in ["example1.jpg", "example2.jpg"] if os.path.exists(os.path.join(example_img_dir, f))]
+             # if example_paths:
+             #     gr.Examples(examples=example_paths, inputs=image_input, label="Click an example image (then press the 'Start comparison' button)")
+             # else:
+             #     gr.Markdown("_(No example images found.)_")
+
+         with gr.Column(scale=2):
+             with gr.Row(elem_id="output_row"):
+                 with gr.Column(scale=1):
+                     gr.Markdown("### ✨ Fine-tuned Model (SROIE parsing)", elem_id="output-title-ft")
+                     # JSON component: label updated, interactive=False added
+                     json_output_ft = gr.JSON(label="Fine-tuned result (JSON)", interactive=False, elem_id="json_output_ft")
+                 with gr.Column(scale=1):
+                     gr.Markdown("### 💡 Base Model (Raw + Cleaned)", elem_id="output-title-base")
+                     # JSON component: label updated, interactive=False added
+                     json_output_base = gr.JSON(label="Base model result (JSON)", interactive=False, elem_id="json_output_base")
+
+     # Wire the button click to the function and its inputs/outputs
+     submit_btn.click(
+         fn=process_image_comparison,
+         inputs=image_input,
+         outputs=[json_output_ft, json_output_base]  # components in the order the function returns them
+     )
+
+ # --- Launch the Gradio app ---
+ # This is what gets called when running on Hugging Face Spaces.
+ if __name__ == "__main__":
+     # share=True is not needed in the Spaces environment.
+     demo.launch()
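For reference, a minimal sketch of what token2json_simple produces for a cleaned fine-tuned output (the receipt values below are invented for illustration):

    seq = "<s_company>ACME MART</s_company><s_date>01/02/2023</s_date><s_total>12.90</s_total>"
    token2json_simple(seq)
    # -> {"company": "ACME MART", "date": "01/02/2023", "total": "12.90"}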
donut_sroie_finetuned/added_tokens.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "</s_address>": 57531,
+   "</s_company>": 57527,
+   "</s_date>": 57529,
+   "</s_total>": 57533,
+   "<s_address>": 57530,
+   "<s_company>": 57526,
+   "<s_date>": 57528,
+   "<s_iitcdip>": 57523,
+   "<s_sroie>": 57525,
+   "<s_synthdog>": 57524,
+   "<s_total>": 57532,
+   "<sep/>": 57522
+ }
donut_sroie_finetuned/config.json ADDED
@@ -0,0 +1,78 @@
+ {
+   "architectures": [
+     "VisionEncoderDecoderModel"
+   ],
+   "decoder": {
+     "activation_dropout": 0.0,
+     "activation_function": "gelu",
+     "add_cross_attention": true,
+     "add_final_layer_norm": true,
+     "attention_dropout": 0.0,
+     "classifier_dropout": 0.0,
+     "d_model": 1024,
+     "decoder_attention_heads": 16,
+     "decoder_ffn_dim": 4096,
+     "decoder_layerdrop": 0.0,
+     "decoder_layers": 4,
+     "dropout": 0.1,
+     "encoder_attention_heads": 16,
+     "encoder_ffn_dim": 4096,
+     "encoder_layerdrop": 0.0,
+     "encoder_layers": 12,
+     "init_std": 0.02,
+     "is_decoder": true,
+     "is_encoder_decoder": false,
+     "max_position_embeddings": 1536,
+     "model_type": "mbart",
+     "num_hidden_layers": 12,
+     "scale_embedding": true,
+     "torch_dtype": "float32",
+     "use_cache": true,
+     "vocab_size": 57534
+   },
+   "decoder_start_token_id": 57525,
+   "encoder": {
+     "attention_probs_dropout_prob": 0.0,
+     "depths": [
+       2,
+       2,
+       14,
+       2
+     ],
+     "drop_path_rate": 0.1,
+     "embed_dim": 128,
+     "hidden_act": "gelu",
+     "hidden_dropout_prob": 0.0,
+     "hidden_size": 1024,
+     "image_size": [
+       2560,
+       1920
+     ],
+     "initializer_range": 0.02,
+     "layer_norm_eps": 1e-05,
+     "mlp_ratio": 4.0,
+     "model_type": "donut-swin",
+     "num_channels": 3,
+     "num_heads": [
+       4,
+       8,
+       16,
+       32
+     ],
+     "num_layers": 4,
+     "patch_size": 4,
+     "path_norm": true,
+     "qkv_bias": true,
+     "torch_dtype": "float32",
+     "use_absolute_embeddings": false,
+     "window_size": 10
+   },
+   "is_encoder_decoder": true,
+   "max_length": null,
+   "model_type": "vision-encoder-decoder",
+   "pad_token_id": 1,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.50.3",
+   "vocab_size": 57534
+ }
donut_sroie_finetuned/generation_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 0,
+   "eos_token_id": 2,
+   "forced_eos_token_id": 2,
+   "max_length": 1536,
+   "pad_token_id": 1,
+   "transformers_version": "4.50.3"
+ }
donut_sroie_finetuned/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dbebd2fe9f30d480deaffb6201e514e4421afd9c9129a9c335ddb14feb85a0e6
+ size 809107608
donut_sroie_finetuned/preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "do_align_long_axis": true,
+   "do_normalize": true,
+   "do_pad": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "do_thumbnail": true,
+   "image_mean": [
+     0.5,
+     0.5,
+     0.5
+   ],
+   "image_processor_type": "DonutImageProcessor",
+   "image_std": [
+     0.5,
+     0.5,
+     0.5
+   ],
+   "processor_class": "DonutProcessor",
+   "resample": 2,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "height": 2560,
+     "width": 1920
+   }
+ }
donut_sroie_finetuned/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cb9e3dce4c326195d08fc3dd0f7e2eee1da8595c847bf4c1a9c78b7a82d47e2d
+ size 1296245
donut_sroie_finetuned/special_tokens_map.json ADDED
@@ -0,0 +1,116 @@
+ {
+   "additional_special_tokens": [
+     {
+       "content": "<s_sroie>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<s_company>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "</s_company>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<s_date>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "</s_date>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<s_address>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "</s_address>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<s_total>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "</s_total>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     }
+   ],
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
donut_sroie_finetuned/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
donut_sroie_finetuned/tokenizer_config.json ADDED
@@ -0,0 +1,164 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57521": {
+       "content": "<mask>",
+       "lstrip": true,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57522": {
+       "content": "<sep/>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "57523": {
+       "content": "<s_iitcdip>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57524": {
+       "content": "<s_synthdog>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57525": {
+       "content": "<s_sroie>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57526": {
+       "content": "<s_company>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57527": {
+       "content": "</s_company>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57528": {
+       "content": "<s_date>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57529": {
+       "content": "</s_date>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57530": {
+       "content": "<s_address>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57531": {
+       "content": "</s_address>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57532": {
+       "content": "<s_total>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57533": {
+       "content": "</s_total>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<s_sroie>",
+     "<s_company>",
+     "</s_company>",
+     "<s_date>",
+     "</s_date>",
+     "<s_address>",
+     "</s_address>",
+     "<s_total>",
+     "</s_total>"
+   ],
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "extra_special_tokens": {},
+   "mask_token": "<mask>",
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<pad>",
+   "processor_class": "DonutProcessor",
+   "sep_token": "</s>",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "XLMRobertaTokenizer",
+   "unk_token": "<unk>"
+ }
donut_sroie_finetuned/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2069b134cbf99306f3e34c7fcbe05a9b8bfcf4f787559c7679a8c0d0c5dccf76
+ size 5496
git.gitattributes ADDED
@@ -0,0 +1,2 @@
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ transformers
+ torch
+ torchvision
+ Pillow
+ gradio
+ accelerate
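Hugging Face Spaces installs these dependencies automatically at build time; for a local run the equivalent is:

    pip install -r requirements.txt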