Seokjin committed
Commit ef07723
1 Parent(s): 9cdefcc

Configure git user, fix paths and LFS settings, ignore checkpoints

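Note on the "configure git user" part of the message: that setting lives in repository metadata rather than in a tracked file, so it does not appear in the diff below. A sketch of the usual commands (the e-mail here is a placeholder):

    git config user.name "Seokjin"
    git config user.email "seokjin@example.com"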
.gitattributes CHANGED
@@ -1,35 +1,2 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
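For reference, these two rules are exactly what Git LFS would write itself; a sketch, assuming git-lfs is installed:

    git lfs track "*.bin"
    git lfs track "*.safetensors"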
.gitignore ADDED
@@ -0,0 +1 @@
+ .ipynb_checkpoints/
app.py ADDED
@@ -0,0 +1,330 @@
+ # -*- coding: utf-8 -*-
+ # --- Import required modules ---
+ import gradio as gr
+ from transformers import DonutProcessor, VisionEncoderDecoderModel
+ from PIL import Image
+ import torch
+ import re
+ import json
+ import os
+ import warnings
+
+ # --- Suppress warning messages ---
+ # "UserWarning: TypedStorage is deprecated" is a PyTorch warning that is safe to ignore.
+ warnings.filterwarnings("ignore", category=UserWarning, message="TypedStorage is deprecated")
+ # Future or other warnings if needed
+ warnings.filterwarnings("ignore", category=FutureWarning)
+
+ # --- Define model and processor paths ---
+ # Assumes the model files have been copied into the Hugging Face Spaces repository:
+ # the repository root contains a donut_sroie_finetuned folder with final_model inside it.
+ model_path_finetuned = "./donut_sroie_finetuned"
+ model_name_base = "naver-clova-ix/donut-base"  # the base model is loaded directly from the Hub
+
+ # --- Load the fine-tuned processor and model ---
+ # In the Spaces environment, try local_files_only=True first (assuming the files are present via Git LFS).
+ print(f"Loading Fine-tuned processor from: {model_path_finetuned}")
+ try:
+     # local_files_only=True forces use of the files inside the Space repository
+     processor = DonutProcessor.from_pretrained(model_path_finetuned, local_files_only=True)
+     print("Successfully loaded fine-tuned processor locally from Space repo.")
+ except Exception as e:
+     print(f"Error loading fine-tuned processor locally: {e}. Check if model files exist at the path.")
+     # A Hub fallback could be added here if needed (the model must be uploaded to the Hub first):
+     # try:
+     #     processor = DonutProcessor.from_pretrained("your-hf-username/your-model-repo-name")  # example Hub path
+     #     print("Loaded fine-tuned processor from Hub as fallback.")
+     # except Exception as e2:
+     #     print(f"FATAL: Could not load fine-tuned processor locally or from Hub: {e2}")
+     #     exit()
+     # For now, exit when local loading fails (uncomment the block above if a fallback is wanted)
+     print(f"FATAL: Could not load fine-tuned processor locally: {e}")
+     exit()
+
+
+ print(f"Loading Fine-tuned model from: {model_path_finetuned}")
+ try:
+     # local_files_only=True forces use of the files inside the Space repository
+     model_finetuned = VisionEncoderDecoderModel.from_pretrained(model_path_finetuned, local_files_only=True)
+     print("Successfully loaded fine-tuned model locally from Space repo.")
+ except Exception as e:
+     print(f"Error loading fine-tuned model locally: {e}. Check if model files exist at the path.")
+     # A Hub fallback could be added here if needed (the model must be uploaded to the Hub first):
+     # try:
+     #     model_finetuned = VisionEncoderDecoderModel.from_pretrained("your-hf-username/your-model-repo-name")  # example Hub path
+     #     print("Loaded fine-tuned model from Hub as fallback.")
+     # except Exception as e2:
+     #     print(f"FATAL: Could not load fine-tuned model locally or from Hub: {e2}")
+     #     exit()
+     # For now, exit when local loading fails (uncomment the block above if a fallback is wanted)
+     print(f"FATAL: Could not load fine-tuned model locally: {e}")
+     exit()
+
+
+ # --- Load the base processor and model (directly from the Hub) ---
+ print(f"Loading Base processor from: {model_name_base}")
+ try:
+     processor_base = DonutProcessor.from_pretrained(model_name_base)
+     print("Successfully loaded base processor.")
+ except Exception as e:
+     print(f"FATAL: Could not load base processor: {e}")
+     exit()
+
+ print(f"Loading Base model from: {model_name_base}")
+ try:
+     model_base = VisionEncoderDecoderModel.from_pretrained(model_name_base)
+     print("Successfully loaded base model.")
+ except Exception as e:
+     print(f"FATAL: Could not load base model: {e}")
+     exit()
+
+
+ # --- Device setup and model placement ---
+ # The Spaces environment provides either a CPU or an allocated GPU.
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print(f"\nUsing device: {device}")
+
+ # Move the models to the selected device
+ try:
+     model_finetuned.to(device)
+     model_base.to(device)
+     print("Models moved to device.")
+     # Set evaluation mode (required for inference)
+     model_finetuned.eval()
+     model_base.eval()
+     print("Models set to evaluation mode.")
+ except Exception as e:
+     print(f"Error moving models to device or setting eval mode: {e}")
+     exit()
+
+
+ # --- Helper function to clean a generated sequence (mainly for the fine-tuned model) ---
+ def clean_sequence(sequence, processor_to_use, prompt_token_str=None):
+     """Removes prompt, EOS, PAD tokens from a generated sequence."""
+     cleaned = sequence
+     try:
+         # Standard tokens first
+         eos_token = processor_to_use.tokenizer.eos_token if processor_to_use.tokenizer.eos_token else "</s>"  # default EOS
+         pad_token = processor_to_use.tokenizer.pad_token if processor_to_use.tokenizer.pad_token else "<pad>"  # default PAD
+         cleaned = cleaned.replace(eos_token, "").replace(pad_token, "").strip()
+
+         # Remove the BOS token if it exists and appears
+         if hasattr(processor_to_use.tokenizer, 'bos_token') and processor_to_use.tokenizer.bos_token:
+             cleaned = cleaned.replace(processor_to_use.tokenizer.bos_token, "").strip()
+
+         # Specific prompt removal (a case-insensitive start check can be more robust)
+         if prompt_token_str:
+             # A simple startswith check is enough if the prompt is always at the beginning
+             if cleaned.startswith(prompt_token_str):
+                 cleaned = cleaned[len(prompt_token_str):].strip()
+             # Regex version (more robust but slightly slower):
+             # cleaned = re.sub(f"^{re.escape(prompt_token_str)}", "", cleaned, flags=re.IGNORECASE).strip()
+
+     except Exception as e:
+         print(f"Warning: Error during sequence cleaning: {e}")
+         return sequence  # return the original if cleaning fails
+     return cleaned
+
+ # --- Helper function to parse the SROIE format ---
+ def token2json_simple(text):
+     """Parses <s_key>value</s_key> format into a dictionary."""
+     output = {}
+     # Regex to find <s_...>...</s_...> patterns, handling potential spaces and newlines in the value.
+     # It captures the key name (e.g., "company") and the value between the tags.
+     parts = re.findall(r"<s_(.*?)>([\s\S]*?)</s_\1>", text)
+     for key, value in parts:
+         # Strip leading/trailing whitespace from key and value
+         output[key.strip()] = value.strip()
+
+     # Add info if parsing failed but text was present
+     if not output and text and not text.isspace():
+         output["parsing_info"] = "Could not parse SROIE key-value pairs from the cleaned sequence."
+         output["cleaned_sequence_preview"] = text[:200] + "..."  # show a preview
+     elif not text or text.isspace():
+         output["parsing_info"] = "Empty sequence after cleaning, nothing to parse."
+
+     return output
+
+ # --- Combined image processing and inference function ---
+ # The decorator disables gradient computation (saves memory and speeds up inference)
+ @torch.no_grad()
+ def process_image_comparison(image_input):
+     if image_input is None:
+         no_image_msg = {"error": "Please upload an image."}
+         # Ensure JSON output for the Gradio component
+         return json.dumps(no_image_msg, indent=2, ensure_ascii=False), json.dumps(no_image_msg, indent=2, ensure_ascii=False)
+
+     try:
+         # Gradio's numpy input needs conversion
+         image = Image.fromarray(image_input).convert("RGB")
+     except Exception as e:
+         error_msg = {"error": f"Image conversion error: {e}"}
+         error_json_str = json.dumps(error_msg, indent=2, ensure_ascii=False)
+         return error_json_str, error_json_str
+
+     results_ft_json_str = "{}"
+     results_base_json_str = "{}"
+     sequence_ft_raw = "N/A"
+     sequence_base_raw = "N/A"
+
+     # === Fine-tuned model inference ===
+     try:
+         pixel_values_ft = processor(image, return_tensors="pt").pixel_values.to(device)
+         task_prompt_ft = "<s_sroie>"  # start prompt for the fine-tuned model
+         decoder_input_ids_ft = processor.tokenizer(
+             task_prompt_ft, add_special_tokens=False, return_tensors="pt"
+         ).input_ids.to(device)
+
+         # Generation parameters
+         generation_config_ft = {
+             "max_length": model_finetuned.config.decoder.max_position_embeddings,
+             "pad_token_id": processor.tokenizer.pad_token_id,
+             "eos_token_id": processor.tokenizer.eos_token_id,
+             "use_cache": True,
+             "bad_words_ids": [[processor.tokenizer.unk_token_id]] if processor.tokenizer.unk_token_id else None,
+             "return_dict_in_generate": True,
+             "decoder_input_ids": decoder_input_ids_ft  # provide the start prompt
+         }
+
+         outputs_ft = model_finetuned.generate(pixel_values_ft, **generation_config_ft)
+
+         sequence_ft_raw = processor.batch_decode(outputs_ft.sequences)[0]
+         # print(f"\nFine-tuned Raw Output: {sequence_ft_raw}")  # server log (for debugging)
+
+         # Clean the fine-tuned model output
+         sequence_ft_cleaned = clean_sequence(sequence_ft_raw, processor, prompt_token_str=task_prompt_ft)
+         # print(f"Fine-tuned Cleaned Output: {sequence_ft_cleaned}")  # server log (for debugging)
+
+         # Parse the cleaned output
+         result_json_ft = token2json_simple(sequence_ft_cleaned)
+         result_json_ft["raw_decoded_sequence_preview"] = sequence_ft_raw[:200] + "..."  # add a preview of the raw output
+
+         # Convert to the final JSON string
+         results_ft_json_str = json.dumps(result_json_ft, indent=2, ensure_ascii=False, sort_keys=False)
+
+     except Exception as e:
+         print(f"Error during fine-tuned model inference: {e}")
+         import traceback
+         traceback.print_exc()  # detailed error log on the server
+         results_ft_json_str = json.dumps({
+             "error": f"Fine-tuned model inference error: {e}",
+             "raw_decoded_sequence_before_error": sequence_ft_raw
+         }, indent=2, ensure_ascii=False)
+
+     # === Base model inference ===
+     try:
+         pixel_values_base = processor_base(image, return_tensors="pt").pixel_values.to(device)
+         # Prompt for the base model (e.g., <s_iitcdip> or another generic document prompt);
+         # <s_iitcdip> is used here, same as in the earlier code.
+         task_prompt_base = "<s_iitcdip>"
+         # The base model may not have this prompt token, so it needs checking (or a different prompt);
+         # proceed with it for now.
+         try:
+             decoder_input_ids_base = processor_base.tokenizer(
+                 task_prompt_base,
+                 add_special_tokens=False,
+                 return_tensors="pt",
+             ).input_ids.to(device)
+         except Exception as tokenizer_e:
+             print(f"Warning: Base processor cannot tokenize prompt '{task_prompt_base}'. Using default generation. Error: {tokenizer_e}")
+             decoder_input_ids_base = None  # generate without a prompt
+
+         # Generation parameters
+         generation_config_base = {
+             "max_length": model_base.config.decoder.max_position_embeddings,
+             "early_stopping": True,
+             "pad_token_id": processor_base.tokenizer.pad_token_id,
+             "eos_token_id": processor_base.tokenizer.eos_token_id,
+             "use_cache": True,
+             "num_beams": 1,  # greedy decoding
+             "bad_words_ids": [[processor_base.tokenizer.unk_token_id]] if processor_base.tokenizer.unk_token_id else None,
+             "return_dict_in_generate": True,
+         }
+         # Add the prompt only if it was encoded successfully
+         if decoder_input_ids_base is not None:
+             generation_config_base["decoder_input_ids"] = decoder_input_ids_base
+
+         outputs_base = model_base.generate(pixel_values_base, **generation_config_base)
+
+         sequence_base_raw = processor_base.batch_decode(outputs_base.sequences)[0]
+         # print(f"\nBase Raw Output: {sequence_base_raw}")  # server log (for debugging)
+
+         # Clean the base model output (using skip_special_tokens)
+         sequence_base_cleaned = processor_base.batch_decode(outputs_base.sequences, skip_special_tokens=True)[0]
+         # print(f"Base Cleaned Output (skip_special_tokens): {sequence_base_cleaned}")  # server log (for debugging)
+
+         # Build the result dictionary
+         result_json_base = {
+             "raw_decoded_sequence_preview": sequence_base_raw[:200] + "...",  # preview of the raw output
+             "output_skip_special_tokens": sequence_base_cleaned  # cleaned output
+         }
+         # Convert to the final JSON string
+         results_base_json_str = json.dumps(result_json_base, indent=2, ensure_ascii=False, sort_keys=False)
+
+     except Exception as e:
+         print(f"Error during base model inference: {e}")
+         import traceback
+         traceback.print_exc()  # detailed error log on the server
+         results_base_json_str = json.dumps({
+             "error": f"Base model inference error: {e}",
+             "raw_decoded_sequence_before_error": sequence_base_raw  # include the raw sequence if available
+         }, indent=2, ensure_ascii=False)
+
+     # Return both models' results as JSON strings
+     return results_ft_json_str, results_base_json_str
+
+
+ # --- Gradio interface definition ---
+ # CSS styles
+ custom_css = """
+ body { background-color: #f0f4f8; font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
+ #main_title { text-align: center; color: #1a5276; font-size: 2.3em; font-weight: 600; margin-top: 20px; margin-bottom: 5px; }
+ #sub_description { text-align: center; color: #566573; font-size: 1.0em; margin-bottom: 25px; }
+ .gradio-container { border-radius: 10px !important; box-shadow: 0 3px 10px rgba(0,0,0,0.08); padding: 25px !important; }
+ footer { display: none !important; } /* Hide Gradio footer */
+ #output-title-ft, #output-title-base { color: #1a5276; font-weight: 600; margin-bottom: 8px; font-size: 1.2em; border-bottom: 2px solid #aed6f1; padding-bottom: 4px; }
+ #output_row > div.gradio-column { border: 1px solid #d5dbdb; padding: 15px !important; border-radius: 8px; background-color: #ffffff; margin: 0 8px !important; box-shadow: 0 1px 3px rgba(0,0,0,0.04); }
+ #json_output_ft > div:nth-child(2), #json_output_base > div:nth-child(2) { max-height: 600px; overflow-y: auto !important; } /* JSON output scroll */
+ """
+
+ # Build the Gradio Blocks interface
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky")) as demo:
+     gr.Markdown("# Donut Model Comparison: Fine-tuned vs Base", elem_id="main_title")
+     gr.Markdown("Upload a receipt image to compare the extraction results of the fine-tuned model (SROIE parsing) and the base model.", elem_id="sub_description")
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             image_input = gr.Image(type="numpy", label="🧾 Upload a receipt image")
+             submit_btn = gr.Button("🚀 Start comparison", variant="primary", scale=0)
+             # --- The example-image block is commented out for now; paths can be a problem in the Spaces environment. ---
+             # It can be re-enabled if example images are uploaded to the Space repository with matching paths.
+             # example_img_dir = "example_images"  # e.g., create an example_images folder in the repo root and put images there
+             # example_paths = [os.path.join(example_img_dir, f) for f in ["example1.jpg", "example2.jpg"] if os.path.exists(os.path.join(example_img_dir, f))]
+             # if example_paths:
+             #     gr.Examples(examples=example_paths, inputs=image_input, label="Click an example image (then press the 'Start comparison' button)")
+             # else:
+             #     gr.Markdown("_(No example images found.)_")
+
+         with gr.Column(scale=2):
+             with gr.Row(elem_id="output_row"):
+                 with gr.Column(scale=1):
+                     gr.Markdown("### ✨ Fine-tuned Model (SROIE parsing)", elem_id="output-title-ft")
+                     # JSON component: label updated, interactive=False added
+                     json_output_ft = gr.JSON(label="Fine-tuned result (JSON)", interactive=False, elem_id="json_output_ft")
+                 with gr.Column(scale=1):
+                     gr.Markdown("### 💡 Base Model (Raw + Cleaned)", elem_id="output-title-base")
+                     # JSON component: label updated, interactive=False added
+                     json_output_base = gr.JSON(label="Base model result (JSON)", interactive=False, elem_id="json_output_base")
+
+     # Wire the button click to the function and its inputs/outputs
+     submit_btn.click(
+         fn=process_image_comparison,
+         inputs=image_input,
+         outputs=[json_output_ft, json_output_base]  # components in the order the function returns them
+     )
+
+ # --- Launch the Gradio app ---
+ # This is what gets called when running on Hugging Face Spaces.
+ if __name__ == "__main__":
+     # share=True is not needed in the Spaces environment.
+     demo.launch()
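For reference, a minimal sketch of what token2json_simple produces for a cleaned fine-tuned output (the receipt values below are invented for illustration):

    seq = "<s_company>ACME MART</s_company><s_date>01/02/2023</s_date><s_total>12.90</s_total>"
    token2json_simple(seq)
    # -> {"company": "ACME MART", "date": "01/02/2023", "total": "12.90"}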
donut_sroie_finetuned/added_tokens.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "</s_address>": 57531,
+   "</s_company>": 57527,
+   "</s_date>": 57529,
+   "</s_total>": 57533,
+   "<s_address>": 57530,
+   "<s_company>": 57526,
+   "<s_date>": 57528,
+   "<s_iitcdip>": 57523,
+   "<s_sroie>": 57525,
+   "<s_synthdog>": 57524,
+   "<s_total>": 57532,
+   "<sep/>": 57522
+ }
donut_sroie_finetuned/config.json ADDED
@@ -0,0 +1,78 @@
+ {
+   "architectures": [
+     "VisionEncoderDecoderModel"
+   ],
+   "decoder": {
+     "activation_dropout": 0.0,
+     "activation_function": "gelu",
+     "add_cross_attention": true,
+     "add_final_layer_norm": true,
+     "attention_dropout": 0.0,
+     "classifier_dropout": 0.0,
+     "d_model": 1024,
+     "decoder_attention_heads": 16,
+     "decoder_ffn_dim": 4096,
+     "decoder_layerdrop": 0.0,
+     "decoder_layers": 4,
+     "dropout": 0.1,
+     "encoder_attention_heads": 16,
+     "encoder_ffn_dim": 4096,
+     "encoder_layerdrop": 0.0,
+     "encoder_layers": 12,
+     "init_std": 0.02,
+     "is_decoder": true,
+     "is_encoder_decoder": false,
+     "max_position_embeddings": 1536,
+     "model_type": "mbart",
+     "num_hidden_layers": 12,
+     "scale_embedding": true,
+     "torch_dtype": "float32",
+     "use_cache": true,
+     "vocab_size": 57534
+   },
+   "decoder_start_token_id": 57525,
+   "encoder": {
+     "attention_probs_dropout_prob": 0.0,
+     "depths": [
+       2,
+       2,
+       14,
+       2
+     ],
+     "drop_path_rate": 0.1,
+     "embed_dim": 128,
+     "hidden_act": "gelu",
+     "hidden_dropout_prob": 0.0,
+     "hidden_size": 1024,
+     "image_size": [
+       2560,
+       1920
+     ],
+     "initializer_range": 0.02,
+     "layer_norm_eps": 1e-05,
+     "mlp_ratio": 4.0,
+     "model_type": "donut-swin",
+     "num_channels": 3,
+     "num_heads": [
+       4,
+       8,
+       16,
+       32
+     ],
+     "num_layers": 4,
+     "patch_size": 4,
+     "path_norm": true,
+     "qkv_bias": true,
+     "torch_dtype": "float32",
+     "use_absolute_embeddings": false,
+     "window_size": 10
+   },
+   "is_encoder_decoder": true,
+   "max_length": null,
+   "model_type": "vision-encoder-decoder",
+   "pad_token_id": 1,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.50.3",
+   "vocab_size": 57534
+ }
donut_sroie_finetuned/generation_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 0,
+   "eos_token_id": 2,
+   "forced_eos_token_id": 2,
+   "max_length": 1536,
+   "pad_token_id": 1,
+   "transformers_version": "4.50.3"
+ }
donut_sroie_finetuned/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dbebd2fe9f30d480deaffb6201e514e4421afd9c9129a9c335ddb14feb85a0e6
+ size 809107608
donut_sroie_finetuned/preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "do_align_long_axis": true,
+   "do_normalize": true,
+   "do_pad": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "do_thumbnail": true,
+   "image_mean": [
+     0.5,
+     0.5,
+     0.5
+   ],
+   "image_processor_type": "DonutImageProcessor",
+   "image_std": [
+     0.5,
+     0.5,
+     0.5
+   ],
+   "processor_class": "DonutProcessor",
+   "resample": 2,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "height": 2560,
+     "width": 1920
+   }
+ }
donut_sroie_finetuned/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cb9e3dce4c326195d08fc3dd0f7e2eee1da8595c847bf4c1a9c78b7a82d47e2d
+ size 1296245
donut_sroie_finetuned/special_tokens_map.json ADDED
@@ -0,0 +1,116 @@
+ {
+   "additional_special_tokens": [
+     {
+       "content": "<s_sroie>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<s_company>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "</s_company>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<s_date>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "</s_date>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<s_address>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "</s_address>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<s_total>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "</s_total>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     }
+   ],
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
donut_sroie_finetuned/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
donut_sroie_finetuned/tokenizer_config.json ADDED
@@ -0,0 +1,164 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57521": {
+       "content": "<mask>",
+       "lstrip": true,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57522": {
+       "content": "<sep/>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "57523": {
+       "content": "<s_iitcdip>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57524": {
+       "content": "<s_synthdog>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57525": {
+       "content": "<s_sroie>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57526": {
+       "content": "<s_company>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57527": {
+       "content": "</s_company>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57528": {
+       "content": "<s_date>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57529": {
+       "content": "</s_date>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57530": {
+       "content": "<s_address>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57531": {
+       "content": "</s_address>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57532": {
+       "content": "<s_total>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "57533": {
+       "content": "</s_total>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<s_sroie>",
+     "<s_company>",
+     "</s_company>",
+     "<s_date>",
+     "</s_date>",
+     "<s_address>",
+     "</s_address>",
+     "<s_total>",
+     "</s_total>"
+   ],
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "extra_special_tokens": {},
+   "mask_token": "<mask>",
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<pad>",
+   "processor_class": "DonutProcessor",
+   "sep_token": "</s>",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "XLMRobertaTokenizer",
+   "unk_token": "<unk>"
+ }
donut_sroie_finetuned/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2069b134cbf99306f3e34c7fcbe05a9b8bfcf4f787559c7679a8c0d0c5dccf76
+ size 5496
git.gitattributes ADDED
@@ -0,0 +1,2 @@
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ transformers
+ torch
+ torchvision
+ Pillow
+ gradio
+ accelerate
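Hugging Face Spaces installs these dependencies automatically at build time; for a local run the equivalent is:

    pip install -r requirements.txt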