Spaces:
Sleeping
Sleeping
cancel button
Browse files- src/converter.py +69 -47
- src/ui.py +5 -1
src/converter.py
CHANGED
@@ -3,6 +3,7 @@ import logging
|
|
3 |
import time
|
4 |
import os
|
5 |
import threading
|
|
|
6 |
from pathlib import Path
|
7 |
|
8 |
# Use relative imports instead of absolute imports
|
@@ -13,6 +14,8 @@ import parsers
|
|
13 |
|
14 |
# Reference to the cancellation flag from ui.py
|
15 |
conversion_cancelled = None
|
|
|
|
|
16 |
|
17 |
def set_cancellation_flag(flag):
|
18 |
"""Set the reference to the cancellation flag from ui.py"""
|
@@ -21,6 +24,16 @@ def set_cancellation_flag(flag):
|
|
21 |
logging.info(f"Cancellation flag set: {flag}")
|
22 |
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
def convert_file(file_path, parser_name, ocr_method_name, output_format):
|
25 |
"""
|
26 |
Convert a file using the specified parser and OCR method.
|
@@ -36,21 +49,20 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
|
|
36 |
"""
|
37 |
global conversion_cancelled
|
38 |
|
|
|
|
|
|
|
|
|
39 |
if not file_path:
|
40 |
return "Please upload a file.", None
|
41 |
|
42 |
-
#
|
43 |
-
if
|
44 |
-
|
45 |
|
46 |
# Create a temporary file with English filename
|
47 |
temp_input = None
|
48 |
try:
|
49 |
-
# Check for early cancellation
|
50 |
-
if conversion_cancelled and conversion_cancelled.is_set():
|
51 |
-
logging.info("Conversion cancelled before file preparation")
|
52 |
-
return "Conversion cancelled.", None
|
53 |
-
|
54 |
original_ext = Path(file_path).suffix
|
55 |
with tempfile.NamedTemporaryFile(suffix=original_ext, delete=False) as temp_input:
|
56 |
# Copy the content of original file to temp file
|
@@ -59,68 +71,78 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
|
|
59 |
file_path = temp_input.name
|
60 |
|
61 |
# Check for cancellation after file preparation
|
62 |
-
if
|
63 |
-
logging.info("Conversion cancelled after file preparation")
|
64 |
cleanup_temp_file(temp_input.name)
|
65 |
return "Conversion cancelled.", None
|
66 |
|
67 |
# Use the parser factory to parse the document
|
68 |
-
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
-
#
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
# Check
|
86 |
-
if
|
87 |
-
logging.info("Conversion cancelled after parsing")
|
88 |
cleanup_temp_file(temp_input.name)
|
89 |
return "Conversion cancelled.", None
|
90 |
-
|
91 |
-
duration = time.time() - start
|
92 |
-
logging.info(f"Processed in {duration:.2f} seconds.")
|
93 |
|
94 |
-
#
|
95 |
-
|
96 |
-
|
|
|
|
|
97 |
cleanup_temp_file(temp_input.name)
|
98 |
return "Conversion cancelled.", None
|
99 |
|
100 |
-
# Determine the file extension based on the output format
|
101 |
-
if output_format == "Markdown":
|
102 |
-
ext = ".md"
|
103 |
-
elif output_format == "JSON":
|
104 |
-
ext = ".json"
|
105 |
-
elif output_format == "Text":
|
106 |
-
ext = ".txt"
|
107 |
-
elif output_format == "Document Tags":
|
108 |
-
ext = ".doctags"
|
109 |
-
else:
|
110 |
-
ext = ".txt"
|
111 |
-
|
112 |
# Create a temporary file for download
|
113 |
with tempfile.NamedTemporaryFile(mode="w", suffix=ext, delete=False, encoding="utf-8") as tmp:
|
114 |
tmp.write(content)
|
115 |
tmp_path = tmp.name
|
116 |
|
117 |
-
# Clean up
|
118 |
cleanup_temp_file(temp_input.name)
|
|
|
|
|
|
|
|
|
119 |
|
120 |
return content, tmp_path
|
121 |
|
122 |
except Exception as e:
|
123 |
logging.error(f"Error during conversion: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
if temp_input and hasattr(temp_input, 'name'):
|
125 |
cleanup_temp_file(temp_input.name)
|
126 |
return f"Error: {e}", None
|
|
|
3 |
import time
|
4 |
import os
|
5 |
import threading
|
6 |
+
import signal
|
7 |
from pathlib import Path
|
8 |
|
9 |
# Use relative imports instead of absolute imports
|
|
|
14 |
|
15 |
# Reference to the cancellation flag from ui.py
|
16 |
conversion_cancelled = None
|
17 |
+
# Track the current conversion thread for cancellation
|
18 |
+
current_conversion_thread = None
|
19 |
|
20 |
def set_cancellation_flag(flag):
|
21 |
"""Set the reference to the cancellation flag from ui.py"""
|
|
|
24 |
logging.info(f"Cancellation flag set: {flag}")
|
25 |
|
26 |
|
27 |
+
def check_cancellation():
|
28 |
+
"""Check if cancellation is requested and interrupt if needed"""
|
29 |
+
global conversion_cancelled
|
30 |
+
if conversion_cancelled and conversion_cancelled.is_set():
|
31 |
+
logging.info("Cancellation detected, raising interrupt")
|
32 |
+
# Report the cancellation to the caller; no exception is raised here
|
33 |
+
return True
|
34 |
+
return False
|
35 |
+
|
36 |
+
|
37 |
def convert_file(file_path, parser_name, ocr_method_name, output_format):
|
38 |
"""
|
39 |
Convert a file using the specified parser and OCR method.
|
|
|
49 |
"""
|
50 |
global conversion_cancelled
|
51 |
|
52 |
+
# Record start time for logging
|
53 |
+
start_time = time.time()
|
54 |
+
logging.info(f"Starting conversion of {file_path}")
|
55 |
+
|
56 |
if not file_path:
|
57 |
return "Please upload a file.", None
|
58 |
|
59 |
+
# Check immediately for cancellation
|
60 |
+
if check_cancellation():
|
61 |
+
return "Conversion cancelled.", None
|
62 |
|
63 |
# Create a temporary file with English filename
|
64 |
temp_input = None
|
65 |
try:
|
|
|
|
|
|
|
|
|
|
|
66 |
original_ext = Path(file_path).suffix
|
67 |
with tempfile.NamedTemporaryFile(suffix=original_ext, delete=False) as temp_input:
|
68 |
# Copy the content of original file to temp file
|
|
|
71 |
file_path = temp_input.name
|
72 |
|
73 |
# Check for cancellation after file preparation
|
74 |
+
if check_cancellation():
|
|
|
75 |
cleanup_temp_file(temp_input.name)
|
76 |
return "Conversion cancelled.", None
|
77 |
|
78 |
# Use the parser factory to parse the document
|
79 |
+
logging.info(f"Starting document parsing with {parser_name} and {ocr_method_name}")
|
80 |
|
81 |
+
def interruptible_parser():
|
82 |
+
"""Run parser in a way that can be checked for cancellation"""
|
83 |
+
try:
|
84 |
+
# Log starting
|
85 |
+
logging.info("Parser thread started")
|
86 |
+
return ParserFactory.parse_document(
|
87 |
+
file_path=file_path,
|
88 |
+
parser_name=parser_name,
|
89 |
+
ocr_method_name=ocr_method_name,
|
90 |
+
output_format=output_format.lower(),
|
91 |
+
cancellation_flag=conversion_cancelled
|
92 |
+
)
|
93 |
+
except Exception as e:
|
94 |
+
logging.error(f"Parser thread error: {str(e)}")
|
95 |
+
if conversion_cancelled and conversion_cancelled.is_set():
|
96 |
+
return "Conversion cancelled."
|
97 |
+
raise
|
98 |
+
|
99 |
+
# Run the parser once, directly; the cancellation flag is passed to the parser and re-checked after it returns
|
100 |
+
content = None
|
101 |
+
parse_start = time.time()
|
102 |
|
103 |
+
# Perform the actual parsing
|
104 |
+
content = interruptible_parser()
|
105 |
+
|
106 |
+
# If we got here, parsing is complete
|
107 |
+
logging.info(f"Parsing completed in {time.time() - parse_start:.2f} seconds")
|
108 |
+
|
109 |
+
# Check cancellation immediately after parsing
|
110 |
+
if check_cancellation() or content == "Conversion cancelled.":
|
|
|
111 |
cleanup_temp_file(temp_input.name)
|
112 |
return "Conversion cancelled.", None
|
|
|
|
|
|
|
113 |
|
114 |
+
# Determine the file extension
|
115 |
+
ext = get_output_extension(output_format)
|
116 |
+
|
117 |
+
# Final cancellation check before file creation
|
118 |
+
if check_cancellation():
|
119 |
cleanup_temp_file(temp_input.name)
|
120 |
return "Conversion cancelled.", None
|
121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
# Create a temporary file for download
|
123 |
with tempfile.NamedTemporaryFile(mode="w", suffix=ext, delete=False, encoding="utf-8") as tmp:
|
124 |
tmp.write(content)
|
125 |
tmp_path = tmp.name
|
126 |
|
127 |
+
# Clean up temporary files
|
128 |
cleanup_temp_file(temp_input.name)
|
129 |
+
|
130 |
+
# Log completion time
|
131 |
+
total_time = time.time() - start_time
|
132 |
+
logging.info(f"Conversion completed in {total_time:.2f} seconds")
|
133 |
|
134 |
return content, tmp_path
|
135 |
|
136 |
except Exception as e:
|
137 |
logging.error(f"Error during conversion: {str(e)}")
|
138 |
+
|
139 |
+
# Check if this was a cancellation
|
140 |
+
if conversion_cancelled and conversion_cancelled.is_set():
|
141 |
+
if temp_input and hasattr(temp_input, 'name'):
|
142 |
+
cleanup_temp_file(temp_input.name)
|
143 |
+
return "Conversion cancelled.", None
|
144 |
+
|
145 |
+
# Other error
|
146 |
if temp_input and hasattr(temp_input, 'name'):
|
147 |
cleanup_temp_file(temp_input.name)
|
148 |
return f"Error: {e}", None
|
src/ui.py
CHANGED
@@ -5,6 +5,7 @@ import time
|
|
5 |
from converter import convert_file, set_cancellation_flag
|
6 |
from docling_chat import chat_with_document
|
7 |
from parser_registry import ParserRegistry
|
|
|
8 |
|
9 |
|
10 |
# Add a global variable to track cancellation state
|
@@ -91,9 +92,12 @@ def handle_page_navigation(direction, current, pages):
|
|
91 |
|
92 |
|
93 |
def cancel_conversion():
|
94 |
-
"""Set the cancellation flag."""
|
95 |
global conversion_cancelled
|
|
|
96 |
conversion_cancelled.set()
|
|
|
|
|
97 |
return gr.update(visible=False)
|
98 |
|
99 |
|
|
|
5 |
from converter import convert_file, set_cancellation_flag
|
6 |
from docling_chat import chat_with_document
|
7 |
from parser_registry import ParserRegistry
|
8 |
+
import logging
|
9 |
|
10 |
|
11 |
# Add a global variable to track cancellation state
|
|
|
92 |
|
93 |
|
94 |
def cancel_conversion():
|
95 |
+
"""Set the cancellation flag and force UI update."""
|
96 |
global conversion_cancelled
|
97 |
+
# Set the flag
|
98 |
conversion_cancelled.set()
|
99 |
+
logging.info("Cancel button clicked, flag set")
|
100 |
+
# Update UI immediately to show cancellation is in progress
|
101 |
return gr.update(visible=False)
|
102 |
|
103 |
|