AnseMin committed on
Commit
02fee92
·
1 Parent(s): eb2eaac

cancel button fix 3

Browse files
Files changed (3) hide show
  1. src/converter.py +41 -72
  2. src/parser_factory.py +14 -23
  3. src/ui.py +25 -29
src/converter.py CHANGED
@@ -39,11 +39,8 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
39
  if not file_path:
40
  return "Please upload a file.", None
41
 
42
- # Check for cancellation
43
- if conversion_cancelled and conversion_cancelled.is_set():
44
- return "Conversion cancelled.", None
45
-
46
  # Create a temporary file with English filename
 
47
  try:
48
  original_ext = Path(file_path).suffix
49
  with tempfile.NamedTemporaryFile(suffix=original_ext, delete=False) as temp_input:
@@ -51,92 +48,64 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
51
  with open(file_path, 'rb') as original:
52
  temp_input.write(original.read())
53
  file_path = temp_input.name
54
- except Exception as e:
55
- return f"Error creating temporary file: {e}", None
56
 
57
- # Check for cancellation again
58
- if conversion_cancelled and conversion_cancelled.is_set():
59
- # Clean up temp file
60
- try:
61
- os.unlink(temp_input.name)
62
- except:
63
- pass
64
- return "Conversion cancelled.", None
65
 
66
- try:
67
  # Use the parser factory to parse the document
68
  start = time.time()
69
-
70
- # We need to modify the parsing process to check for cancellation
71
- # This requires changes to the parser implementation, but we can add a hook here
72
-
73
- # Pass the cancellation flag to the parser factory
74
  content = ParserFactory.parse_document(
75
  file_path=file_path,
76
  parser_name=parser_name,
77
  ocr_method_name=ocr_method_name,
78
  output_format=output_format.lower(),
79
- cancellation_flag=conversion_cancelled # Pass the flag to parsers
80
  )
81
 
82
- duration = time.time() - start
83
- logging.info(f"Processed in {duration:.2f} seconds.")
84
-
85
- # Check for cancellation after processing
86
  if conversion_cancelled and conversion_cancelled.is_set():
87
- # Clean up temp file
88
- try:
89
- os.unlink(temp_input.name)
90
- except:
91
- pass
92
  return "Conversion cancelled.", None
93
-
94
- except Exception as e:
95
- # Clean up temp file
96
- try:
97
- os.unlink(temp_input.name)
98
- except:
99
- pass
100
- return f"Error: {e}", None
101
 
102
- # Determine the file extension based on the output format
103
- if output_format == "Markdown":
104
- ext = ".md"
105
- elif output_format == "JSON":
106
- ext = ".json"
107
- elif output_format == "Text":
108
- ext = ".txt"
109
- elif output_format == "Document Tags":
110
- ext = ".doctags"
111
- else:
112
- ext = ".txt"
113
-
114
- # Check for cancellation again
115
- if conversion_cancelled and conversion_cancelled.is_set():
116
- # Clean up temp file
117
- try:
118
- os.unlink(temp_input.name)
119
- except:
120
- pass
121
- return "Conversion cancelled.", None
122
 
123
- try:
124
- # Create a temporary file for download
 
 
125
  with tempfile.NamedTemporaryFile(mode="w", suffix=ext, delete=False, encoding="utf-8") as tmp:
126
  tmp.write(content)
127
  tmp_path = tmp.name
128
-
129
- # Clean up the temporary input file
130
- try:
131
- os.unlink(temp_input.name)
132
- except:
133
- pass
134
-
135
  return content, tmp_path
 
136
  except Exception as e:
137
- # Clean up temp file
138
- try:
139
- os.unlink(temp_input.name)
140
- except:
141
- pass
142
  return f"Error: {e}", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  if not file_path:
40
  return "Please upload a file.", None
41
 
 
 
 
 
42
  # Create a temporary file with English filename
43
+ temp_input = None
44
  try:
45
  original_ext = Path(file_path).suffix
46
  with tempfile.NamedTemporaryFile(suffix=original_ext, delete=False) as temp_input:
 
48
  with open(file_path, 'rb') as original:
49
  temp_input.write(original.read())
50
  file_path = temp_input.name
 
 
51
 
52
+ # Early cancellation check
53
+ if conversion_cancelled and conversion_cancelled.is_set():
54
+ cleanup_temp_file(temp_input.name)
55
+ return "Conversion cancelled.", None
 
 
 
 
56
 
 
57
  # Use the parser factory to parse the document
58
  start = time.time()
 
 
 
 
 
59
  content = ParserFactory.parse_document(
60
  file_path=file_path,
61
  parser_name=parser_name,
62
  ocr_method_name=ocr_method_name,
63
  output_format=output_format.lower(),
64
+ cancellation_flag=conversion_cancelled
65
  )
66
 
67
+ # If conversion was cancelled during parsing
 
 
 
68
  if conversion_cancelled and conversion_cancelled.is_set():
69
+ cleanup_temp_file(temp_input.name)
 
 
 
 
70
  return "Conversion cancelled.", None
 
 
 
 
 
 
 
 
71
 
72
+ duration = time.time() - start
73
+ logging.info(f"Processed in {duration:.2f} seconds.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
+ # Determine output file extension
76
+ ext = get_output_extension(output_format)
77
+
78
+ # Create download file
79
  with tempfile.NamedTemporaryFile(mode="w", suffix=ext, delete=False, encoding="utf-8") as tmp:
80
  tmp.write(content)
81
  tmp_path = tmp.name
82
+
83
+ # Cleanup and return
84
+ cleanup_temp_file(temp_input.name)
 
 
 
 
85
  return content, tmp_path
86
+
87
  except Exception as e:
88
+ if temp_input and temp_input.name:
89
+ cleanup_temp_file(temp_input.name)
 
 
 
90
  return f"Error: {e}", None
91
+
92
+
93
def cleanup_temp_file(file_path):
    """Delete a temporary file on a best-effort basis.

    Filesystem errors (file already removed, permission denied, ...) are
    logged at debug level and otherwise ignored, so cleanup never masks the
    result of the conversion that triggered it. Unlike a bare ``except:``,
    this does not swallow ``KeyboardInterrupt`` / ``SystemExit`` or hide
    unrelated programming errors.

    Args:
        file_path: Path of the file to remove. A falsy value (``None`` or
            an empty string) is treated as "nothing to clean up".

    Returns:
        None
    """
    # Nothing was created, so there is nothing to delete.
    if not file_path:
        return

    try:
        os.unlink(file_path)
    except OSError as exc:
        # Best-effort: a leftover temp file is not worth failing the caller.
        logging.debug("Could not remove temporary file %s: %s", file_path, exc)
99
+
100
+
101
def get_output_extension(output_format):
    """Return the download file suffix for an output format display name.

    The comparison is exact (case-sensitive). Any name that is not one of
    the known formats falls back to the plain-text suffix.

    Args:
        output_format: Display name of the format, e.g. ``"Markdown"``.

    Returns:
        The file suffix including the leading dot, e.g. ``".md"``.
    """
    known_suffixes = (
        ("Markdown", ".md"),
        ("JSON", ".json"),
        ("Text", ".txt"),
        ("Document Tags", ".doctags"),
    )
    for label, suffix in known_suffixes:
        if output_format == label:
            return suffix

    # Unrecognized formats are saved as plain text.
    return ".txt"
src/parser_factory.py CHANGED
@@ -28,25 +28,14 @@ class ParserFactory:
28
  return parser_class()
29
 
30
  @classmethod
31
- def parse_document(cls,
32
- file_path: Union[str, Path],
33
- parser_name: str,
34
- ocr_method_name: str,
35
- cancellation_flag: Optional[threading.Event] = None,
36
  **kwargs) -> str:
37
- """
38
- Parse a document using the specified parser and OCR method.
39
-
40
- Args:
41
- file_path: Path to the document
42
- parser_name: Name of the parser to use
43
- ocr_method_name: Display name of the OCR method to use
44
- cancellation_flag: Optional flag to check for cancellation
45
- **kwargs: Additional parser-specific options
46
-
47
- Returns:
48
- str: The parsed content
49
- """
50
  parser = cls.create_parser(parser_name)
51
  if not parser:
52
  raise ValueError(f"Unknown parser: {parser_name}")
@@ -56,10 +45,12 @@ class ParserFactory:
56
  if not ocr_method_id:
57
  raise ValueError(f"Unknown OCR method: {ocr_method_name} for parser {parser_name}")
58
 
59
- # Check for cancellation
60
- if cancellation_flag and cancellation_flag.is_set():
61
- return "Conversion cancelled."
62
-
63
  # Parse the document, passing the cancellation flag
64
  kwargs['cancellation_flag'] = cancellation_flag
65
- return parser.parse(file_path, ocr_method=ocr_method_id, **kwargs)
 
 
 
 
 
 
 
28
  return parser_class()
29
 
30
  @classmethod
31
+ def parse_document(cls, file_path: Union[str, Path], parser_name: str,
32
+ ocr_method_name: str, cancellation_flag: Optional[threading.Event] = None,
 
 
 
33
  **kwargs) -> str:
34
+ """Parse a document using the specified parser and OCR method."""
35
+ # Quick cancellation check
36
+ if cancellation_flag and cancellation_flag.is_set():
37
+ return "Conversion cancelled."
38
+
 
 
 
 
 
 
 
 
39
  parser = cls.create_parser(parser_name)
40
  if not parser:
41
  raise ValueError(f"Unknown parser: {parser_name}")
 
45
  if not ocr_method_id:
46
  raise ValueError(f"Unknown OCR method: {ocr_method_name} for parser {parser_name}")
47
 
 
 
 
 
48
  # Parse the document, passing the cancellation flag
49
  kwargs['cancellation_flag'] = cancellation_flag
50
+ result = parser.parse(file_path, ocr_method=ocr_method_id, **kwargs)
51
+
52
+ # Final cancellation check before returning
53
+ if cancellation_flag and cancellation_flag.is_set():
54
+ return "Conversion cancelled."
55
+
56
+ return result
src/ui.py CHANGED
@@ -63,24 +63,21 @@ def handle_convert(file_path, parser_name, ocr_method_name, output_format):
63
  """Handle file conversion."""
64
  global conversion_cancelled
65
 
66
- # Check if cancelled
67
- if conversion_cancelled.is_set():
68
- conversion_cancelled.clear()
69
- return "Conversion cancelled.", None, [], 1, "", gr.update(visible=False), gr.update(visible=False)
70
-
71
- # Perform the conversion
72
- content, download_file = convert_file(file_path, parser_name, ocr_method_name, output_format)
73
-
74
- # Check if cancelled after conversion
75
- if conversion_cancelled.is_set():
76
- conversion_cancelled.clear()
77
- return "Conversion cancelled.", None, [], 1, "", gr.update(visible=False), gr.update(visible=False)
78
-
79
- # Process results
80
- pages = split_content_into_pages(str(content))
81
- page_info = f"Page 1/{len(pages)}"
82
-
83
- return str(pages[0]) if pages else "", download_file, pages, 1, page_info, gr.update(visible=True), gr.update(visible=False)
84
 
85
 
86
  def handle_page_navigation(direction, current, pages):
@@ -107,7 +104,7 @@ def create_ui():
107
  .page-info { display: inline-block; margin: 0 1rem; }
108
  .processing-controls { display: flex; justify-content: center; gap: 10px; margin-top: 10px; }
109
  """) as demo:
110
- gr.Markdown("Doc2Md: Convert any documents to Markdown")
111
 
112
  with gr.Tabs():
113
  with gr.Tab("Upload and Convert"):
@@ -177,25 +174,24 @@ def create_ui():
177
  outputs=[ocr_dropdown]
178
  )
179
 
180
- # Show cancel button when conversion starts
181
  convert_button.click(
182
- lambda: gr.update(visible=True),
183
  inputs=[],
184
- outputs=[cancel_button]
185
- )
186
-
187
- # Main conversion process
188
- convert_button.click(
189
  fn=handle_convert,
190
  inputs=[file_input, provider_dropdown, ocr_dropdown, output_format],
191
- outputs=[file_display, file_download, content_pages, current_page, page_info, navigation_row, cancel_button]
192
  )
193
 
194
  # Handle cancel button click
195
  cancel_button.click(
196
- fn=cancel_conversion,
197
  inputs=[],
198
- outputs=[cancel_button]
 
199
  )
200
 
201
  prev_btn.click(
 
63
  """Handle file conversion."""
64
  global conversion_cancelled
65
 
66
+ try:
67
+ # Perform the conversion
68
+ content, download_file = convert_file(file_path, parser_name, ocr_method_name, output_format)
69
+
70
+ # If conversion was cancelled, return early
71
+ if content == "Conversion cancelled.":
72
+ return content, None, [], 1, "", gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
73
+
74
+ # Process results
75
+ pages = split_content_into_pages(str(content))
76
+ page_info = f"Page 1/{len(pages)}"
77
+
78
+ return str(pages[0]) if pages else "", download_file, pages, 1, page_info, gr.update(visible=True), gr.update(visible=True), gr.update(visible=False)
79
+ except Exception as e:
80
+ return f"Error: {str(e)}", None, [], 1, "", gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
 
 
 
81
 
82
 
83
  def handle_page_navigation(direction, current, pages):
 
104
  .page-info { display: inline-block; margin: 0 1rem; }
105
  .processing-controls { display: flex; justify-content: center; gap: 10px; margin-top: 10px; }
106
  """) as demo:
107
+ gr.Markdown("Markit: Convert any documents to Markdown")
108
 
109
  with gr.Tabs():
110
  with gr.Tab("Upload and Convert"):
 
174
  outputs=[ocr_dropdown]
175
  )
176
 
177
+ # Show/hide appropriate buttons when conversion starts
178
  convert_button.click(
179
+ fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
180
  inputs=[],
181
+ outputs=[convert_button, cancel_button],
182
+ queue=False # Execute immediately
183
+ ).then( # Chain the conversion process after button update
 
 
184
  fn=handle_convert,
185
  inputs=[file_input, provider_dropdown, ocr_dropdown, output_format],
186
+ outputs=[file_display, file_download, content_pages, current_page, page_info, navigation_row, convert_button, cancel_button]
187
  )
188
 
189
  # Handle cancel button click
190
  cancel_button.click(
191
+ fn=lambda: (gr.update(visible=True), gr.update(visible=False)),
192
  inputs=[],
193
+ outputs=[convert_button, cancel_button],
194
+ queue=False # Execute immediately
195
  )
196
 
197
  prev_btn.click(