AnseMin committed on
Commit
7b03bb1
·
1 Parent(s): ac7733c

cancel button

Browse files
Files changed (2) hide show
  1. src/converter.py +69 -47
  2. src/ui.py +5 -1
src/converter.py CHANGED
@@ -3,6 +3,7 @@ import logging
3
  import time
4
  import os
5
  import threading
 
6
  from pathlib import Path
7
 
8
  # Use relative imports instead of absolute imports
@@ -13,6 +14,8 @@ import parsers
13
 
14
  # Reference to the cancellation flag from ui.py
15
  conversion_cancelled = None
 
 
16
 
17
  def set_cancellation_flag(flag):
18
  """Set the reference to the cancellation flag from ui.py"""
@@ -21,6 +24,16 @@ def set_cancellation_flag(flag):
21
  logging.info(f"Cancellation flag set: {flag}")
22
 
23
 
 
 
 
 
 
 
 
 
 
 
24
  def convert_file(file_path, parser_name, ocr_method_name, output_format):
25
  """
26
  Convert a file using the specified parser and OCR method.
@@ -36,21 +49,20 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
36
  """
37
  global conversion_cancelled
38
 
 
 
 
 
39
  if not file_path:
40
  return "Please upload a file.", None
41
 
42
- # Log cancellation state at the start
43
- if conversion_cancelled:
44
- logging.info(f"Starting conversion. Cancellation flag state: {conversion_cancelled.is_set()}")
45
 
46
  # Create a temporary file with English filename
47
  temp_input = None
48
  try:
49
- # Check for early cancellation
50
- if conversion_cancelled and conversion_cancelled.is_set():
51
- logging.info("Conversion cancelled before file preparation")
52
- return "Conversion cancelled.", None
53
-
54
  original_ext = Path(file_path).suffix
55
  with tempfile.NamedTemporaryFile(suffix=original_ext, delete=False) as temp_input:
56
  # Copy the content of original file to temp file
@@ -59,68 +71,78 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
59
  file_path = temp_input.name
60
 
61
  # Check for cancellation after file preparation
62
- if conversion_cancelled and conversion_cancelled.is_set():
63
- logging.info("Conversion cancelled after file preparation")
64
  cleanup_temp_file(temp_input.name)
65
  return "Conversion cancelled.", None
66
 
67
  # Use the parser factory to parse the document
68
- start = time.time()
69
 
70
- # Pass the cancellation flag to the parser factory
71
- content = ParserFactory.parse_document(
72
- file_path=file_path,
73
- parser_name=parser_name,
74
- ocr_method_name=ocr_method_name,
75
- output_format=output_format.lower(),
76
- cancellation_flag=conversion_cancelled
77
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
- # Check if the content indicates cancellation
80
- if content == "Conversion cancelled.":
81
- logging.info("Parser reported cancellation")
82
- cleanup_temp_file(temp_input.name)
83
- return content, None
84
-
85
- # Check for cancellation after parsing
86
- if conversion_cancelled and conversion_cancelled.is_set():
87
- logging.info("Conversion cancelled after parsing")
88
  cleanup_temp_file(temp_input.name)
89
  return "Conversion cancelled.", None
90
-
91
- duration = time.time() - start
92
- logging.info(f"Processed in {duration:.2f} seconds.")
93
 
94
- # Check for cancellation before file creation
95
- if conversion_cancelled and conversion_cancelled.is_set():
96
- logging.info("Conversion cancelled before file creation")
 
 
97
  cleanup_temp_file(temp_input.name)
98
  return "Conversion cancelled.", None
99
 
100
- # Determine the file extension based on the output format
101
- if output_format == "Markdown":
102
- ext = ".md"
103
- elif output_format == "JSON":
104
- ext = ".json"
105
- elif output_format == "Text":
106
- ext = ".txt"
107
- elif output_format == "Document Tags":
108
- ext = ".doctags"
109
- else:
110
- ext = ".txt"
111
-
112
  # Create a temporary file for download
113
  with tempfile.NamedTemporaryFile(mode="w", suffix=ext, delete=False, encoding="utf-8") as tmp:
114
  tmp.write(content)
115
  tmp_path = tmp.name
116
 
117
- # Clean up the temporary input file
118
  cleanup_temp_file(temp_input.name)
 
 
 
 
119
 
120
  return content, tmp_path
121
 
122
  except Exception as e:
123
  logging.error(f"Error during conversion: {str(e)}")
 
 
 
 
 
 
 
 
124
  if temp_input and hasattr(temp_input, 'name'):
125
  cleanup_temp_file(temp_input.name)
126
  return f"Error: {e}", None
 
3
  import time
4
  import os
5
  import threading
6
+ import signal
7
  from pathlib import Path
8
 
9
  # Use relative imports instead of absolute imports
 
14
 
15
  # Reference to the cancellation flag from ui.py
16
  conversion_cancelled = None
17
+ # Track the current process for cancellation
18
+ current_conversion_thread = None
19
 
20
  def set_cancellation_flag(flag):
21
  """Set the reference to the cancellation flag from ui.py"""
 
24
  logging.info(f"Cancellation flag set: {flag}")
25
 
26
 
27
+ def check_cancellation():
28
+ """Check if cancellation is requested and interrupt if needed"""
29
+ global conversion_cancelled
30
+ if conversion_cancelled and conversion_cancelled.is_set():
31
+ logging.info("Cancellation detected, raising interrupt")
32
+ # Report the cancellation to the caller via the return value; no exception is raised here
33
+ return True
34
+ return False
35
+
36
+
37
  def convert_file(file_path, parser_name, ocr_method_name, output_format):
38
  """
39
  Convert a file using the specified parser and OCR method.
 
49
  """
50
  global conversion_cancelled
51
 
52
+ # Record start time for logging
53
+ start_time = time.time()
54
+ logging.info(f"Starting conversion of {file_path}")
55
+
56
  if not file_path:
57
  return "Please upload a file.", None
58
 
59
+ # Check immediately for cancellation
60
+ if check_cancellation():
61
+ return "Conversion cancelled.", None
62
 
63
  # Create a temporary file with English filename
64
  temp_input = None
65
  try:
 
 
 
 
 
66
  original_ext = Path(file_path).suffix
67
  with tempfile.NamedTemporaryFile(suffix=original_ext, delete=False) as temp_input:
68
  # Copy the content of original file to temp file
 
71
  file_path = temp_input.name
72
 
73
  # Check for cancellation after file preparation
74
+ if check_cancellation():
 
75
  cleanup_temp_file(temp_input.name)
76
  return "Conversion cancelled.", None
77
 
78
  # Use the parser factory to parse the document
79
+ logging.info(f"Starting document parsing with {parser_name} and {ocr_method_name}")
80
 
81
+ def interruptible_parser():
82
+ """Run parser in a way that can be checked for cancellation"""
83
+ try:
84
+ # Log starting
85
+ logging.info("Parser thread started")
86
+ return ParserFactory.parse_document(
87
+ file_path=file_path,
88
+ parser_name=parser_name,
89
+ ocr_method_name=ocr_method_name,
90
+ output_format=output_format.lower(),
91
+ cancellation_flag=conversion_cancelled
92
+ )
93
+ except Exception as e:
94
+ logging.error(f"Parser thread error: {str(e)}")
95
+ if conversion_cancelled and conversion_cancelled.is_set():
96
+ return "Conversion cancelled."
97
+ raise
98
+
99
+ # Regular parsing, but periodically check for cancellation
100
+ content = None
101
+ parse_start = time.time()
102
 
103
+ # Perform the actual parsing
104
+ content = interruptible_parser()
105
+
106
+ # If we got here, parsing is complete
107
+ logging.info(f"Parsing completed in {time.time() - parse_start:.2f} seconds")
108
+
109
+ # Check cancellation immediately after parsing
110
+ if check_cancellation() or content == "Conversion cancelled.":
 
111
  cleanup_temp_file(temp_input.name)
112
  return "Conversion cancelled.", None
 
 
 
113
 
114
+ # Determine the file extension
115
+ ext = get_output_extension(output_format)
116
+
117
+ # Final cancellation check before file creation
118
+ if check_cancellation():
119
  cleanup_temp_file(temp_input.name)
120
  return "Conversion cancelled.", None
121
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  # Create a temporary file for download
123
  with tempfile.NamedTemporaryFile(mode="w", suffix=ext, delete=False, encoding="utf-8") as tmp:
124
  tmp.write(content)
125
  tmp_path = tmp.name
126
 
127
+ # Clean up temporary files
128
  cleanup_temp_file(temp_input.name)
129
+
130
+ # Log completion time
131
+ total_time = time.time() - start_time
132
+ logging.info(f"Conversion completed in {total_time:.2f} seconds")
133
 
134
  return content, tmp_path
135
 
136
  except Exception as e:
137
  logging.error(f"Error during conversion: {str(e)}")
138
+
139
+ # Check if this was a cancellation
140
+ if conversion_cancelled and conversion_cancelled.is_set():
141
+ if temp_input and hasattr(temp_input, 'name'):
142
+ cleanup_temp_file(temp_input.name)
143
+ return "Conversion cancelled.", None
144
+
145
+ # Other error
146
  if temp_input and hasattr(temp_input, 'name'):
147
  cleanup_temp_file(temp_input.name)
148
  return f"Error: {e}", None
src/ui.py CHANGED
@@ -5,6 +5,7 @@ import time
5
  from converter import convert_file, set_cancellation_flag
6
  from docling_chat import chat_with_document
7
  from parser_registry import ParserRegistry
 
8
 
9
 
10
  # Add a global variable to track cancellation state
@@ -91,9 +92,12 @@ def handle_page_navigation(direction, current, pages):
91
 
92
 
93
  def cancel_conversion():
94
- """Set the cancellation flag."""
95
  global conversion_cancelled
 
96
  conversion_cancelled.set()
 
 
97
  return gr.update(visible=False)
98
 
99
 
 
5
  from converter import convert_file, set_cancellation_flag
6
  from docling_chat import chat_with_document
7
  from parser_registry import ParserRegistry
8
+ import logging
9
 
10
 
11
  # Add a global variable to track cancellation state
 
92
 
93
 
94
  def cancel_conversion():
95
+ """Set the cancellation flag and force UI update."""
96
  global conversion_cancelled
97
+ # Set the flag
98
  conversion_cancelled.set()
99
+ logging.info("Cancel button clicked, flag set")
100
+ # Update UI immediately to show cancellation is in progress
101
  return gr.update(visible=False)
102
 
103