AnseMin committed on
Commit
ac7733c
·
1 Parent(s): 02fee92

cancel button fix 5

Browse files
Files changed (2) hide show
  1. src/converter.py +51 -14
  2. src/parser_factory.py +52 -21
src/converter.py CHANGED
@@ -12,13 +12,13 @@ from parser_factory import ParserFactory
12
  import parsers
13
 
14
  # Reference to the cancellation flag from ui.py
15
- # This will be set by the UI when the cancel button is clicked
16
  conversion_cancelled = None
17
 
18
  def set_cancellation_flag(flag):
19
  """Set the reference to the cancellation flag from ui.py"""
20
  global conversion_cancelled
21
  conversion_cancelled = flag
 
22
 
23
 
24
  def convert_file(file_path, parser_name, ocr_method_name, output_format):
@@ -39,23 +39,35 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
39
  if not file_path:
40
  return "Please upload a file.", None
41
 
 
 
 
 
42
  # Create a temporary file with English filename
43
  temp_input = None
44
  try:
 
 
 
 
 
45
  original_ext = Path(file_path).suffix
46
  with tempfile.NamedTemporaryFile(suffix=original_ext, delete=False) as temp_input:
47
  # Copy the content of original file to temp file
48
  with open(file_path, 'rb') as original:
49
  temp_input.write(original.read())
50
  file_path = temp_input.name
51
-
52
- # Early cancellation check
53
  if conversion_cancelled and conversion_cancelled.is_set():
 
54
  cleanup_temp_file(temp_input.name)
55
  return "Conversion cancelled.", None
56
 
57
  # Use the parser factory to parse the document
58
  start = time.time()
 
 
59
  content = ParserFactory.parse_document(
60
  file_path=file_path,
61
  parser_name=parser_name,
@@ -64,28 +76,52 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
64
  cancellation_flag=conversion_cancelled
65
  )
66
 
67
- # If conversion was cancelled during parsing
 
 
 
 
 
 
68
  if conversion_cancelled and conversion_cancelled.is_set():
 
69
  cleanup_temp_file(temp_input.name)
70
  return "Conversion cancelled.", None
71
-
72
  duration = time.time() - start
73
  logging.info(f"Processed in {duration:.2f} seconds.")
74
 
75
- # Determine output file extension
76
- ext = get_output_extension(output_format)
 
 
 
77
 
78
- # Create download file
 
 
 
 
 
 
 
 
 
 
 
 
79
  with tempfile.NamedTemporaryFile(mode="w", suffix=ext, delete=False, encoding="utf-8") as tmp:
80
  tmp.write(content)
81
  tmp_path = tmp.name
82
-
83
- # Cleanup and return
84
  cleanup_temp_file(temp_input.name)
 
85
  return content, tmp_path
86
-
87
  except Exception as e:
88
- if temp_input and temp_input.name:
 
89
  cleanup_temp_file(temp_input.name)
90
  return f"Error: {e}", None
91
 
@@ -94,8 +130,9 @@ def cleanup_temp_file(file_path):
94
  """Helper function to clean up temporary files"""
95
  try:
96
  os.unlink(file_path)
97
- except:
98
- pass
 
99
 
100
 
101
  def get_output_extension(output_format):
 
12
  import parsers
13
 
14
  # Reference to the cancellation flag from ui.py
 
15
  conversion_cancelled = None
16
 
17
  def set_cancellation_flag(flag):
18
  """Set the reference to the cancellation flag from ui.py"""
19
  global conversion_cancelled
20
  conversion_cancelled = flag
21
+ logging.info(f"Cancellation flag set: {flag}")
22
 
23
 
24
  def convert_file(file_path, parser_name, ocr_method_name, output_format):
 
39
  if not file_path:
40
  return "Please upload a file.", None
41
 
42
+ # Log cancellation state at the start
43
+ if conversion_cancelled:
44
+ logging.info(f"Starting conversion. Cancellation flag state: {conversion_cancelled.is_set()}")
45
+
46
  # Create a temporary file with English filename
47
  temp_input = None
48
  try:
49
+ # Check for early cancellation
50
+ if conversion_cancelled and conversion_cancelled.is_set():
51
+ logging.info("Conversion cancelled before file preparation")
52
+ return "Conversion cancelled.", None
53
+
54
  original_ext = Path(file_path).suffix
55
  with tempfile.NamedTemporaryFile(suffix=original_ext, delete=False) as temp_input:
56
  # Copy the content of original file to temp file
57
  with open(file_path, 'rb') as original:
58
  temp_input.write(original.read())
59
  file_path = temp_input.name
60
+
61
+ # Check for cancellation after file preparation
62
  if conversion_cancelled and conversion_cancelled.is_set():
63
+ logging.info("Conversion cancelled after file preparation")
64
  cleanup_temp_file(temp_input.name)
65
  return "Conversion cancelled.", None
66
 
67
  # Use the parser factory to parse the document
68
  start = time.time()
69
+
70
+ # Pass the cancellation flag to the parser factory
71
  content = ParserFactory.parse_document(
72
  file_path=file_path,
73
  parser_name=parser_name,
 
76
  cancellation_flag=conversion_cancelled
77
  )
78
 
79
+ # Check if the content indicates cancellation
80
+ if content == "Conversion cancelled.":
81
+ logging.info("Parser reported cancellation")
82
+ cleanup_temp_file(temp_input.name)
83
+ return content, None
84
+
85
+ # Check for cancellation after parsing
86
  if conversion_cancelled and conversion_cancelled.is_set():
87
+ logging.info("Conversion cancelled after parsing")
88
  cleanup_temp_file(temp_input.name)
89
  return "Conversion cancelled.", None
90
+
91
  duration = time.time() - start
92
  logging.info(f"Processed in {duration:.2f} seconds.")
93
 
94
+ # Check for cancellation before file creation
95
+ if conversion_cancelled and conversion_cancelled.is_set():
96
+ logging.info("Conversion cancelled before file creation")
97
+ cleanup_temp_file(temp_input.name)
98
+ return "Conversion cancelled.", None
99
 
100
+ # Determine the file extension based on the output format
101
+ if output_format == "Markdown":
102
+ ext = ".md"
103
+ elif output_format == "JSON":
104
+ ext = ".json"
105
+ elif output_format == "Text":
106
+ ext = ".txt"
107
+ elif output_format == "Document Tags":
108
+ ext = ".doctags"
109
+ else:
110
+ ext = ".txt"
111
+
112
+ # Create a temporary file for download
113
  with tempfile.NamedTemporaryFile(mode="w", suffix=ext, delete=False, encoding="utf-8") as tmp:
114
  tmp.write(content)
115
  tmp_path = tmp.name
116
+
117
+ # Clean up the temporary input file
118
  cleanup_temp_file(temp_input.name)
119
+
120
  return content, tmp_path
121
+
122
  except Exception as e:
123
+ logging.error(f"Error during conversion: {str(e)}")
124
+ if temp_input and hasattr(temp_input, 'name'):
125
  cleanup_temp_file(temp_input.name)
126
  return f"Error: {e}", None
127
 
 
130
  """Helper function to clean up temporary files"""
131
  try:
132
  os.unlink(file_path)
133
+ logging.info(f"Cleaned up temporary file: {file_path}")
134
+ except Exception as e:
135
+ logging.error(f"Failed to clean up temporary file {file_path}: {str(e)}")
136
 
137
 
138
  def get_output_extension(output_format):
src/parser_factory.py CHANGED
@@ -1,6 +1,7 @@
1
  from typing import Optional, Dict, Any, Union
2
  from pathlib import Path
3
  import threading
 
4
 
5
  from parser_interface import DocumentParser
6
  from parser_registry import ParserRegistry
@@ -28,29 +29,59 @@ class ParserFactory:
28
  return parser_class()
29
 
30
  @classmethod
31
- def parse_document(cls, file_path: Union[str, Path], parser_name: str,
32
- ocr_method_name: str, cancellation_flag: Optional[threading.Event] = None,
 
 
 
33
  **kwargs) -> str:
34
- """Parse a document using the specified parser and OCR method."""
35
- # Quick cancellation check
36
- if cancellation_flag and cancellation_flag.is_set():
37
- return "Conversion cancelled."
38
-
39
- parser = cls.create_parser(parser_name)
40
- if not parser:
41
- raise ValueError(f"Unknown parser: {parser_name}")
42
-
43
- # Get the internal OCR method ID
44
- ocr_method_id = ParserRegistry.get_ocr_method_id(parser_name, ocr_method_name)
45
- if not ocr_method_id:
46
- raise ValueError(f"Unknown OCR method: {ocr_method_name} for parser {parser_name}")
47
-
48
- # Parse the document, passing the cancellation flag
49
- kwargs['cancellation_flag'] = cancellation_flag
50
- result = parser.parse(file_path, ocr_method=ocr_method_id, **kwargs)
51
 
52
- # Final cancellation check before returning
 
 
 
 
 
 
 
 
 
 
53
  if cancellation_flag and cancellation_flag.is_set():
 
54
  return "Conversion cancelled."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- return result
 
 
 
 
 
 
1
  from typing import Optional, Dict, Any, Union
2
  from pathlib import Path
3
  import threading
4
+ import logging
5
 
6
  from parser_interface import DocumentParser
7
  from parser_registry import ParserRegistry
 
29
  return parser_class()
30
 
31
  @classmethod
32
+ def parse_document(cls,
33
+ file_path: Union[str, Path],
34
+ parser_name: str,
35
+ ocr_method_name: str,
36
+ cancellation_flag: Optional[threading.Event] = None,
37
  **kwargs) -> str:
38
+ """
39
+ Parse a document using the specified parser and OCR method.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ Args:
42
+ file_path: Path to the document
43
+ parser_name: Name of the parser to use
44
+ ocr_method_name: Display name of the OCR method to use
45
+ cancellation_flag: Optional flag to check for cancellation
46
+ **kwargs: Additional parser-specific options
47
+
48
+ Returns:
49
+ str: The parsed content
50
+ """
51
+ # Check for cancellation at the start
52
  if cancellation_flag and cancellation_flag.is_set():
53
+ logging.info("Conversion cancelled at the start of parsing")
54
  return "Conversion cancelled."
55
+
56
+ try:
57
+ parser = cls.create_parser(parser_name)
58
+ if not parser:
59
+ raise ValueError(f"Unknown parser: {parser_name}")
60
+
61
+ # Get the internal OCR method ID
62
+ ocr_method_id = ParserRegistry.get_ocr_method_id(parser_name, ocr_method_name)
63
+ if not ocr_method_id:
64
+ raise ValueError(f"Unknown OCR method: {ocr_method_name} for parser {parser_name}")
65
+
66
+ # Check for cancellation before parsing
67
+ if cancellation_flag and cancellation_flag.is_set():
68
+ logging.info("Conversion cancelled before parsing starts")
69
+ return "Conversion cancelled."
70
+
71
+ # Parse the document, passing the cancellation flag
72
+ kwargs['cancellation_flag'] = cancellation_flag
73
+ result = parser.parse(file_path, ocr_method=ocr_method_id, **kwargs)
74
+
75
+ # Check for cancellation after parsing
76
+ if cancellation_flag and cancellation_flag.is_set():
77
+ logging.info("Conversion cancelled after parsing completes")
78
+ return "Conversion cancelled."
79
+
80
+ return result
81
 
82
+ except Exception as e:
83
+ logging.error(f"Error in parse_document: {str(e)}")
84
+ # Check if the error was due to cancellation
85
+ if cancellation_flag and cancellation_flag.is_set():
86
+ return "Conversion cancelled."
87
+ raise