lamhieu committed on
Commit
3a9d32f
·
1 Parent(s): 141a997

chore: update something

Browse files
Files changed (1) hide show
  1. docsifer/service.py +9 -2
docsifer/service.py CHANGED
@@ -111,18 +111,25 @@ class DocsiferService:
111
  # Use a temp directory so MarkItDown sees the real file extension
112
  with tempfile.TemporaryDirectory() as tmpdir:
113
  mime_type = magic.from_file(str(src), mime=True)
 
114
  if not mime_type:
115
  logger.warning(f"Could not detect file type for: {src}")
116
  new_filename = src.name
117
  else:
118
  logger.debug(f"Detected MIME type '{mime_type}' for: {src}")
119
- guessed_ext = mimetypes.guess_extension(mime_type) or ""
120
  new_filename = f"{src.stem}{guessed_ext}"
121
  tmp_path = Path(tmpdir) / new_filename
122
  tmp_path.write_bytes(src.read_bytes())
123
 
 
 
 
 
 
 
 
124
  # If it's HTML and cleanup is requested
125
- if cleanup and tmp_path.suffix.lower() in (".html", ".htm"):
126
  self._maybe_cleanup_html(tmp_path)
127
 
128
  # Decide whether to use LLM or basic
 
111
  # Use a temp directory so MarkItDown sees the real file extension
112
  with tempfile.TemporaryDirectory() as tmpdir:
113
  mime_type = magic.from_file(str(src), mime=True)
114
+ guessed_ext = mimetypes.guess_extension(mime_type) or ".tmp"
115
  if not mime_type:
116
  logger.warning(f"Could not detect file type for: {src}")
117
  new_filename = src.name
118
  else:
119
  logger.debug(f"Detected MIME type '{mime_type}' for: {src}")
 
120
  new_filename = f"{src.stem}{guessed_ext}"
121
  tmp_path = Path(tmpdir) / new_filename
122
  tmp_path.write_bytes(src.read_bytes())
123
 
124
+ logger.info(
125
+ "Using temp file: %s, MIME type: %s, Guessed ext: %s",
126
+ tmp_path,
127
+ mime_type,
128
+ guessed_ext,
129
+ )
130
+
131
  # If it's HTML and cleanup is requested
132
+ if cleanup and guessed_ext.lower() in (".html", ".htm"):
133
  self._maybe_cleanup_html(tmp_path)
134
 
135
  # Decide whether to use LLM or basic