wt002 committed on
Commit
606a4f5
·
verified ·
1 Parent(s): 499242c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -8,7 +8,6 @@ import requests
8
  from typing import List, Dict, Union
9
  import pandas as pd
10
  import wikipediaapi
11
- from pdfminer.high_level import extract_text
12
  from docx import Document
13
 
14
  load_dotenv()
@@ -74,11 +73,14 @@ class BasicAgent:
74
 
75
 
76
  def _process_pdf(self, file_path: str) -> str:
77
- """Extract text from PDF using pdfminer.six (no PyPDF2/pypdf needed)."""
78
  try:
79
- return extract_text(file_path)
 
 
 
80
  except Exception as e:
81
- return f"PDF processing error: {str(e)}"
82
 
83
 
84
  def __call__(self, query: str) -> str:
 
8
  from typing import List, Dict, Union
9
  import pandas as pd
10
  import wikipediaapi
 
11
  from docx import Document
12
 
13
  load_dotenv()
 
73
 
74
 
75
  def _process_pdf(self, file_path: str) -> str:
76
+ """Fallback PDF text extraction (works for simple PDFs)."""
77
  try:
78
+ # Read raw binary data and decode text (very basic)
79
+ with open(file_path, 'rb') as f:
80
+ text = f.read().decode('latin-1') # Try UTF-8 if this fails
81
+ return text if text.strip() else "PDF text not extractable (install pdfminer.six for better results)"
82
  except Exception as e:
83
+ return f"PDF read error: {str(e)}"
84
 
85
 
86
  def __call__(self, query: str) -> str: