Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -8,8 +8,7 @@ import requests
|
|
8 |
from typing import List, Dict, Union
|
9 |
import pandas as pd
|
10 |
import wikipediaapi
|
11 |
-
import
|
12 |
-
from pypdf import PdfReader
|
13 |
from docx import Document
|
14 |
|
15 |
load_dotenv()
|
@@ -73,24 +72,13 @@ class BasicAgent:
|
|
73 |
page = self.wiki.page(query)
|
74 |
return page.summary if page.exists() else "No Wikipedia page found"
|
75 |
|
76 |
-
|
77 |
-
|
|
|
78 |
try:
|
79 |
-
|
80 |
-
from pypdf import PdfReader
|
81 |
-
except ImportError:
|
82 |
-
try:
|
83 |
-
# Fallback to 'PyPDF2'
|
84 |
-
from PyPDF2 import PdfReader
|
85 |
-
except ImportError:
|
86 |
-
return "Error: Install 'pypdf' or 'PyPDF2' first (pip install pypdf)."
|
87 |
-
|
88 |
-
try:
|
89 |
-
with open(file_path, 'rb') as f:
|
90 |
-
reader = PdfReader(f)
|
91 |
-
return "\n".join(page.extract_text() for page in reader.pages)
|
92 |
except Exception as e:
|
93 |
-
return f"
|
94 |
|
95 |
|
96 |
def __call__(self, query: str) -> str:
|
|
|
8 |
from typing import List, Dict, Union
|
9 |
import pandas as pd
|
10 |
import wikipediaapi
|
11 |
+
from pdfminer.high_level import extract_text
|
|
|
12 |
from docx import Document
|
13 |
|
14 |
load_dotenv()
|
|
|
72 |
page = self.wiki.page(query)
|
73 |
return page.summary if page.exists() else "No Wikipedia page found"
|
74 |
|
75 |
+
|
76 |
+
def _process_pdf(self, file_path: str) -> str:
|
77 |
+
"""Extract text from PDF using pdfminer.six (no PyPDF2/pypdf needed)."""
|
78 |
try:
|
79 |
+
return extract_text(file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
except Exception as e:
|
81 |
+
return f"PDF processing error: {str(e)}"
|
82 |
|
83 |
|
84 |
def __call__(self, query: str) -> str:
|