wt002 committed on
Commit
499242c
·
verified ·
1 Parent(s): 7a8d42b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -18
app.py CHANGED
@@ -8,8 +8,7 @@ import requests
8
  from typing import List, Dict, Union
9
  import pandas as pd
10
  import wikipediaapi
11
- import pypdf
12
- from pypdf import PdfReader
13
  from docx import Document
14
 
15
  load_dotenv()
@@ -73,24 +72,13 @@ class BasicAgent:
73
  page = self.wiki.page(query)
74
  return page.summary if page.exists() else "No Wikipedia page found"
75
 
76
- def extract_pdf_text(file_path: str) -> str:
77
- """Works with both pypdf and PyPDF2."""
 
78
  try:
79
- # Prefer 'pypdf' (newer)
80
- from pypdf import PdfReader
81
- except ImportError:
82
- try:
83
- # Fallback to 'PyPDF2'
84
- from PyPDF2 import PdfReader
85
- except ImportError:
86
- return "Error: Install 'pypdf' or 'PyPDF2' first (pip install pypdf)."
87
-
88
- try:
89
- with open(file_path, 'rb') as f:
90
- reader = PdfReader(f)
91
- return "\n".join(page.extract_text() for page in reader.pages)
92
  except Exception as e:
93
- return f"Failed to read PDF: {str(e)}"
94
 
95
 
96
  def __call__(self, query: str) -> str:
 
8
  from typing import List, Dict, Union
9
  import pandas as pd
10
  import wikipediaapi
11
+ from pdfminer.high_level import extract_text
 
12
  from docx import Document
13
 
14
  load_dotenv()
 
72
  page = self.wiki.page(query)
73
  return page.summary if page.exists() else "No Wikipedia page found"
74
 
75
+
76
+ def _process_pdf(self, file_path: str) -> str:
77
+ """Extract text from PDF using pdfminer.six (no PyPDF2/pypdf needed)."""
78
  try:
79
+ return extract_text(file_path)
 
 
 
 
 
 
 
 
 
 
 
 
80
  except Exception as e:
81
+ return f"PDF processing error: {str(e)}"
82
 
83
 
84
  def __call__(self, query: str) -> str: