Spaces:
Runtime error
Runtime error
File size: 645 Bytes
66340f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
from app.parser.interface import ParserInterface
import pdfplumber
class PdfParser(ParserInterface):
"""
A parser that extracts text from PDF documents.
Methods:
parse(file_path: str) -> str:
Reads the given PDF file and returns the concatenated text from all its pages.
"""
def parse(file_path) -> str:
"""
Args:
file_path (str): The path to the PDF file to parse.
Returns:
str: The concatenated text from all pages in the file.
"""
pdf = pdfplumber.open(file_path)
return " ".join([page.extract_text() for page in pdf.pages])
|