Spaces:
Sleeping
Sleeping
Update lib/read_pdf.py
Browse files
- lib/read_pdf.py +1 -1
lib/read_pdf.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import pdfplumber
|
2 |
import re
|
3 |
-
|
4 |
# Extract text as paragraph delimiter without tables and graphs
|
5 |
def extract_and_format_paragraphs(pdf_path):
|
6 |
"""Extract and format paragraphs from a PDF text, applying filters to remove headers, footnotes, and specific sections."""
|
|
|
1 |
import pdfplumber
|
2 |
import re
|
3 |
+
import os
|
4 |
# Extract text as paragraph delimiter without tables and graphs
|
5 |
def extract_and_format_paragraphs(pdf_path):
|
6 |
"""Extract and format paragraphs from a PDF text, applying filters to remove headers, footnotes, and specific sections."""
|