Spaces:
Sleeping
Sleeping
Update lib/read_pdf.py
Browse files
- lib/read_pdf.py +2 -2
lib/read_pdf.py
CHANGED
@@ -54,7 +54,7 @@ def extract_and_format_paragraphs(pdf_path):
|
|
54 |
if paragraph_lines and not is_end_of_sentence(paragraph_lines[-1]):
|
55 |
# This line is a continuation of the previous one
|
56 |
if paragraph_lines[-1][-1] == "-":
|
57 |
-
paragraph_lines[-1] = paragraph_lines[-1][:-1]
|
58 |
paragraph_lines[-1] += line.strip()
|
59 |
|
60 |
paragraph_lines[-1] += ' ' + line.strip()
|
@@ -104,7 +104,7 @@ def extract_and_format_paragraphs(pdf_path):
|
|
104 |
width = page.width
|
105 |
height = page.height
|
106 |
|
107 |
-
header_height = height * 0.
|
108 |
#footer_height = height * 0.1 # Adjust this value based on your PDF
|
109 |
|
110 |
left_bbox = (0, header_height, width / 2, height) # Left column
|
|
|
54 |
if paragraph_lines and not is_end_of_sentence(paragraph_lines[-1]):
|
55 |
# This line is a continuation of the previous one
|
56 |
if paragraph_lines[-1][-1] == "-":
|
57 |
+
#paragraph_lines[-1] = paragraph_lines[-1][:-1]
|
58 |
paragraph_lines[-1] += line.strip()
|
59 |
|
60 |
paragraph_lines[-1] += ' ' + line.strip()
|
|
|
104 |
width = page.width
|
105 |
height = page.height
|
106 |
|
107 |
+
header_height = height * 0.08 # Adjust this value based on your PDF
|
108 |
#footer_height = height * 0.1 # Adjust this value based on your PDF
|
109 |
|
110 |
left_bbox = (0, header_height, width / 2, height) # Left column
|