Cachoups committed on
Commit
7d86ef4
·
verified ·
1 Parent(s): a81d783

Update lib/read_pdf.py

Browse files
Files changed (1) hide show
  1. lib/read_pdf.py +2 -2
lib/read_pdf.py CHANGED
@@ -54,7 +54,7 @@ def extract_and_format_paragraphs(pdf_path):
54
  if paragraph_lines and not is_end_of_sentence(paragraph_lines[-1]):
55
  # This line is a continuation of the previous one
56
  if paragraph_lines[-1][-1] == "-":
57
- paragraph_lines[-1] = paragraph_lines[-1][:-1]
58
  paragraph_lines[-1] += line.strip()
59
 
60
  paragraph_lines[-1] += ' ' + line.strip()
@@ -104,7 +104,7 @@ def extract_and_format_paragraphs(pdf_path):
104
  width = page.width
105
  height = page.height
106
 
107
- header_height = height * 0.1 # Adjust this value based on your PDF
108
  #footer_height = height * 0.1 # Adjust this value based on your PDF
109
 
110
  left_bbox = (0, header_height, width / 2, height) # Left column
 
54
  if paragraph_lines and not is_end_of_sentence(paragraph_lines[-1]):
55
  # This line is a continuation of the previous one
56
  if paragraph_lines[-1][-1] == "-":
57
+ #paragraph_lines[-1] = paragraph_lines[-1][:-1]
58
  paragraph_lines[-1] += line.strip()
59
 
60
  paragraph_lines[-1] += ' ' + line.strip()
 
104
  width = page.width
105
  height = page.height
106
 
107
+ header_height = height * 0.08 # Adjust this value based on your PDF
108
  #footer_height = height * 0.1 # Adjust this value based on your PDF
109
 
110
  left_bbox = (0, header_height, width / 2, height) # Left column