Cachoups committed on
Commit
0b9c07f
·
verified ·
1 Parent(s): 7d86ef4

Update lib/read_pdf.py

Browse files
Files changed (1) hide show
  1. lib/read_pdf.py +3 -3
lib/read_pdf.py CHANGED
@@ -54,10 +54,10 @@ def extract_and_format_paragraphs(pdf_path):
54
  if paragraph_lines and not is_end_of_sentence(paragraph_lines[-1]):
55
  # This line is a continuation of the previous one
56
  if paragraph_lines[-1][-1] == "-":
57
- #paragraph_lines[-1] = paragraph_lines[-1][:-1]
58
  paragraph_lines[-1] += line.strip()
59
-
60
- paragraph_lines[-1] += ' ' + line.strip()
61
  else:
62
  # Start a new line in the paragraph
63
  paragraph_lines.append(line.strip())
 
54
  if paragraph_lines and not is_end_of_sentence(paragraph_lines[-1]):
55
  # This line is a continuation of the previous one
56
  if paragraph_lines[-1][-1] == "-":
57
+ paragraph_lines[-1] = paragraph_lines[-1][:-1]
58
  paragraph_lines[-1] += line.strip()
59
+ else:
60
+ paragraph_lines[-1] += ' ' + line.strip()
61
  else:
62
  # Start a new line in the paragraph
63
  paragraph_lines.append(line.strip())