awacke1 committed on
Commit
d6bd877
·
verified ·
1 Parent(s): 95e873b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +129 -32
app.py CHANGED
@@ -17,9 +17,13 @@ from reportlab.pdfbase import pdfmetrics
17
  from reportlab.pdfbase.ttfonts import TTFont
18
  from datetime import datetime
19
  import pytz
 
 
 
20
 
21
  st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
22
 
 
23
  def get_timestamp_prefix():
24
  central = pytz.timezone("US/Central")
25
  now = datetime.now(central)
@@ -63,10 +67,8 @@ def trim_emojis_except_numbered(markdown_text):
63
 
64
  for line in lines:
65
  if number_pattern.match(line):
66
- # Keep emojis in numbered lines
67
  processed_lines.append(line)
68
  else:
69
- # Remove emojis from other lines
70
  processed_lines.append(emoji_pattern.sub('', line))
71
 
72
  return '\n'.join(processed_lines)
@@ -163,8 +165,6 @@ def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers,
163
  pdf_content = []
164
  number_pattern = re.compile(r'^\d+\.\s')
165
  heading_pattern = re.compile(r'^(#{1,4})\s+(.+)$')
166
-
167
- # Track if we've seen the first numbered line already
168
  first_numbered_seen = False
169
 
170
  for line in lines:
@@ -172,34 +172,26 @@ def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers,
172
  if not line:
173
  continue
174
 
175
- # Process headings if headings_to_fonts is enabled
176
  if headings_to_fonts and line.startswith('#'):
177
  heading_match = heading_pattern.match(line)
178
  if heading_match:
179
- level = len(heading_match.group(1)) # Number of # symbols
180
  heading_text = heading_match.group(2).strip()
181
- # Convert the heading to bold with appropriate formatting
182
  formatted_heading = f"<h{level}>{heading_text}</h{level}>"
183
  pdf_content.append(formatted_heading)
184
  continue
185
 
186
- # Check if this is a numbered line
187
  is_numbered_line = number_pattern.match(line) is not None
188
 
189
- # Add a blank line before numbered lines (except the first one with "1.")
190
  if add_space_before_numbered and is_numbered_line:
191
- # Only add space if this isn't the first numbered line
192
  if first_numbered_seen and not line.startswith("1."):
193
- pdf_content.append("") # Add an empty line
194
- # Mark that we've seen a numbered line
195
  if not first_numbered_seen:
196
  first_numbered_seen = True
197
 
198
  line = detect_and_convert_links(line)
199
 
200
- # Process bold text (*word* or **word**)
201
  if render_with_bold or headings_to_fonts:
202
- # Handle both *word* and **word** patterns
203
  line = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', line)
204
  if headings_to_fonts:
205
  line = re.sub(r'\*([^*]+?)\*', r'<b>\1</b>', line)
@@ -285,25 +277,22 @@ def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_number
285
  for col_idx, column in enumerate(columns):
286
  for item in column:
287
  if isinstance(item, str):
288
- # Handle heading tags if headings_to_fonts is enabled
289
  heading_match = re.match(r'<h(\d)>(.*?)</h\1>', item) if headings_to_fonts else None
290
  if heading_match:
291
  level = int(heading_match.group(1))
292
  heading_text = heading_match.group(2)
293
- # Create heading styles based on level
294
  heading_style = ParagraphStyle(
295
  f'Heading{level}Style',
296
  parent=styles['Heading1'],
297
  fontName="DejaVuSans",
298
  textColor=colors.darkblue if level == 1 else (colors.black if level > 2 else colors.blue),
299
- fontSize=adjusted_font_size * (1.6 - (level-1)*0.15), # Size decreases with level
300
  leading=adjusted_font_size * (1.8 - (level-1)*0.15),
301
  spaceAfter=4 - (level-1),
302
  spaceBefore=6 - (level-1),
303
  linkUnderline=True
304
  )
305
  column_cells[col_idx].append(Paragraph(apply_emoji_font(heading_text, "NotoEmoji-Bold"), heading_style))
306
- # Handle regular bold items
307
  elif item.startswith("<b>") and item.endswith("</b>"):
308
  content = item[3:-4].strip()
309
  if number_pattern.match(content):
@@ -349,6 +338,86 @@ def pdf_to_image(pdf_bytes):
349
  st.error(f"Failed to render PDF preview: {e}")
350
  return None
351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
  md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
353
  md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]
354
 
@@ -372,30 +441,24 @@ with st.sidebar:
372
  add_space_before_numbered = st.checkbox("Add Space Ahead of Numbered Lines", value=False, key="add_space_before_numbered")
373
  headings_to_fonts = st.checkbox("Headings to Fonts", value=False, key="headings_to_fonts",
374
  help="Convert Markdown headings (# Heading) and emphasis (*word*) to appropriate font styles")
375
-
376
- # Add AutoColumns option to automatically determine column count based on line length
377
  auto_columns = st.checkbox("AutoColumns", value=False, key="auto_columns")
378
 
379
- # Auto-determine column count based on longest line if AutoColumns is checked
380
  if auto_columns and 'markdown_content' in st.session_state:
381
  current_markdown = st.session_state.markdown_content
382
  lines = current_markdown.strip().split('\n')
383
  longest_line_words = 0
384
  for line in lines:
385
- if line.strip(): # Skip empty lines
386
  word_count = len(line.split())
387
  longest_line_words = max(longest_line_words, word_count)
388
-
389
- # Set recommended columns based on word count
390
  if longest_line_words > 25:
391
- recommended_columns = 1 # Very long lines need a single column
392
  elif longest_line_words >= 18:
393
- recommended_columns = 2 # Long lines need 2 columns
394
  elif longest_line_words >= 11:
395
- recommended_columns = 3 # Medium lines can use 3 columns
396
  else:
397
- recommended_columns = "Auto" # Default to auto for shorter lines
398
-
399
  st.info(f"Longest line has {longest_line_words} words. Recommending {recommended_columns} columns.")
400
  else:
401
  recommended_columns = "Auto"
@@ -406,10 +469,8 @@ with st.sidebar:
406
  num_columns = 0 if num_columns == "Auto" else int(num_columns)
407
  st.info("Font size and columns adjust to fit one page.")
408
 
409
- # Changed label from "Modify the markdown content below:" to "Input Markdown"
410
  edited_markdown = st.text_area("Input Markdown", value=st.session_state.markdown_content, height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")
411
 
412
- # Added emoji to "Update PDF" button and created a two-column layout for buttons
413
  col1, col2 = st.columns(2)
414
  with col1:
415
  if st.button("πŸ”„πŸ“„ Update PDF"):
@@ -419,7 +480,6 @@ with st.sidebar:
419
  f.write(edited_markdown)
420
  st.rerun()
421
 
422
- # Added "Trim Emojis" button in second column
423
  with col2:
424
  if st.button("βœ‚οΈ Trim Emojis"):
425
  trimmed_content = trim_emojis_except_numbered(edited_markdown)
@@ -452,6 +512,43 @@ with st.sidebar:
452
  file_name=audio_filename,
453
  mime="audio/mpeg"
454
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
 
456
  with st.spinner("Generating PDF..."):
457
  pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered, headings_to_fonts)
 
17
  from reportlab.pdfbase.ttfonts import TTFont
18
  from datetime import datetime
19
  import pytz
20
+ from pypdf import PdfReader, PdfWriter
21
+ from pypdf.annotations import Link
22
+ from reportlab.pdfgen import canvas
23
 
24
  st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
25
 
26
+ # Existing functions (unchanged)
27
  def get_timestamp_prefix():
28
  central = pytz.timezone("US/Central")
29
  now = datetime.now(central)
 
67
 
68
  for line in lines:
69
  if number_pattern.match(line):
 
70
  processed_lines.append(line)
71
  else:
 
72
  processed_lines.append(emoji_pattern.sub('', line))
73
 
74
  return '\n'.join(processed_lines)
 
165
  pdf_content = []
166
  number_pattern = re.compile(r'^\d+\.\s')
167
  heading_pattern = re.compile(r'^(#{1,4})\s+(.+)$')
 
 
168
  first_numbered_seen = False
169
 
170
  for line in lines:
 
172
  if not line:
173
  continue
174
 
 
175
  if headings_to_fonts and line.startswith('#'):
176
  heading_match = heading_pattern.match(line)
177
  if heading_match:
178
+ level = len(heading_match.group(1))
179
  heading_text = heading_match.group(2).strip()
 
180
  formatted_heading = f"<h{level}>{heading_text}</h{level}>"
181
  pdf_content.append(formatted_heading)
182
  continue
183
 
 
184
  is_numbered_line = number_pattern.match(line) is not None
185
 
 
186
  if add_space_before_numbered and is_numbered_line:
 
187
  if first_numbered_seen and not line.startswith("1."):
188
+ pdf_content.append("")
 
189
  if not first_numbered_seen:
190
  first_numbered_seen = True
191
 
192
  line = detect_and_convert_links(line)
193
 
 
194
  if render_with_bold or headings_to_fonts:
 
195
  line = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', line)
196
  if headings_to_fonts:
197
  line = re.sub(r'\*([^*]+?)\*', r'<b>\1</b>', line)
 
277
  for col_idx, column in enumerate(columns):
278
  for item in column:
279
  if isinstance(item, str):
 
280
  heading_match = re.match(r'<h(\d)>(.*?)</h\1>', item) if headings_to_fonts else None
281
  if heading_match:
282
  level = int(heading_match.group(1))
283
  heading_text = heading_match.group(2)
 
284
  heading_style = ParagraphStyle(
285
  f'Heading{level}Style',
286
  parent=styles['Heading1'],
287
  fontName="DejaVuSans",
288
  textColor=colors.darkblue if level == 1 else (colors.black if level > 2 else colors.blue),
289
+ fontSize=adjusted_font_size * (1.6 - (level-1)*0.15),
290
  leading=adjusted_font_size * (1.8 - (level-1)*0.15),
291
  spaceAfter=4 - (level-1),
292
  spaceBefore=6 - (level-1),
293
  linkUnderline=True
294
  )
295
  column_cells[col_idx].append(Paragraph(apply_emoji_font(heading_text, "NotoEmoji-Bold"), heading_style))
 
296
  elif item.startswith("<b>") and item.endswith("</b>"):
297
  content = item[3:-4].strip()
298
  if number_pattern.match(content):
 
338
  st.error(f"Failed to render PDF preview: {e}")
339
  return None
340
 
341
+ # PDF creation and linking function
342
+ WORDS = ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"]
343
+
344
def create_and_link_pdfs(source_pdf="TestSource.pdf", target_pdf="TestTarget.pdf"):
    """Create two single-page PDFs with a numbered word list and link them.

    Both files are written to disk (overwriting any existing file) with the
    entries of ``WORDS`` drawn as "1. one", "2. two", ... on one page. Then:

    * ``target_pdf`` gets an outline item ("Seven Bookmark") pointing at the
      position of entry 7;
    * ``source_pdf`` gets a small "link" label under entry 7 plus a URI link
      annotation pointing at ``"<target_pdf>#page=1"``;
    * both files get an internal link over entry 1 that jumps to the position
      of entry 10 on the same page.

    Args:
        source_pdf: Path of the PDF that carries the link to ``target_pdf``.
        target_pdf: Path of the PDF that carries the bookmark.

    Returns:
        The tuple ``(source_pdf, target_pdf)`` exactly as passed in.
    """
    # Local import: Fit is not part of the file's top-level pypdf imports.
    from pypdf.generic import Fit

    # Page layout shared by every helper so drawn text, link rectangles and
    # destinations cannot drift apart.
    left_margin = 50
    top_y = 800
    line_step = 20

    def line_y(number):
        """Return the baseline y coordinate of list entry ``number`` (1-based)."""
        return top_y - number * line_step

    def copy_pages(pdf_file):
        """Return a PdfWriter holding a copy of every page of ``pdf_file``."""
        writer = PdfWriter()
        for page in PdfReader(pdf_file).pages:
            writer.add_page(page)
        return writer

    def save(writer, pdf_file):
        """Write ``writer`` to ``pdf_file``, replacing the previous content."""
        with open(pdf_file, "wb") as f:
            writer.write(f)

    def create_base_pdf(filename):
        """Render the numbered WORDS list on a single page and save it."""
        with io.BytesIO() as buffer:
            c = canvas.Canvas(buffer)
            c.setFont("Helvetica", 12)
            for i, word in enumerate(WORDS, 1):
                c.drawString(left_margin, line_y(i), f"{i}. {word}")
            # One showPage after the loop: all entries share page 0, which is
            # what the bookmark and link destinations below rely on.
            c.showPage()
            c.save()
            with open(filename, "wb") as f:
                f.write(buffer.getvalue())

    def add_bookmark_to_seven(pdf_file):
        """Add an outline item that jumps to entry 7 on the first page."""
        writer = copy_pages(pdf_file)
        # add_bookmark() no longer exists in pypdf 3+; add_outline_item() takes
        # the destination as a Fit object instead of a raw coordinate list.
        writer.add_outline_item(
            "Seven Bookmark",
            0,
            fit=Fit.xyz(left=left_margin, top=line_y(7)),
        )
        save(writer, pdf_file)

    def modify_source_pdf(source, target):
        """Stamp a "link" label under entry 7 and attach a link to ``target``."""
        writer = copy_pages(source)
        seven_y = line_y(7)
        with io.BytesIO() as overlay:
            c = canvas.Canvas(overlay)
            c.setFont("Helvetica", 8)
            c.drawString(90, seven_y - 5, "link")
            c.showPage()
            c.save()
            overlay.seek(0)
            # The overlay buffer must stay open until the writer is saved.
            writer.pages[0].merge_page(PdfReader(overlay).pages[0])
            # Link only accepts `url` (external) or `target_page_index`
            # (internal); the other file is therefore addressed as a URI.
            link = Link(
                rect=(90, seven_y - 10, 150, seven_y + 10),
                url=f"{target}#page=1",
            )
            writer.add_annotation(page_number=0, annotation=link)
            save(writer, source)

    def add_internal_link(pdf_file):
        """Make entry 1 a clickable link that jumps to entry 10."""
        writer = copy_pages(pdf_file)
        one_y = line_y(1)
        link = Link(
            rect=(left_margin, one_y - 10, 100, one_y + 10),
            target_page_index=0,
            # zoom=0 keeps the viewer's current zoom level.
            fit=Fit.xyz(left=left_margin, top=line_y(10), zoom=0),
        )
        writer.add_annotation(page_number=0, annotation=link)
        save(writer, pdf_file)

    create_base_pdf(source_pdf)
    create_base_pdf(target_pdf)
    add_bookmark_to_seven(target_pdf)
    modify_source_pdf(source_pdf, target_pdf)
    add_internal_link(source_pdf)
    add_internal_link(target_pdf)
    return source_pdf, target_pdf
419
+
420
+ # Streamlit UI
421
  md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
422
  md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]
423
 
 
441
  add_space_before_numbered = st.checkbox("Add Space Ahead of Numbered Lines", value=False, key="add_space_before_numbered")
442
  headings_to_fonts = st.checkbox("Headings to Fonts", value=False, key="headings_to_fonts",
443
  help="Convert Markdown headings (# Heading) and emphasis (*word*) to appropriate font styles")
 
 
444
  auto_columns = st.checkbox("AutoColumns", value=False, key="auto_columns")
445
 
 
446
  if auto_columns and 'markdown_content' in st.session_state:
447
  current_markdown = st.session_state.markdown_content
448
  lines = current_markdown.strip().split('\n')
449
  longest_line_words = 0
450
  for line in lines:
451
+ if line.strip():
452
  word_count = len(line.split())
453
  longest_line_words = max(longest_line_words, word_count)
 
 
454
  if longest_line_words > 25:
455
+ recommended_columns = 1
456
  elif longest_line_words >= 18:
457
+ recommended_columns = 2
458
  elif longest_line_words >= 11:
459
+ recommended_columns = 3
460
  else:
461
+ recommended_columns = "Auto"
 
462
  st.info(f"Longest line has {longest_line_words} words. Recommending {recommended_columns} columns.")
463
  else:
464
  recommended_columns = "Auto"
 
469
  num_columns = 0 if num_columns == "Auto" else int(num_columns)
470
  st.info("Font size and columns adjust to fit one page.")
471
 
 
472
  edited_markdown = st.text_area("Input Markdown", value=st.session_state.markdown_content, height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")
473
 
 
474
  col1, col2 = st.columns(2)
475
  with col1:
476
  if st.button("πŸ”„πŸ“„ Update PDF"):
 
480
  f.write(edited_markdown)
481
  st.rerun()
482
 
 
483
  with col2:
484
  if st.button("βœ‚οΈ Trim Emojis"):
485
  trimmed_content = trim_emojis_except_numbered(edited_markdown)
 
512
  file_name=audio_filename,
513
  mime="audio/mpeg"
514
  )
515
+
516
+ # Existing "Create Linked PDFs" button
517
+ if st.button("πŸ“‘ Create Linked PDFs"):
518
+ with st.spinner("Creating and linking PDFs..."):
519
+ source_pdf, target_pdf = create_and_link_pdfs()
520
+ st.success(f"Created {source_pdf} and {target_pdf}")
521
+ for pdf_file in [source_pdf, target_pdf]:
522
+ with open(pdf_file, "rb") as f:
523
+ st.download_button(
524
+ label=f"πŸ’Ύ Download {pdf_file}",
525
+ data=f.read(),
526
+ file_name=pdf_file,
527
+ mime="application/pdf"
528
+ )
529
+
530
+ # New "Test PDFs" button
531
+ if st.button("πŸ§ͺ Test PDFs"):
532
+ with st.spinner("Generating and testing PDFs..."):
533
+ source_pdf, target_pdf = create_and_link_pdfs()
534
+ st.success(f"Generated {source_pdf} and {target_pdf}")
535
+ # Display PDFs as images
536
+ for pdf_file in [source_pdf, target_pdf]:
537
+ with open(pdf_file, "rb") as f:
538
+ pdf_bytes = f.read()
539
+ images = pdf_to_image(pdf_bytes)
540
+ if images:
541
+ st.subheader(f"Preview of {pdf_file}")
542
+ for img in images:
543
+ st.image(img, caption=f"{pdf_file} Page", use_container_width=True)
544
+ # Provide download option
545
+ with open(pdf_file, "rb") as f:
546
+ st.download_button(
547
+ label=f"πŸ’Ύ Download {pdf_file}",
548
+ data=f.read(),
549
+ file_name=pdf_file,
550
+ mime="application/pdf"
551
+ )
552
 
553
  with st.spinner("Generating PDF..."):
554
  pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered, headings_to_fonts)