awacke1 committed on
Commit
fe91e4d
·
verified ·
1 Parent(s): b84aa61

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -243
app.py CHANGED
@@ -162,240 +162,7 @@ def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers,
162
  lines = markdown_text.strip().split('\n')
163
  pdf_content = []
164
  number_pattern = re.compile(r'^\d+\.\s')
165
- heading_pattern = re.compile(r'^(#{1,4})\s+(.+)
166
-
167
- def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered, headings_to_fonts):
168
- buffer = io.BytesIO()
169
- page_width = A4[0] * 2
170
- page_height = A4[1]
171
- doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
172
- styles = getSampleStyleSheet()
173
- spacer_height = 10
174
- pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered, headings_to_fonts)
175
- try:
176
- available_font_files = glob.glob("*.ttf")
177
- if not available_font_files:
178
- st.error("No .ttf font files found.")
179
- return
180
- selected_font_path = next((f for f in available_font_files if "NotoEmoji-Bold" in f), None)
181
- if selected_font_path:
182
- pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", selected_font_path))
183
- pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
184
- except Exception as e:
185
- st.error(f"Font registration error: {e}")
186
- return
187
- total_chars = sum(len(line) for line in pdf_content)
188
- hierarchy_weight = sum(1.5 if line.startswith("<b>") else 1 for line in pdf_content)
189
- content_density = total_lines * hierarchy_weight + total_chars / 50
190
- usable_height = page_height - 72 - spacer_height
191
- usable_width = page_width - 72
192
- avg_line_chars = total_chars / total_lines if total_lines > 0 else 50
193
- ideal_lines_per_col = 20
194
- suggested_columns = max(1, min(6, int(total_lines / ideal_lines_per_col) + 1))
195
- num_columns = num_columns if num_columns != 0 else suggested_columns
196
- col_width = usable_width / num_columns
197
- min_font_size = 6
198
- max_font_size = 16
199
- lines_per_col = total_lines / num_columns if num_columns > 0 else total_lines
200
- target_height_per_line = usable_height / lines_per_col if lines_per_col > 0 else usable_height
201
- estimated_font_size = int(target_height_per_line / 1.5)
202
- adjusted_font_size = max(min_font_size, min(max_font_size, estimated_font_size))
203
- if avg_line_chars > col_width / adjusted_font_size * 10:
204
- adjusted_font_size = int(col_width / (avg_line_chars / 10))
205
- adjusted_font_size = max(min_font_size, adjusted_font_size)
206
- item_style = ParagraphStyle(
207
- 'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
208
- fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
209
- linkUnderline=True
210
- )
211
- numbered_bold_style = ParagraphStyle(
212
- 'NumberedBoldStyle', parent=styles['Normal'], fontName="NotoEmoji-Bold",
213
- fontSize=adjusted_font_size + 1 if enlarge_numbered else adjusted_font_size,
214
- leading=(adjusted_font_size + 1) * 1.15 if enlarge_numbered else adjusted_font_size * 1.15, spaceAfter=1,
215
- linkUnderline=True
216
- )
217
- section_style = ParagraphStyle(
218
- 'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
219
- textColor=colors.darkblue, fontSize=adjusted_font_size * 1.1, leading=adjusted_font_size * 1.32, spaceAfter=2,
220
- linkUnderline=True
221
- )
222
- columns = [[] for _ in range(num_columns)]
223
- lines_per_column = total_lines / num_columns if num_columns > 0 else total_lines
224
- current_line_count = 0
225
- current_column = 0
226
- number_pattern = re.compile(r'^\d+\.\s')
227
- for item in pdf_content:
228
- if current_line_count >= lines_per_column and current_column < num_columns - 1:
229
- current_column += 1
230
- current_line_count = 0
231
- columns[current_column].append(item)
232
- current_line_count += 1
233
- column_cells = [[] for _ in range(num_columns)]
234
- for col_idx, column in enumerate(columns):
235
- for item in column:
236
- if isinstance(item, str) and item.startswith("<b>") and item.endswith("</b>"):
237
- content = item[3:-4].strip()
238
- if number_pattern.match(content):
239
- column_cells[col_idx].append(Paragraph(apply_emoji_font(content, "NotoEmoji-Bold"), numbered_bold_style))
240
- else:
241
- column_cells[col_idx].append(Paragraph(apply_emoji_font(content, "NotoEmoji-Bold"), section_style))
242
- else:
243
- column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))
244
- max_cells = max(len(cells) for cells in column_cells) if column_cells else 0
245
- for cells in column_cells:
246
- cells.extend([Paragraph("", item_style)] * (max_cells - len(cells)))
247
- table_data = list(zip(*column_cells)) if column_cells else [[]]
248
- table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
249
- table.setStyle(TableStyle([
250
- ('VALIGN', (0, 0), (-1, -1), 'TOP'),
251
- ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
252
- ('BACKGROUND', (0, 0), (-1, -1), colors.white),
253
- ('GRID', (0, 0), (-1, -1), 0, colors.white),
254
- ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
255
- ('LEFTPADDING', (0, 0), (-1, -1), 2),
256
- ('RIGHTPADDING', (0, 0), (-1, -1), 2),
257
- ('TOPPADDING', (0, 0), (-1, -1), 1),
258
- ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
259
- ]))
260
- story = [Spacer(1, spacer_height), table]
261
- doc.build(story)
262
- buffer.seek(0)
263
- return buffer.getvalue()
264
-
265
- def pdf_to_image(pdf_bytes):
266
- try:
267
- doc = fitz.open(stream=pdf_bytes, filetype="pdf")
268
- images = []
269
- for page in doc:
270
- pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
271
- img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
272
- images.append(img)
273
- doc.close()
274
- return images
275
- except Exception as e:
276
- st.error(f"Failed to render PDF preview: {e}")
277
- return None
278
-
279
- md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
280
- md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]
281
-
282
- with st.sidebar:
283
- st.markdown("### PDF Options")
284
- if md_options:
285
- selected_md = st.selectbox("Select Markdown File", options=md_options, index=0)
286
- with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
287
- st.session_state.markdown_content = f.read()
288
- else:
289
- st.warning("No markdown file found. Please add one to your folder.")
290
- selected_md = None
291
- st.session_state.markdown_content = ""
292
- available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
293
- selected_font_name = st.selectbox("Select Emoji Font", options=list(available_font_files.keys()),
294
- index=list(available_font_files.keys()).index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0)
295
- base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
296
- render_with_bold = st.checkbox("Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold")
297
- auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
298
- enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
299
- add_space_before_numbered = st.checkbox("Add Space Ahead of Numbered Lines", value=False, key="add_space_before_numbered")
300
-
301
- # Add AutoColumns option to automatically determine column count based on line length
302
- auto_columns = st.checkbox("AutoColumns", value=False, key="auto_columns")
303
-
304
- # Auto-determine column count based on longest line if AutoColumns is checked
305
- if auto_columns and 'markdown_content' in st.session_state:
306
- current_markdown = st.session_state.markdown_content
307
- lines = current_markdown.strip().split('\n')
308
- longest_line_words = 0
309
- for line in lines:
310
- if line.strip(): # Skip empty lines
311
- word_count = len(line.split())
312
- longest_line_words = max(longest_line_words, word_count)
313
-
314
- # Set recommended columns based on word count
315
- if longest_line_words > 25:
316
- recommended_columns = 1 # Very long lines need a single column
317
- elif longest_line_words >= 18:
318
- recommended_columns = 2 # Long lines need 2 columns
319
- elif longest_line_words >= 11:
320
- recommended_columns = 3 # Medium lines can use 3 columns
321
- else:
322
- recommended_columns = "Auto" # Default to auto for shorter lines
323
-
324
- st.info(f"Longest line has {longest_line_words} words. Recommending {recommended_columns} columns.")
325
- else:
326
- recommended_columns = "Auto"
327
-
328
- column_options = ["Auto"] + list(range(1, 7))
329
- num_columns = st.selectbox("Number of Columns", options=column_options,
330
- index=0 if recommended_columns == "Auto" else column_options.index(recommended_columns))
331
- num_columns = 0 if num_columns == "Auto" else int(num_columns)
332
- st.info("Font size and columns adjust to fit one page.")
333
-
334
- # Changed label from "Modify the markdown content below:" to "Input Markdown"
335
- edited_markdown = st.text_area("Input Markdown", value=st.session_state.markdown_content, height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")
336
-
337
- # Added emoji to "Update PDF" button and created a two-column layout for buttons
338
- col1, col2 = st.columns(2)
339
- with col1:
340
- if st.button("🔄📄 Update PDF"):
341
- st.session_state.markdown_content = edited_markdown
342
- if selected_md:
343
- with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
344
- f.write(edited_markdown)
345
- st.rerun()
346
-
347
- # Added "Trim Emojis" button in second column
348
- with col2:
349
- if st.button("✂️ Trim Emojis"):
350
- trimmed_content = trim_emojis_except_numbered(edited_markdown)
351
- st.session_state.markdown_content = trimmed_content
352
- if selected_md:
353
- with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
354
- f.write(trimmed_content)
355
- st.rerun()
356
-
357
- prefix = get_timestamp_prefix()
358
- st.download_button(
359
- label="💾📝 Save Markdown",
360
- data=st.session_state.markdown_content,
361
- file_name=f"{prefix} {selected_md}.md" if selected_md else f"{prefix} default.md",
362
- mime="text/markdown"
363
- )
364
- st.markdown("### Text-to-Speech")
365
- VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
366
- selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
367
- if st.button("Generate Audio"):
368
- cleaned_text = clean_for_speech(st.session_state.markdown_content)
369
- audio_filename = f"{prefix} {selected_md} {selected_voice}.mp3" if selected_md else f"{prefix} default {selected_voice}.mp3"
370
- audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, audio_filename))
371
- st.audio(audio_file)
372
- with open(audio_file, "rb") as f:
373
- audio_bytes = f.read()
374
- st.download_button(
375
- label="💾🔊 Save Audio",
376
- data=audio_bytes,
377
- file_name=audio_filename,
378
- mime="audio/mpeg"
379
- )
380
-
381
- with st.spinner("Generating PDF..."):
382
- pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered)
383
-
384
- with st.container():
385
- pdf_images = pdf_to_image(pdf_bytes)
386
- if pdf_images:
387
- for img in pdf_images:
388
- st.image(img, use_container_width=True)
389
- else:
390
- st.info("Download the PDF to view it locally.")
391
-
392
- with st.sidebar:
393
- st.download_button(
394
- label="💾📄 Save PDF",
395
- data=pdf_bytes,
396
- file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
397
- mime="application/pdf"
398
- ))
399
 
400
  # Track if we've seen the first numbered line already
401
  first_numbered_seen = False
@@ -448,14 +215,14 @@ with st.sidebar:
448
  total_lines = len(pdf_content)
449
  return pdf_content, total_lines
450
 
451
- def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered):
452
  buffer = io.BytesIO()
453
  page_width = A4[0] * 2
454
  page_height = A4[1]
455
  doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
456
  styles = getSampleStyleSheet()
457
  spacer_height = 10
458
- pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered)
459
  try:
460
  available_font_files = glob.glob("*.ttf")
461
  if not available_font_files:
@@ -517,14 +284,36 @@ def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_number
517
  column_cells = [[] for _ in range(num_columns)]
518
  for col_idx, column in enumerate(columns):
519
  for item in column:
520
- if isinstance(item, str) and item.startswith("<b>") and item.endswith("</b>"):
521
- content = item[3:-4].strip()
522
- if number_pattern.match(content):
523
- column_cells[col_idx].append(Paragraph(apply_emoji_font(content, "NotoEmoji-Bold"), numbered_bold_style))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
  else:
525
- column_cells[col_idx].append(Paragraph(apply_emoji_font(content, "NotoEmoji-Bold"), section_style))
526
  else:
527
- column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))
528
  max_cells = max(len(cells) for cells in column_cells) if column_cells else 0
529
  for cells in column_cells:
530
  cells.extend([Paragraph("", item_style)] * (max_cells - len(cells)))
@@ -581,6 +370,8 @@ with st.sidebar:
581
  auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
582
  enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
583
  add_space_before_numbered = st.checkbox("Add Space Ahead of Numbered Lines", value=False, key="add_space_before_numbered")
 
 
584
 
585
  # Add AutoColumns option to automatically determine column count based on line length
586
  auto_columns = st.checkbox("AutoColumns", value=False, key="auto_columns")
@@ -663,7 +454,7 @@ with st.sidebar:
663
  )
664
 
665
  with st.spinner("Generating PDF..."):
666
- pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered)
667
 
668
  with st.container():
669
  pdf_images = pdf_to_image(pdf_bytes)
 
162
  lines = markdown_text.strip().split('\n')
163
  pdf_content = []
164
  number_pattern = re.compile(r'^\d+\.\s')
165
+ heading_pattern = re.compile(r'^(#{1,4})\s+(.+)$')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  # Track if we've seen the first numbered line already
168
  first_numbered_seen = False
 
215
  total_lines = len(pdf_content)
216
  return pdf_content, total_lines
217
 
218
+ def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered, headings_to_fonts):
219
  buffer = io.BytesIO()
220
  page_width = A4[0] * 2
221
  page_height = A4[1]
222
  doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
223
  styles = getSampleStyleSheet()
224
  spacer_height = 10
225
+ pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered, headings_to_fonts)
226
  try:
227
  available_font_files = glob.glob("*.ttf")
228
  if not available_font_files:
 
284
  column_cells = [[] for _ in range(num_columns)]
285
  for col_idx, column in enumerate(columns):
286
  for item in column:
287
+ if isinstance(item, str):
288
+ # Handle heading tags if headings_to_fonts is enabled
289
+ heading_match = re.match(r'<h(\d)>(.*?)</h\1>', item) if headings_to_fonts else None
290
+ if heading_match:
291
+ level = int(heading_match.group(1))
292
+ heading_text = heading_match.group(2)
293
+ # Create heading styles based on level
294
+ heading_style = ParagraphStyle(
295
+ f'Heading{level}Style',
296
+ parent=styles['Heading1'],
297
+ fontName="DejaVuSans",
298
+ textColor=colors.darkblue if level == 1 else (colors.black if level > 2 else colors.blue),
299
+ fontSize=adjusted_font_size * (1.6 - (level-1)*0.15), # Size decreases with level
300
+ leading=adjusted_font_size * (1.8 - (level-1)*0.15),
301
+ spaceAfter=4 - (level-1),
302
+ spaceBefore=6 - (level-1),
303
+ linkUnderline=True
304
+ )
305
+ column_cells[col_idx].append(Paragraph(apply_emoji_font(heading_text, "NotoEmoji-Bold"), heading_style))
306
+ # Handle regular bold items
307
+ elif item.startswith("<b>") and item.endswith("</b>"):
308
+ content = item[3:-4].strip()
309
+ if number_pattern.match(content):
310
+ column_cells[col_idx].append(Paragraph(apply_emoji_font(content, "NotoEmoji-Bold"), numbered_bold_style))
311
+ else:
312
+ column_cells[col_idx].append(Paragraph(apply_emoji_font(content, "NotoEmoji-Bold"), section_style))
313
  else:
314
+ column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))
315
  else:
316
+ column_cells[col_idx].append(Paragraph(apply_emoji_font(str(item), "DejaVuSans"), item_style))
317
  max_cells = max(len(cells) for cells in column_cells) if column_cells else 0
318
  for cells in column_cells:
319
  cells.extend([Paragraph("", item_style)] * (max_cells - len(cells)))
 
370
  auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
371
  enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
372
  add_space_before_numbered = st.checkbox("Add Space Ahead of Numbered Lines", value=False, key="add_space_before_numbered")
373
+ headings_to_fonts = st.checkbox("Headings to Fonts", value=False, key="headings_to_fonts",
374
+ help="Convert Markdown headings (# Heading) and emphasis (*word*) to appropriate font styles")
375
 
376
  # Add AutoColumns option to automatically determine column count based on line length
377
  auto_columns = st.checkbox("AutoColumns", value=False, key="auto_columns")
 
454
  )
455
 
456
  with st.spinner("Generating PDF..."):
457
+ pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered, headings_to_fonts)
458
 
459
  with st.container():
460
  pdf_images = pdf_to_image(pdf_bytes)