awacke1 committed on
Commit
b84aa61
·
verified ·
1 Parent(s): 703c2b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +255 -4
app.py CHANGED
@@ -158,19 +158,264 @@ def apply_emoji_font(text, emoji_font):
158
  combined_text = before + f'<a href="{url}">{label}</a>' + after
159
  return combined_text
160
 
161
- def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered):
162
  lines = markdown_text.strip().split('\n')
163
  pdf_content = []
164
  number_pattern = re.compile(r'^\d+\.\s')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
  # Track if we've seen the first numbered line already
167
  first_numbered_seen = False
168
 
169
  for line in lines:
170
  line = line.strip()
171
- if not line or line.startswith('# '):
172
  continue
173
 
 
 
 
 
 
 
 
 
 
 
 
174
  # Check if this is a numbered line
175
  is_numbered_line = number_pattern.match(line) is not None
176
 
@@ -184,8 +429,14 @@ def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers,
184
  first_numbered_seen = True
185
 
186
  line = detect_and_convert_links(line)
187
- if render_with_bold:
188
- line = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', line)
 
 
 
 
 
 
189
  if auto_bold_numbers and is_numbered_line:
190
  if not (line.startswith("<b>") and line.endswith("</b>")):
191
  if "<b>" in line and "</b>" in line:
 
158
  combined_text = before + f'<a href="{url}">{label}</a>' + after
159
  return combined_text
160
 
161
+ def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered, headings_to_fonts):
162
  lines = markdown_text.strip().split('\n')
163
  pdf_content = []
164
  number_pattern = re.compile(r'^\d+\.\s')
165
+ heading_pattern = re.compile(r'^(#{1,4})\s+(.+)
166
+
167
+ def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered, headings_to_fonts):
168
+ buffer = io.BytesIO()
169
+ page_width = A4[0] * 2
170
+ page_height = A4[1]
171
+ doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
172
+ styles = getSampleStyleSheet()
173
+ spacer_height = 10
174
+ pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered, headings_to_fonts)
175
+ try:
176
+ available_font_files = glob.glob("*.ttf")
177
+ if not available_font_files:
178
+ st.error("No .ttf font files found.")
179
+ return
180
+ selected_font_path = next((f for f in available_font_files if "NotoEmoji-Bold" in f), None)
181
+ if selected_font_path:
182
+ pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", selected_font_path))
183
+ pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
184
+ except Exception as e:
185
+ st.error(f"Font registration error: {e}")
186
+ return
187
+ total_chars = sum(len(line) for line in pdf_content)
188
+ hierarchy_weight = sum(1.5 if line.startswith("<b>") else 1 for line in pdf_content)
189
+ content_density = total_lines * hierarchy_weight + total_chars / 50
190
+ usable_height = page_height - 72 - spacer_height
191
+ usable_width = page_width - 72
192
+ avg_line_chars = total_chars / total_lines if total_lines > 0 else 50
193
+ ideal_lines_per_col = 20
194
+ suggested_columns = max(1, min(6, int(total_lines / ideal_lines_per_col) + 1))
195
+ num_columns = num_columns if num_columns != 0 else suggested_columns
196
+ col_width = usable_width / num_columns
197
+ min_font_size = 6
198
+ max_font_size = 16
199
+ lines_per_col = total_lines / num_columns if num_columns > 0 else total_lines
200
+ target_height_per_line = usable_height / lines_per_col if lines_per_col > 0 else usable_height
201
+ estimated_font_size = int(target_height_per_line / 1.5)
202
+ adjusted_font_size = max(min_font_size, min(max_font_size, estimated_font_size))
203
+ if avg_line_chars > col_width / adjusted_font_size * 10:
204
+ adjusted_font_size = int(col_width / (avg_line_chars / 10))
205
+ adjusted_font_size = max(min_font_size, adjusted_font_size)
206
+ item_style = ParagraphStyle(
207
+ 'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
208
+ fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
209
+ linkUnderline=True
210
+ )
211
+ numbered_bold_style = ParagraphStyle(
212
+ 'NumberedBoldStyle', parent=styles['Normal'], fontName="NotoEmoji-Bold",
213
+ fontSize=adjusted_font_size + 1 if enlarge_numbered else adjusted_font_size,
214
+ leading=(adjusted_font_size + 1) * 1.15 if enlarge_numbered else adjusted_font_size * 1.15, spaceAfter=1,
215
+ linkUnderline=True
216
+ )
217
+ section_style = ParagraphStyle(
218
+ 'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
219
+ textColor=colors.darkblue, fontSize=adjusted_font_size * 1.1, leading=adjusted_font_size * 1.32, spaceAfter=2,
220
+ linkUnderline=True
221
+ )
222
+ columns = [[] for _ in range(num_columns)]
223
+ lines_per_column = total_lines / num_columns if num_columns > 0 else total_lines
224
+ current_line_count = 0
225
+ current_column = 0
226
+ number_pattern = re.compile(r'^\d+\.\s')
227
+ for item in pdf_content:
228
+ if current_line_count >= lines_per_column and current_column < num_columns - 1:
229
+ current_column += 1
230
+ current_line_count = 0
231
+ columns[current_column].append(item)
232
+ current_line_count += 1
233
+ column_cells = [[] for _ in range(num_columns)]
234
+ for col_idx, column in enumerate(columns):
235
+ for item in column:
236
+ if isinstance(item, str) and item.startswith("<b>") and item.endswith("</b>"):
237
+ content = item[3:-4].strip()
238
+ if number_pattern.match(content):
239
+ column_cells[col_idx].append(Paragraph(apply_emoji_font(content, "NotoEmoji-Bold"), numbered_bold_style))
240
+ else:
241
+ column_cells[col_idx].append(Paragraph(apply_emoji_font(content, "NotoEmoji-Bold"), section_style))
242
+ else:
243
+ column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))
244
+ max_cells = max(len(cells) for cells in column_cells) if column_cells else 0
245
+ for cells in column_cells:
246
+ cells.extend([Paragraph("", item_style)] * (max_cells - len(cells)))
247
+ table_data = list(zip(*column_cells)) if column_cells else [[]]
248
+ table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
249
+ table.setStyle(TableStyle([
250
+ ('VALIGN', (0, 0), (-1, -1), 'TOP'),
251
+ ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
252
+ ('BACKGROUND', (0, 0), (-1, -1), colors.white),
253
+ ('GRID', (0, 0), (-1, -1), 0, colors.white),
254
+ ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
255
+ ('LEFTPADDING', (0, 0), (-1, -1), 2),
256
+ ('RIGHTPADDING', (0, 0), (-1, -1), 2),
257
+ ('TOPPADDING', (0, 0), (-1, -1), 1),
258
+ ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
259
+ ]))
260
+ story = [Spacer(1, spacer_height), table]
261
+ doc.build(story)
262
+ buffer.seek(0)
263
+ return buffer.getvalue()
264
+
265
+ def pdf_to_image(pdf_bytes):
266
+ try:
267
+ doc = fitz.open(stream=pdf_bytes, filetype="pdf")
268
+ images = []
269
+ for page in doc:
270
+ pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
271
+ img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
272
+ images.append(img)
273
+ doc.close()
274
+ return images
275
+ except Exception as e:
276
+ st.error(f"Failed to render PDF preview: {e}")
277
+ return None
278
+
279
+ md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
280
+ md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]
281
+
282
+ with st.sidebar:
283
+ st.markdown("### PDF Options")
284
+ if md_options:
285
+ selected_md = st.selectbox("Select Markdown File", options=md_options, index=0)
286
+ with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
287
+ st.session_state.markdown_content = f.read()
288
+ else:
289
+ st.warning("No markdown file found. Please add one to your folder.")
290
+ selected_md = None
291
+ st.session_state.markdown_content = ""
292
+ available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
293
+ selected_font_name = st.selectbox("Select Emoji Font", options=list(available_font_files.keys()),
294
+ index=list(available_font_files.keys()).index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0)
295
+ base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
296
+ render_with_bold = st.checkbox("Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold")
297
+ auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
298
+ enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
299
+ add_space_before_numbered = st.checkbox("Add Space Ahead of Numbered Lines", value=False, key="add_space_before_numbered")
300
+
301
+ # Add AutoColumns option to automatically determine column count based on line length
302
+ auto_columns = st.checkbox("AutoColumns", value=False, key="auto_columns")
303
+
304
+ # Auto-determine column count based on longest line if AutoColumns is checked
305
+ if auto_columns and 'markdown_content' in st.session_state:
306
+ current_markdown = st.session_state.markdown_content
307
+ lines = current_markdown.strip().split('\n')
308
+ longest_line_words = 0
309
+ for line in lines:
310
+ if line.strip(): # Skip empty lines
311
+ word_count = len(line.split())
312
+ longest_line_words = max(longest_line_words, word_count)
313
+
314
+ # Set recommended columns based on word count
315
+ if longest_line_words > 25:
316
+ recommended_columns = 1 # Very long lines need a single column
317
+ elif longest_line_words >= 18:
318
+ recommended_columns = 2 # Long lines need 2 columns
319
+ elif longest_line_words >= 11:
320
+ recommended_columns = 3 # Medium lines can use 3 columns
321
+ else:
322
+ recommended_columns = "Auto" # Default to auto for shorter lines
323
+
324
+ st.info(f"Longest line has {longest_line_words} words. Recommending {recommended_columns} columns.")
325
+ else:
326
+ recommended_columns = "Auto"
327
+
328
+ column_options = ["Auto"] + list(range(1, 7))
329
+ num_columns = st.selectbox("Number of Columns", options=column_options,
330
+ index=0 if recommended_columns == "Auto" else column_options.index(recommended_columns))
331
+ num_columns = 0 if num_columns == "Auto" else int(num_columns)
332
+ st.info("Font size and columns adjust to fit one page.")
333
+
334
+ # Changed label from "Modify the markdown content below:" to "Input Markdown"
335
+ edited_markdown = st.text_area("Input Markdown", value=st.session_state.markdown_content, height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")
336
+
337
+ # Added emoji to "Update PDF" button and created a two-column layout for buttons
338
+ col1, col2 = st.columns(2)
339
+ with col1:
340
+ if st.button("🔄📄 Update PDF"):
341
+ st.session_state.markdown_content = edited_markdown
342
+ if selected_md:
343
+ with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
344
+ f.write(edited_markdown)
345
+ st.rerun()
346
+
347
+ # Added "Trim Emojis" button in second column
348
+ with col2:
349
+ if st.button("✂️ Trim Emojis"):
350
+ trimmed_content = trim_emojis_except_numbered(edited_markdown)
351
+ st.session_state.markdown_content = trimmed_content
352
+ if selected_md:
353
+ with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
354
+ f.write(trimmed_content)
355
+ st.rerun()
356
+
357
+ prefix = get_timestamp_prefix()
358
+ st.download_button(
359
+ label="💾📝 Save Markdown",
360
+ data=st.session_state.markdown_content,
361
+ file_name=f"{prefix} {selected_md}.md" if selected_md else f"{prefix} default.md",
362
+ mime="text/markdown"
363
+ )
364
+ st.markdown("### Text-to-Speech")
365
+ VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
366
+ selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
367
+ if st.button("Generate Audio"):
368
+ cleaned_text = clean_for_speech(st.session_state.markdown_content)
369
+ audio_filename = f"{prefix} {selected_md} {selected_voice}.mp3" if selected_md else f"{prefix} default {selected_voice}.mp3"
370
+ audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, audio_filename))
371
+ st.audio(audio_file)
372
+ with open(audio_file, "rb") as f:
373
+ audio_bytes = f.read()
374
+ st.download_button(
375
+ label="💾🔊 Save Audio",
376
+ data=audio_bytes,
377
+ file_name=audio_filename,
378
+ mime="audio/mpeg"
379
+ )
380
+
381
+ with st.spinner("Generating PDF..."):
382
+ pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered)
383
+
384
+ with st.container():
385
+ pdf_images = pdf_to_image(pdf_bytes)
386
+ if pdf_images:
387
+ for img in pdf_images:
388
+ st.image(img, use_container_width=True)
389
+ else:
390
+ st.info("Download the PDF to view it locally.")
391
+
392
+ with st.sidebar:
393
+ st.download_button(
394
+ label="💾📄 Save PDF",
395
+ data=pdf_bytes,
396
+ file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
397
+ mime="application/pdf"
398
+ ))
399
 
400
  # Track if we've seen the first numbered line already
401
  first_numbered_seen = False
402
 
403
  for line in lines:
404
  line = line.strip()
405
+ if not line:
406
  continue
407
 
408
+ # Process headings if headings_to_fonts is enabled
409
+ if headings_to_fonts and line.startswith('#'):
410
+ heading_match = heading_pattern.match(line)
411
+ if heading_match:
412
+ level = len(heading_match.group(1)) # Number of # symbols
413
+ heading_text = heading_match.group(2).strip()
414
+ # Convert the heading to bold with appropriate formatting
415
+ formatted_heading = f"<h{level}>{heading_text}</h{level}>"
416
+ pdf_content.append(formatted_heading)
417
+ continue
418
+
419
  # Check if this is a numbered line
420
  is_numbered_line = number_pattern.match(line) is not None
421
 
 
429
  first_numbered_seen = True
430
 
431
  line = detect_and_convert_links(line)
432
+
433
+ # Process bold text (*word* or **word**)
434
+ if render_with_bold or headings_to_fonts:
435
+ # Handle both *word* and **word** patterns
436
+ line = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', line)
437
+ if headings_to_fonts:
438
+ line = re.sub(r'\*([^*]+?)\*', r'<b>\1</b>', line)
439
+
440
  if auto_bold_numbers and is_numbered_line:
441
  if not (line.startswith("<b>") and line.endswith("</b>")):
442
  if "<b>" in line and "</b>" in line: