awacke1 committed on
Commit
f2a2d43
·
verified ·
1 Parent(s): 1f79691

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +629 -0
app.py ADDED
@@ -0,0 +1,629 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import re
3
+ import os
4
+ import glob
5
+ import asyncio
6
+ import hashlib
7
+ import unicodedata
8
+ import streamlit as st
9
+ from PIL import Image
10
+ import fitz
11
+ import edge_tts
12
+ from reportlab.lib.pagesizes import A4
13
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
14
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
15
+ from reportlab.lib import colors
16
+ from reportlab.pdfbase import pdfmetrics
17
+ from reportlab.pdfbase.ttfonts import TTFont
18
+ from reportlab.pdfgen import canvas
19
+ from datetime import datetime
20
+ import pytz
21
+ from pypdf import PdfReader, PdfWriter
22
+ from pypdf.annotations import Link
23
+ from pypdf.generic import Fit
24
+
25
# Use the full browser width for the PDF preview and start with the
# options sidebar collapsed.
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
26
+
27
+ # Existing functions (unchanged)
28
def get_timestamp_prefix():
    """Return the current US/Central time as an upper-cased filename prefix.

    The value is formatted with ``"%a %m%d %I%M%p"`` (abbreviated weekday,
    month+day, 12-hour time with AM/PM) and then upper-cased.
    """
    central = pytz.timezone("US/Central")
    return datetime.now(central).strftime("%a %m%d %I%M%p").upper()
32
+
33
def clean_for_speech(text):
    """Strip ``#`` characters and emoji so the text can be sent to TTS.

    Args:
        text: Markdown source text.

    Returns:
        ``text`` with every ``#`` removed and every run of characters in the
        emoji/symbol ranges below deleted. Surrounding whitespace is kept.
    """
    text = text.replace("#", "")
    emoji_pattern = re.compile(
        r"[\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+", flags=re.UNICODE)
    return emoji_pattern.sub('', text)
49
+
50
def trim_emojis_except_numbered(markdown_text):
    """Remove emoji from every line except top-level numbered list lines.

    A line is left untouched when it starts with ``<digits>. `` (for example
    ``"1. "``); all other lines have runs of emoji/symbol characters deleted.

    Args:
        markdown_text: Markdown source text.

    Returns:
        The processed text, stripped of leading/trailing whitespace and
        re-joined with ``"\\n"``.
    """
    # The former \U0001FAD0-\U0001FAD9 range was a subset of
    # \U0001FA70-\U0001FAFF, so it is not listed separately.
    emoji_pattern = re.compile(
        r"[\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+"
    )
    number_pattern = re.compile(r'^\d+\.\s')

    processed_lines = [
        line if number_pattern.match(line) else emoji_pattern.sub('', line)
        for line in markdown_text.strip().split('\n')
    ]
    return '\n'.join(processed_lines)
76
+
77
async def generate_audio(text, voice, filename):
    """Synthesize ``text`` with edge-tts and save it to ``filename``.

    Args:
        text: Text to speak.
        voice: Voice name passed to ``edge_tts.Communicate``.
        filename: Path of the audio file to write.

    Returns:
        ``filename``, once the audio has been saved.
    """
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(filename)
    return filename
81
+
82
# Markdown links and bare URLs are matched in ONE pass. Running two
# consecutive substitutions (Markdown first, bare URLs second) re-wrapped the
# label of links such as ``[https://x.io](https://x.io)`` and produced nested
# <a> tags.
_LINK_PATTERN = re.compile(
    r'\[(?P<label>.*?)\]\((?P<md_url>https?://[^\s\[\]()<>{}]+)\)'
    # Bare URL; the look-behind skips URLs already inside an href="..." value.
    r'|(?<!href=")(?P<url>https?://[^\s<>{}]+)',
    re.IGNORECASE,
)


def detect_and_convert_links(text):
    """Convert Markdown links and bare http(s) URLs into ``<a>`` tags.

    ``[label](url)`` becomes ``<a href="url" color="blue">label</a>`` and a
    bare ``url`` becomes ``<a href="url" color="blue">url</a>``. URLs that
    directly follow ``href="`` are left untouched.

    Args:
        text: A single line of (partially converted) markdown.

    Returns:
        The text with links replaced by anchor markup.
    """
    def to_anchor(match):
        md_url = match.group('md_url')
        if md_url is not None:
            url, label = md_url, match.group('label')
        else:
            url = label = match.group('url')
        return f'<a href="{url}" color="blue">{label}</a>'

    return _LINK_PATTERN.sub(to_anchor, text)
94
+
95
def apply_emoji_font(text, emoji_font):
    """Wrap text in ``<font>`` tags: emoji in ``emoji_font``, the rest in DejaVuSans.

    Existing markup tags (anything matching ``<...>``) are passed through
    unchanged; only the text between tags is wrapped.

    Args:
        text: Paragraph markup, possibly containing tags such as ``<b>``/``<a>``.
        emoji_font: Font face name used for runs of emoji characters.

    Returns:
        The markup with every non-tag text run enclosed in a ``<font face=...>``
        element.
    """
    tag_pattern = re.compile(r'(<[^>]+>)')
    emoji_pattern = re.compile(
        r"([\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FAD0-\U0001FAD9"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+)"
    )

    def wrap(face, content):
        return f'<font face="{face}">{content}</font>'

    result = []
    # Splitting on a capturing group keeps the tags in the segment list.
    for segment in tag_pattern.split(text):
        if tag_pattern.match(segment):
            result.append(segment)
            continue

        last_pos = 0
        for match in emoji_pattern.finditer(segment):
            start, end = match.span()
            if last_pos < start:
                result.append(wrap("DejaVuSans", segment[last_pos:start]))
            emoji = unicodedata.normalize('NFC', match.group(1))
            result.append(wrap(emoji_font, emoji))
            last_pos = end
        if last_pos < len(segment):
            result.append(wrap("DejaVuSans", segment[last_pos:]))

    return ''.join(result)
141
+
142
def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered, headings_to_fonts):
    """Convert markdown lines into a list of paragraph-markup strings.

    Blank lines are dropped. Depending on the flags, headings become
    ``<hN>...</hN>`` items, ``**bold**``/``*emphasis*`` become ``<b>`` markup,
    links are converted by ``detect_and_convert_links`` and numbered lines are
    wrapped in a single ``<b>...</b>``.

    Args:
        markdown_text: Markdown source text.
        render_with_bold: Convert ``**text**`` to ``<b>text</b>``.
        auto_bold_numbers: Wrap whole numbered lines in ``<b>``.
        add_space_before_numbered: Insert an empty item before each numbered
            line after the first one seen, unless the line starts with ``1.``.
        headings_to_fonts: Convert ``#``..``####`` headings to ``<hN>`` items
            and ``*text*`` to ``<b>text</b>`` (also implies ``**`` handling).

    Returns:
        ``(pdf_content, total_lines)`` where ``total_lines == len(pdf_content)``.
    """
    pdf_content = []
    number_pattern = re.compile(r'^\d+(\.\d+)*\.\s')
    heading_pattern = re.compile(r'^(#{1,4})\s+(.+)$')
    first_numbered_seen = False

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if headings_to_fonts and line.startswith('#'):
            heading_match = heading_pattern.match(line)
            if heading_match:
                level = len(heading_match.group(1))
                heading_text = heading_match.group(2).strip()
                pdf_content.append(f"<h{level}>{heading_text}</h{level}>")
                continue
            # A '#' line that is not a valid heading falls through and is
            # treated as ordinary text.

        is_numbered_line = number_pattern.match(line) is not None

        if add_space_before_numbered and is_numbered_line:
            if first_numbered_seen and not line.startswith("1."):
                pdf_content.append("")
            first_numbered_seen = True

        line = detect_and_convert_links(line)

        if render_with_bold or headings_to_fonts:
            line = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', line)
        if headings_to_fonts:
            line = re.sub(r'\*([^*]+?)\*', r'<b>\1</b>', line)

        if auto_bold_numbers and is_numbered_line:
            if not (line.startswith("<b>") and line.endswith("</b>")):
                # Drop inner bold tags so the whole line can be wrapped once.
                if "<b>" in line and "</b>" in line:
                    line = re.sub(r'</?b>', '', line)
                line = f"<b>{line}</b>"
        pdf_content.append(line)

    return pdf_content, len(pdf_content)
186
+
187
def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered, headings_to_fonts, doc_title):
    """Render markdown into a multi-column, double-A4-width PDF.

    The content is converted with ``markdown_to_pdf_content``, split evenly
    over ``num_columns`` table columns and drawn with an automatically chosen
    font size (clamped to at least 6 pt).

    Args:
        markdown_text: Markdown source text.
        base_font_size: Accepted for interface compatibility; the font size is
            computed from the content and this value is not used.
        render_with_bold: Forwarded to ``markdown_to_pdf_content``.
        auto_bold_numbers: Forwarded to ``markdown_to_pdf_content``.
        enlarge_numbered: Use a 1 pt larger font for bold numbered lines.
        num_columns: Number of columns; ``0`` selects a count derived from the
            number of content lines (1 to 6).
        add_space_before_numbered: Forwarded to ``markdown_to_pdf_content``.
        headings_to_fonts: Render ``<hN>`` items with heading styles.
        doc_title: Title stored in the PDF metadata.

    Returns:
        The PDF as ``bytes``, or ``None`` (after ``st.error``) when no ``.ttf``
        file is found or font registration fails.
    """
    buffer = io.BytesIO()
    page_width = A4[0] * 2
    page_height = A4[1]
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(page_width, page_height),
        leftMargin=36,
        rightMargin=36,
        topMargin=36,
        bottomMargin=36,
        title=doc_title,
    )
    styles = getSampleStyleSheet()
    spacer_height = 10
    pdf_content, total_lines = markdown_to_pdf_content(
        markdown_text, render_with_bold, auto_bold_numbers,
        add_space_before_numbered, headings_to_fonts,
    )

    try:
        available_font_files = glob.glob("*.ttf")
        if not available_font_files:
            st.error("No .ttf font files found.")
            return
        selected_font_path = next((f for f in available_font_files if "NotoEmoji-Bold" in f), None)
        if selected_font_path:
            pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", selected_font_path))
        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
    except Exception as e:
        st.error(f"Font registration error: {e}")
        return
    # Only reference the emoji font when it was actually registered; otherwise
    # fall back to the text font instead of naming an unknown font in a style.
    emoji_font = "NotoEmoji-Bold" if selected_font_path else "DejaVuSans"

    # --- Layout heuristics: column count and font size -----------------------
    total_chars = sum(len(line) for line in pdf_content)
    usable_height = page_height - 72 - spacer_height
    usable_width = page_width - 72
    avg_line_chars = total_chars / total_lines if total_lines > 0 else 50
    ideal_lines_per_col = 20
    suggested_columns = max(1, min(6, int(total_lines / ideal_lines_per_col) + 1))
    num_columns = num_columns if num_columns != 0 else suggested_columns
    col_width = usable_width / num_columns
    min_font_size = 6
    max_font_size = 16
    lines_per_column = total_lines / num_columns if num_columns > 0 else total_lines
    target_height_per_line = usable_height / lines_per_column if lines_per_column > 0 else usable_height
    estimated_font_size = int(target_height_per_line / 1.5)
    adjusted_font_size = max(min_font_size, min(max_font_size, estimated_font_size))
    # Shrink further when the average line would not fit the column width.
    if avg_line_chars > col_width / adjusted_font_size * 10:
        adjusted_font_size = max(min_font_size, int(col_width / (avg_line_chars / 10)))

    # --- Paragraph styles ----------------------------------------------------
    item_style = ParagraphStyle(
        'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
        fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
        linkUnderline=True
    )
    numbered_font_size = adjusted_font_size + 1 if enlarge_numbered else adjusted_font_size
    numbered_bold_style = ParagraphStyle(
        'NumberedBoldStyle', parent=styles['Normal'], fontName=emoji_font,
        fontSize=numbered_font_size,
        leading=numbered_font_size * 1.15, spaceAfter=1,
        linkUnderline=True
    )
    section_style = ParagraphStyle(
        'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
        textColor=colors.darkblue, fontSize=adjusted_font_size * 1.1, leading=adjusted_font_size * 1.32, spaceAfter=2,
        linkUnderline=True
    )

    # --- Distribute items over columns in reading order ----------------------
    columns = [[] for _ in range(num_columns)]
    current_line_count = 0
    current_column = 0
    for item in pdf_content:
        if current_line_count >= lines_per_column and current_column < num_columns - 1:
            current_column += 1
            current_line_count = 0
        columns[current_column].append(item)
        current_line_count += 1

    # --- Build one Paragraph per item ----------------------------------------
    number_pattern = re.compile(r'^\d+(\.\d+)*\.\s')
    heading_item_pattern = re.compile(r'<h(\d)>(.*?)</h\1>')
    column_cells = [[] for _ in range(num_columns)]
    for col_idx, column in enumerate(columns):
        cells = column_cells[col_idx]
        for item in column:
            heading_match = heading_item_pattern.match(item) if headings_to_fonts else None
            if heading_match:
                level = int(heading_match.group(1))
                heading_text = heading_match.group(2)
                heading_style = ParagraphStyle(
                    f'Heading{level}Style',
                    parent=styles['Heading1'],
                    fontName="DejaVuSans",
                    textColor=colors.darkblue if level == 1 else (colors.black if level > 2 else colors.blue),
                    fontSize=adjusted_font_size * (1.6 - (level-1)*0.15),
                    leading=adjusted_font_size * (1.8 - (level-1)*0.15),
                    spaceAfter=4 - (level-1),
                    spaceBefore=6 - (level-1),
                    linkUnderline=True
                )
                cells.append(Paragraph(apply_emoji_font(heading_text, emoji_font), heading_style))
            elif item.startswith("<b>") and item.endswith("</b>"):
                content = item[3:-4].strip()
                # Bold numbered lines (top-level and sub-level) get the
                # numbered style; any other fully-bold line is a section title.
                bold_style = numbered_bold_style if number_pattern.match(content) else section_style
                cells.append(Paragraph(apply_emoji_font(content, emoji_font), bold_style))
            else:
                cells.append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))

    # Pad every column to the same height. At least one row is always
    # produced: ReportLab's Table raises on empty data by default, which
    # previously made empty markdown crash the app.
    max_cells = max(1, *(len(cells) for cells in column_cells))
    for cells in column_cells:
        cells.extend(Paragraph("", item_style) for _ in range(max_cells - len(cells)))

    table_data = list(zip(*column_cells))
    table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
    table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('BACKGROUND', (0, 0), (-1, -1), colors.white),
        ('GRID', (0, 0), (-1, -1), 0, colors.white),
        ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
        ('LEFTPADDING', (0, 0), (-1, -1), 2),
        ('RIGHTPADDING', (0, 0), (-1, -1), 2),
        ('TOPPADDING', (0, 0), (-1, -1), 1),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
    ]))
    story = [Spacer(1, spacer_height), table]
    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()
312
+
313
def pdf_to_image(pdf_bytes):
    """Rasterize every page of a PDF into a PIL image at 2x zoom.

    Args:
        pdf_bytes: The PDF document as bytes. ``None`` or empty input (for
            example when PDF creation was aborted) yields ``None`` without
            reporting a second error.

    Returns:
        A list of RGB ``PIL.Image`` objects, one per page, or ``None`` when
        there is nothing to render or rendering fails (the failure is reported
        with ``st.error``).
    """
    if not pdf_bytes:
        return None
    try:
        # The context manager closes the document even if a page fails to
        # render part-way through.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            images = []
            for page in doc:
                pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                images.append(Image.frombytes("RGB", (pix.width, pix.height), pix.samples))
            return images
    except Exception as e:
        st.error(f"Failed to render PDF preview: {e}")
        return None
326
+
327
+ # PDF creation and linking functions
328
# Sample words used as the content of the generated demo PDFs.
WORDS_20 = ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten",
            "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen", "twenty"]
# The first ten entries; derived so the two lists cannot drift apart.
WORDS_10 = WORDS_20[:10]
331
+
332
def create_crossfile_pdfs(source_pdf="TestSource.pdf", target_pdf="TestTarget.pdf"):
    """Create two one-page PDFs where the source links to the target file.

    Both files contain the numbered list 1-10 and an internal link from the
    "1." line to the position of the "10." line. The source additionally gets
    a small "link" label next to line 7 with a ``file://`` URI annotation
    pointing at the target; the target gets a "Seven Bookmark" outline entry.

    Each file is assembled in a single ``PdfWriter`` and written once.
    Rebuilding a file via ``add_page`` after the outline was added (as the
    previous multi-pass version did for the target) drops the outline, because
    ``add_page`` copies pages only.

    Args:
        source_pdf: Path of the source PDF to write.
        target_pdf: Path of the target PDF to write.

    Returns:
        ``(source_pdf, target_pdf)``.
    """
    from pathlib import Path

    def line_y(index):
        """Return the baseline y coordinate of list line ``index`` (1-based)."""
        return 800 - (index * 20)

    def new_base_writer():
        """Render the numbered 1-10 page and load it into a fresh PdfWriter."""
        buffer = io.BytesIO()
        c = canvas.Canvas(buffer)
        c.setFont("Helvetica", 12)
        for i, word in enumerate(WORDS_10, 1):
            c.drawString(50, line_y(i), f"{i}. {word}")
        c.showPage()
        c.save()
        buffer.seek(0)
        reader = PdfReader(buffer)
        writer = PdfWriter()
        for page in reader.pages:
            writer.add_page(page)
        return writer

    def add_bookmark_to_seven(writer):
        """Add an outline entry that jumps to line 7 on the first page."""
        fit = Fit(fit_type="/XYZ", fit_args=[50, line_y(7), 0])
        writer.add_outline_item("Seven Bookmark", 0, fit=fit)

    def add_link_to_target(writer, target):
        """Stamp a "link" label near line 7 and attach a URI link to ``target``."""
        overlay = io.BytesIO()
        c = canvas.Canvas(overlay)
        c.setFont("Helvetica", 8)
        seven_y = line_y(7)
        c.drawString(90, seven_y - 5, "link")
        c.showPage()
        c.save()
        overlay.seek(0)
        writer.pages[0].merge_page(PdfReader(overlay).pages[0])
        # NOTE: the link is an absolute file URI, so it only resolves on the
        # machine (and at the path) where the PDFs were generated.
        target_uri = Path(os.path.abspath(target)).as_uri()
        link = Link(
            rect=(90, seven_y - 10, 150, seven_y + 10),
            url=f"{target_uri}#page=1"
        )
        writer.add_annotation(page_number=0, annotation=link)

    def add_internal_link(writer):
        """Link the "1." line to the position of the "10." line on page 1."""
        one_y = line_y(1)
        link = Link(
            rect=(50, one_y - 10, 100, one_y + 10),
            target_page_index=0,
            fit=Fit(fit_type="/XYZ", fit_args=[50, line_y(10), 0])
        )
        writer.add_annotation(page_number=0, annotation=link)

    source_writer = new_base_writer()
    add_link_to_target(source_writer, target_pdf)
    add_internal_link(source_writer)
    with open(source_pdf, "wb") as f:
        source_writer.write(f)

    target_writer = new_base_writer()
    add_bookmark_to_seven(target_writer)
    add_internal_link(target_writer)
    with open(target_pdf, "wb") as f:
        target_writer.write(f)

    return source_pdf, target_pdf
408
+
409
def create_selflinking_pdf(pdf_file="SelfLinking.pdf"):
    """Create a PDF with a TOC on page 1 linking to a 1-20 list on page 2.

    Page 1 lists the first ten words under a "Table of Contents" title; page 2
    holds the numbered list 1-20. Each TOC word gets a link annotation and an
    outline entry that jump to the matching line on page 2.

    Args:
        pdf_file: Path of the PDF to write.

    Returns:
        ``pdf_file``.
    """
    buffer = io.BytesIO()
    c = canvas.Canvas(buffer)

    # Page 1: Table of Contents
    c.setFont("Helvetica", 14)
    c.drawString(50, 800, "Table of Contents")
    c.setFont("Helvetica", 12)
    toc_y_positions = []
    for i, word in enumerate(WORDS_10, 1):
        y = 760 - (i * 20)
        c.drawString(50, y, f"{word}")
        toc_y_positions.append(y)
    c.showPage()

    # Page 2: Numbered list 1-20
    c.setFont("Helvetica", 12)
    list_y_positions = []
    for i, word in enumerate(WORDS_20, 1):
        y = 800 - (i * 20)
        c.drawString(50, y, f"{i}. {word}")
        list_y_positions.append(y)
    c.showPage()
    c.save()
    buffer.seek(0)

    # Load the rendered pages straight from memory; the file is written once,
    # after outlines and links have been added.
    reader = PdfReader(buffer)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)

    # Outline entries: one for the TOC page, one per TOC word on page 2.
    writer.add_outline_item("Table of Contents", 0, fit=Fit(fit_type="/Fit"))
    for word, list_y in zip(WORDS_10, list_y_positions):
        writer.add_outline_item(word, 1, fit=Fit(fit_type="/XYZ", fit_args=[50, list_y, 0]))

    # Clickable TOC entries on page 1 that jump to the matching line on page 2.
    # zip() stops after the ten TOC rows, so only list lines 1-10 are targeted.
    for toc_y, list_y in zip(toc_y_positions, list_y_positions):
        link = Link(
            rect=(50, toc_y - 10, 150, toc_y + 10),
            target_page_index=1,
            fit=Fit(fit_type="/XYZ", fit_args=[50, list_y, 0])
        )
        writer.add_annotation(page_number=0, annotation=link)

    with open(pdf_file, "wb") as f:
        writer.write(f)

    return pdf_file
471
+
472
# Streamlit UI
# NOTE(review): indentation was reconstructed from a flattened diff view. All
# controls up to the "Create SelfLinking PDF" button are assumed to live in
# the sidebar (the final "Save PDF" button re-enters it) - confirm against the
# deployed app.
md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]

with st.sidebar:
    st.markdown("### PDF Options")
    if md_options:
        selected_md = st.selectbox("Select Markdown File", options=md_options, index=0)
        with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
            st.session_state.markdown_content = f.read()
    else:
        st.warning("No markdown file found. Please add one to your folder.")
        selected_md = None
        st.session_state.markdown_content = ""
    available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
    font_names = list(available_font_files)
    selected_font_name = st.selectbox("Select Emoji Font", options=font_names,
                                      index=font_names.index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0)
    base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
    render_with_bold = st.checkbox("Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold")
    auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
    enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
    add_space_before_numbered = st.checkbox("Add Space Ahead of Numbered Lines", value=False, key="add_space_before_numbered")

    headings_to_fonts = st.checkbox("Headings to Fonts", value=False, key="headings_to_fonts",
                                    help="Convert Markdown headings (# Heading) and emphasis (*word*) to appropriate font styles")

    auto_columns = st.checkbox("AutoColumns", value=False, key="auto_columns")

    # Recommend a column count from the longest line (in words).
    if auto_columns and 'markdown_content' in st.session_state:
        current_markdown = st.session_state.markdown_content
        longest_line_words = max(
            (len(line.split()) for line in current_markdown.strip().split('\n') if line.strip()),
            default=0,
        )
        if longest_line_words > 25:
            recommended_columns = 1
        elif longest_line_words >= 18:
            recommended_columns = 2
        elif longest_line_words >= 11:
            recommended_columns = 3
        else:
            recommended_columns = "Auto"
        st.info(f"Longest line has {longest_line_words} words. Recommending {recommended_columns} columns.")
    else:
        recommended_columns = "Auto"

    column_options = ["Auto"] + list(range(1, 7))
    num_columns = st.selectbox("Number of Columns", options=column_options,
                               index=0 if recommended_columns == "Auto" else column_options.index(recommended_columns))
    # create_pdf treats 0 as "choose the column count automatically".
    num_columns = 0 if num_columns == "Auto" else int(num_columns)
    st.info("Font size and columns adjust to fit one page.")

    edited_markdown = st.text_area("Input Markdown", value=st.session_state.markdown_content, height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")

    col1, col2 = st.columns(2)
    with col1:
        if st.button("🔄📄 Update PDF"):
            st.session_state.markdown_content = edited_markdown
            if selected_md:
                with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                    f.write(edited_markdown)
            st.rerun()

    with col2:
        if st.button("✂️ Trim Emojis"):
            trimmed_content = trim_emojis_except_numbered(edited_markdown)
            st.session_state.markdown_content = trimmed_content
            if selected_md:
                with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                    f.write(trimmed_content)
            st.rerun()

    prefix = get_timestamp_prefix()
    st.download_button(
        label="💾📝 Save Markdown",
        data=st.session_state.markdown_content,
        file_name=f"{prefix} {selected_md}.md" if selected_md else f"{prefix} default.md",
        mime="text/markdown"
    )
    st.markdown("### Text-to-Speech")
    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
    if st.button("Generate Audio"):
        cleaned_text = clean_for_speech(st.session_state.markdown_content)
        audio_filename = f"{prefix} {selected_md} {selected_voice}.mp3" if selected_md else f"{prefix} default {selected_voice}.mp3"
        audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, audio_filename))
        st.audio(audio_file)
        with open(audio_file, "rb") as f:
            audio_bytes = f.read()
        st.download_button(
            label="💾🔊 Save Audio",
            data=audio_bytes,
            file_name=audio_filename,
            mime="audio/mpeg"
        )

    if st.button("📑 Create CrossFile PDFs"):
        with st.spinner("Creating cross-file linked PDFs..."):
            source_pdf, target_pdf = create_crossfile_pdfs()
        st.success(f"Created {source_pdf} and {target_pdf}")
        for pdf_file in [source_pdf, target_pdf]:
            with open(pdf_file, "rb") as f:
                st.download_button(
                    label=f"💾 Download {pdf_file}",
                    data=f.read(),
                    file_name=pdf_file,
                    mime="application/pdf"
                )

    if st.button("🧪 Create SelfLinking PDF"):
        with st.spinner("Generating self-linking PDF with TOC..."):
            pdf_file = create_selflinking_pdf()
        st.success(f"Generated {pdf_file}")
        with open(pdf_file, "rb") as f:
            selflinking_bytes = f.read()
        images = pdf_to_image(selflinking_bytes)
        if images:
            st.subheader(f"Preview of {pdf_file}")
            for i, img in enumerate(images):
                st.image(img, caption=f"{pdf_file} Page {i+1}", use_container_width=True)
        st.download_button(
            label=f"💾 Download {pdf_file}",
            data=selflinking_bytes,
            file_name=pdf_file,
            mime="application/pdf"
        )

with st.spinner("Generating PDF..."):
    pdf_bytes = create_pdf(
        st.session_state.markdown_content,
        base_font_size,
        render_with_bold,
        auto_bold_numbers,
        enlarge_numbered,
        num_columns,
        add_space_before_numbered,
        headings_to_fonts,
        doc_title=selected_md if selected_md else "Untitled"
    )

# create_pdf returns None after reporting a font problem; in that case there
# is nothing to preview or download, and passing None to download_button
# would raise.
if pdf_bytes is not None:
    with st.container():
        pdf_images = pdf_to_image(pdf_bytes)
        if pdf_images:
            for img in pdf_images:
                st.image(img, use_container_width=True)
        else:
            st.info("Download the PDF to view it locally.")

    with st.sidebar:
        st.download_button(
            label="💾📄 Save PDF",
            data=pdf_bytes,
            file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
            mime="application/pdf"
        )