awacke1 committed on
Commit
86cbf3b
·
verified ·
1 Parent(s): 8f4148e

Create backup12.app.py

Browse files
Files changed (1) hide show
  1. backup12.app.py +663 -0
backup12.app.py ADDED
@@ -0,0 +1,663 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import re
3
+ import os
4
+ import glob
5
+ import asyncio
6
+ import hashlib
7
+ import unicodedata
8
+ import streamlit as st
9
+ from PIL import Image
10
+ import fitz
11
+ import edge_tts
12
+ from reportlab.lib.pagesizes import A4
13
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
14
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
15
+ from reportlab.lib import colors
16
+ from reportlab.pdfbase import pdfmetrics
17
+ from reportlab.pdfbase.ttfonts import TTFont
18
+ from reportlab.pdfgen import canvas
19
+ from datetime import datetime
20
+ import pytz
21
+ from pypdf import PdfReader, PdfWriter
22
+ from pypdf.annotations import Link
23
+ from pypdf.generic import Fit
24
+
25
# Configure the Streamlit page: wide layout with the sidebar opened on load.
st.set_page_config(
    initial_sidebar_state="expanded",
    layout="wide",
)
26
+
27
+ # Existing functions (unchanged)
28
def get_timestamp_prefix():
    """Return the current US/Central time as an upper-cased prefix string.

    The value is formatted with ``%a %m%d %I%M%p`` (abbreviated weekday,
    month+day, 12-hour time with AM/PM) and then upper-cased.
    """
    central_tz = pytz.timezone("US/Central")
    stamp = datetime.now(central_tz).strftime("%a %m%d %I%M%p")
    return stamp.upper()
32
+
33
def clean_for_speech(text):
    """Return *text* with ``#`` characters and emoji runs removed.

    Every ``#`` is dropped first, then each run of characters that falls in
    one of the emoji/symbol code-point ranges below is deleted.
    """
    emoji_ranges = (
        r"\U0001F300-\U0001F5FF",
        r"\U0001F600-\U0001F64F",
        r"\U0001F680-\U0001F6FF",
        r"\U0001F700-\U0001F77F",
        r"\U0001F780-\U0001F7FF",
        r"\U0001F800-\U0001F8FF",
        r"\U0001F900-\U0001F9FF",
        r"\U0001FA00-\U0001FA6F",
        r"\U0001FA70-\U0001FAFF",
        r"\u2600-\u26FF",
        r"\u2700-\u27BF",
    )
    emoji_run = re.compile("[" + "".join(emoji_ranges) + "]+", flags=re.UNICODE)
    without_hashes = text.replace("#", "")
    return emoji_run.sub('', without_hashes)
49
+
50
def trim_emojis_except_numbered(markdown_text):
    """Remove emoji from every line except those starting with ``N. ``.

    The input is stripped and split on newlines. Lines that begin with
    digits, a dot and a whitespace character are kept verbatim; all other
    lines have their emoji runs deleted. The lines are re-joined with
    newlines.
    """
    emoji_run = re.compile(
        r"[\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FAD0-\U0001FAD9"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+"
    )
    numbered_prefix = re.compile(r'^\d+\.\s')
    return '\n'.join(
        row if numbered_prefix.match(row) else emoji_run.sub('', row)
        for row in markdown_text.strip().split('\n')
    )
76
+
77
async def generate_audio(text, voice, filename):
    """Synthesize *text* with the given edge-tts *voice* and save it to *filename*.

    Returns *filename* once the save coroutine has completed.
    """
    await edge_tts.Communicate(text, voice).save(filename)
    return filename
81
+
82
# One tokenizer for everything link-like, tried in this order at each position:
#   1. an existing <a ...>...</a> element (or a lone <a ...> tag) - left untouched
#   2. a Markdown link [label](http(s)://url)
#   3. a bare http(s):// URL (case-insensitive, not directly after href=")
_LINK_TOKEN_RE = re.compile(
    r'((?i:<a\b[^>]*>(?:.*?</a>)?))'
    r'|\[(.*?)\]\((https?://[^\s\[\]()<>{}]+)\)'
    r'|(?i:(?<!href=")(https?://[^\s<>{}]+))'
)


def detect_and_convert_links(text):
    """Convert Markdown links and bare URLs in *text* to blue ``<a>`` tags.

    The text is scanned in a single pass so that output produced for one
    link is never re-scanned. The previous two-pass approach re-wrapped URLs
    that had already been emitted, e.g. a link whose label is itself a URL,
    or an ``http://`` embedded in another link's query string.

    Args:
        text: A line of text, possibly containing ``[label](url)`` links,
            bare ``http(s)://`` URLs, or pre-existing ``<a>`` elements.

    Returns:
        The text with each Markdown link rendered as
        ``<a href="url" color="blue">label</a>`` and each bare URL rendered
        as ``<a href="url" color="blue">url</a>``. Existing ``<a>`` elements
        are returned unchanged.
    """
    def render(match):
        existing_anchor, label, md_url, bare_url = match.groups()
        if existing_anchor is not None:
            # Already HTML: pass through so its href/label are not re-linked.
            return existing_anchor
        if md_url is not None:
            return f'<a href="{md_url}" color="blue">{label}</a>'
        return f'<a href="{bare_url}" color="blue">{bare_url}</a>'

    return _LINK_TOKEN_RE.sub(render, text)
94
+
95
def apply_emoji_font(text, emoji_font):
    """Wrap emoji runs in ``<font face=emoji_font>`` and other text in DejaVuSans.

    Anything that looks like a tag (``<...>``) is copied through untouched.
    Between tags, each run of emoji is NFC-normalized and wrapped in a
    ``<font>`` element using *emoji_font*; every non-empty stretch of other
    text is wrapped in ``<font face="DejaVuSans">``.
    """
    tag_re = re.compile(r'(<[^>]+>)')
    emoji_re = re.compile(
        r"([\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FAD0-\U0001FAD9"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+)"
    )

    pieces = []
    for chunk in tag_re.split(text):
        if tag_re.match(chunk):
            # Existing markup is passed through as-is.
            pieces.append(chunk)
            continue
        # Splitting on a pattern with one capturing group alternates
        # plain text (even indices) with the captured emoji runs (odd indices).
        for position, fragment in enumerate(emoji_re.split(chunk)):
            if position % 2:
                glyphs = unicodedata.normalize('NFC', fragment)
                pieces.append(f'<font face="{emoji_font}">{glyphs}</font>')
            elif fragment:
                pieces.append(f'<font face="DejaVuSans">{fragment}</font>')

    return ''.join(pieces)
141
+
142
def markdown_to_pdf_content(markdown_text, add_space_before_numbered, headings_to_fonts):
    """Turn markdown text into a list of markup strings for the PDF builder.

    Blank lines are dropped. When *headings_to_fonts* is set, ``#``..``####``
    headings become ``<hN>text</hN>`` entries. When
    *add_space_before_numbered* is set, an empty entry is inserted before
    each numbered line after the first, unless the line starts with ``1.``.
    Remaining lines get links converted and ``**x**`` / ``*x*`` turned into
    ``<b>x</b>``.

    Returns:
        ``(entries, count)`` where ``count == len(entries)``.
    """
    numbered_re = re.compile(r'^\d+(\.\d+)*\.\s')
    heading_re = re.compile(r'^(#{1,4})\s+(.+)$')
    entries = []
    seen_numbered = False

    for raw in markdown_text.strip().split('\n'):
        entry = raw.strip()
        if not entry:
            continue

        if headings_to_fonts and entry.startswith('#'):
            found = heading_re.match(entry)
            if found:
                depth = len(found.group(1))
                title = found.group(2).strip()
                entries.append(f"<h{depth}>{title}</h{depth}>")
                continue

        if add_space_before_numbered and numbered_re.match(entry) is not None:
            if seen_numbered:
                if not entry.startswith("1."):
                    entries.append("")
            else:
                seen_numbered = True

        entry = detect_and_convert_links(entry)

        # Both double- and single-asterisk spans are rendered as bold.
        entry = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', entry)
        entry = re.sub(r'\*([^*]+?)\*', r'<b>\1</b>', entry)

        entries.append(entry)

    return entries, len(entries)
180
+
181
def create_pdf(markdown_text, base_font_size, num_columns, add_space_before_numbered, headings_to_fonts, doc_title, longest_line_words, total_lines):
    """Render markdown as a multi-column PDF on a double-width A4 page.

    Args:
        markdown_text: Markdown source; blank/whitespace-only input yields None.
        base_font_size: Font size (points) used as the base when the
            one-page scaling rule applies.
        num_columns: Column count; ``0`` selects a count derived from the
            number of content lines (between 2 and 4).
        add_space_before_numbered: Forwarded to ``markdown_to_pdf_content``.
        headings_to_fonts: When true, ``<hN>`` entries are styled as headings.
        doc_title: PDF document title.
        longest_line_words: Word count of the longest input line, used by the
            font scaling rule.
        total_lines: Accepted for interface compatibility; the value is
            replaced by the line count returned from
            ``markdown_to_pdf_content``.

    Returns:
        The PDF as ``bytes``, or ``None`` when the input is empty or font
        registration fails (an error is shown via ``st.error``).
    """
    if not markdown_text.strip():
        return None  # Handle empty markdown gracefully

    buffer = io.BytesIO()
    page_width = A4[0] * 2
    page_height = A4[1]
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(page_width, page_height),
        leftMargin=36,
        rightMargin=36,
        topMargin=36,
        bottomMargin=36,
        title=doc_title
    )
    styles = getSampleStyleSheet()
    spacer_height = 10
    pdf_content, total_lines = markdown_to_pdf_content(markdown_text, add_space_before_numbered, headings_to_fonts)

    emoji_font = "NotoEmoji-Bold"
    try:
        available_font_files = glob.glob("*.ttf")
        if not available_font_files:
            st.error("No .ttf font files found.")
            return None
        selected_font_path = next((f for f in available_font_files if "NotoEmoji-Bold" in f), None)
        if selected_font_path:
            pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", selected_font_path))
        else:
            # Without this fallback the styles below would reference a font
            # that was never registered and rendering would raise.
            emoji_font = "DejaVuSans"
            st.warning("NotoEmoji-Bold font not found; emoji will use DejaVuSans.")
        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
    except Exception as e:
        st.error(f"Font registration error: {e}")
        return None

    # --- Font size estimation -------------------------------------------
    total_chars = sum(len(line) for line in pdf_content)
    usable_height = page_height - 72 - spacer_height
    usable_width = page_width - 72
    avg_line_chars = total_chars / total_lines if total_lines > 0 else 50
    ideal_lines_per_col = 20
    suggested_columns = max(2, min(4, int(total_lines / ideal_lines_per_col) + 1))
    num_columns = num_columns if num_columns != 0 else suggested_columns
    col_width = usable_width / num_columns
    min_font_size = 5  # Reduced to allow tighter fit
    max_font_size = 16
    lines_per_col = total_lines / num_columns if num_columns > 0 else total_lines
    target_height_per_line = usable_height / lines_per_col if lines_per_col > 0 else usable_height
    estimated_font_size = int(target_height_per_line / 1.5)
    adjusted_font_size = max(min_font_size, min(max_font_size, estimated_font_size))
    if avg_line_chars > col_width / adjusted_font_size * 10:
        adjusted_font_size = int(col_width / (avg_line_chars / 10))
        adjusted_font_size = max(min_font_size, adjusted_font_size)

    # Enhanced font size scaling for one-page fit
    if longest_line_words > 17 or lines_per_col > 20:
        font_scale = min(17 / max(longest_line_words, 17), 60 / max(lines_per_col, 20))
        adjusted_font_size = max(min_font_size, int(base_font_size * font_scale))

    # --- Paragraph styles -----------------------------------------------
    item_style = ParagraphStyle(
        'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
        fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
        linkUnderline=True
    )
    numbered_bold_style = ParagraphStyle(
        'NumberedBoldStyle', parent=styles['Normal'], fontName=emoji_font,
        fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
        linkUnderline=True
    )
    section_style = ParagraphStyle(
        'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
        textColor=colors.darkblue, fontSize=adjusted_font_size * 1.1, leading=adjusted_font_size * 1.32, spaceAfter=2,
        linkUnderline=True
    )

    # --- Distribute entries across columns ------------------------------
    columns = [[] for _ in range(num_columns)]
    current_line_count = 0
    current_column = 0
    for item in pdf_content:
        # Advance to the next column once this one holds its share,
        # except that the last column takes whatever remains.
        if current_line_count >= lines_per_col and current_column < num_columns - 1:
            current_column += 1
            current_line_count = 0
        columns[current_column].append(item)
        current_line_count += 1

    # --- Convert entries to flowables -----------------------------------
    number_pattern = re.compile(r'^\d+(\.\d+)*\.\s')
    column_cells = [[] for _ in range(num_columns)]
    for col_idx, column in enumerate(columns):
        cells = column_cells[col_idx]
        for item in column:
            if not isinstance(item, str):
                cells.append(Paragraph(apply_emoji_font(str(item), emoji_font), item_style))
                continue

            heading_match = re.match(r'<h(\d)>(.*?)</h\1>', item) if headings_to_fonts else None
            if heading_match:
                level = int(heading_match.group(1))
                heading_text = heading_match.group(2)
                heading_style = ParagraphStyle(
                    f'Heading{level}Style',
                    parent=styles['Heading1'],
                    fontName="DejaVuSans",
                    textColor=colors.darkblue if level == 1 else (colors.black if level > 2 else colors.blue),
                    fontSize=adjusted_font_size * (1.6 - (level-1)*0.15),
                    leading=adjusted_font_size * (1.8 - (level-1)*0.15),
                    spaceAfter=4 - (level-1),
                    spaceBefore=6 - (level-1),
                    linkUnderline=True
                )
                cells.append(Paragraph(apply_emoji_font(heading_text, emoji_font), heading_style))
            elif item.startswith("<b>") and item.endswith("</b>"):
                # Fully bold lines are treated as numbered items or section titles.
                content = item[3:-4].strip()
                style = numbered_bold_style if number_pattern.match(content) else section_style
                cells.append(Paragraph(apply_emoji_font(content, emoji_font), style))
            else:
                cells.append(Paragraph(apply_emoji_font(item, emoji_font), item_style))

    # Pad short columns so the table is rectangular. Each padding cell is its
    # own Paragraph so no flowable instance is shared between table cells.
    max_cells = max(len(cells) for cells in column_cells) if column_cells else 0
    for cells in column_cells:
        cells.extend(Paragraph("", item_style) for _ in range(max_cells - len(cells)))

    table_data = list(zip(*column_cells)) if column_cells else [[]]
    table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
    table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('BACKGROUND', (0, 0), (-1, -1), colors.white),
        ('GRID', (0, 0), (-1, -1), 0, colors.white),
        ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
        ('LEFTPADDING', (0, 0), (-1, -1), 2),
        ('RIGHTPADDING', (0, 0), (-1, -1), 2),
        ('TOPPADDING', (0, 0), (-1, -1), 1),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
    ]))
    story = [Spacer(1, spacer_height), table]
    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()
312
+
313
def pdf_to_image(pdf_bytes):
    """Rasterize every page of a PDF into a PIL image at 2x zoom.

    Args:
        pdf_bytes: The PDF file contents, or ``None``.

    Returns:
        A list with one RGB ``PIL.Image`` per page, or ``None`` when
        *pdf_bytes* is ``None`` or rendering fails (the failure is reported
        through ``st.error``).
    """
    if pdf_bytes is None:
        return None
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            zoom = fitz.Matrix(2.0, 2.0)
            images = []
            for page in doc:
                # alpha=False keeps the samples 3 bytes per pixel, matching "RGB".
                pix = page.get_pixmap(matrix=zoom, alpha=False)
                images.append(Image.frombytes("RGB", [pix.width, pix.height], pix.samples))
            return images
        finally:
            # Close the document even when a page fails to render.
            doc.close()
    except Exception as e:
        st.error(f"Failed to render PDF preview: {e}")
        return None
328
+
329
+ # PDF creation and linking functions
330
# Number words for the demo PDFs: the full 1-24 list, and its first twelve entries.
WORDS_24 = [
    "one", "two", "three", "four", "five", "six",
    "seven", "eight", "nine", "ten", "eleven", "twelve",
    "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen",
    "nineteen", "twenty", "twenty-one", "twenty-two", "twenty-three", "twenty-four",
]
WORDS_12 = WORDS_24[:12]
334
+
335
def create_crossfile_pdfs(source_pdf="TestSource.pdf", target_pdf="TestTarget.pdf"):
    """Create two PDFs with cross-file linking.

    Both files get a single page listing the twelve words of ``WORDS_12``.
    The target gets an outline entry positioned at item seven; the source
    gets a small "link" label next to item seven with a ``file://`` link to
    the target. Each file also gets an internal link from item one to the
    position of item ten.

    Args:
        source_pdf: Path of the PDF that links to the target.
        target_pdf: Path of the PDF being linked to.

    Returns:
        ``(source_pdf, target_pdf)``.
    """
    def create_base_pdf(filename):
        """Write a one-page PDF listing ``WORDS_12`` as a numbered list."""
        buffer = io.BytesIO()
        c = canvas.Canvas(buffer)
        c.setFont("Helvetica", 12)
        for i, word in enumerate(WORDS_12, 1):
            y = 800 - (i * 20)
            c.drawString(50, y, f"{i}. {word}")
        c.showPage()
        c.save()
        buffer.seek(0)
        with open(filename, "wb") as f:
            f.write(buffer.getvalue())
        buffer.close()

    def add_bookmark_to_seven(pdf_file):
        """Rewrite *pdf_file* with an outline item positioned at item seven."""
        reader = PdfReader(pdf_file)
        writer = PdfWriter()
        for page in reader.pages:
            writer.add_page(page)
        y_position = 800 - (7 * 20)
        fit = Fit(fit_type="/XYZ", fit_args=[50, y_position, 0])
        writer.add_outline_item("Seven Bookmark", 0, fit=fit)
        with open(pdf_file, "wb") as f:
            writer.write(f)

    def modify_source_pdf(source, target):
        """Stamp a "link" label beside item seven in *source* pointing at *target*."""
        reader = PdfReader(source)
        writer = PdfWriter()
        for page in reader.pages:
            writer.add_page(page)
        # Draw the label on a scratch page, then merge it onto page 1.
        buffer = io.BytesIO()
        c = canvas.Canvas(buffer)
        c.setFont("Helvetica", 8)
        seven_y = 800 - (7 * 20)
        c.drawString(90, seven_y - 5, "link")
        c.showPage()
        c.save()
        buffer.seek(0)
        text_pdf = PdfReader(buffer)
        page = writer.pages[0]
        page.merge_page(text_pdf.pages[0])
        link = Link(
            rect=(90, seven_y - 10, 150, seven_y + 10),
            url=f"file://{os.path.abspath(target)}#page=1"
        )
        writer.add_annotation(page_number=0, annotation=link)
        with open(source, "wb") as f:
            writer.write(f)
        buffer.close()

    def add_internal_link(pdf_file):
        """Rewrite *pdf_file* with a link from item one to item ten's position."""
        reader = PdfReader(pdf_file)
        writer = PdfWriter()
        for page in reader.pages:
            writer.add_page(page)
        one_y = 800 - (1 * 20)
        ten_y = 800 - (10 * 20)
        link = Link(
            rect=(50, one_y - 10, 100, one_y + 10),
            target_page_index=0,
            fit=Fit(fit_type="/XYZ", fit_args=[50, ten_y, 0])
        )
        writer.add_annotation(page_number=0, annotation=link)
        with open(pdf_file, "wb") as f:
            writer.write(f)

    create_base_pdf(source_pdf)
    create_base_pdf(target_pdf)
    add_bookmark_to_seven(target_pdf)
    # Pass this function's own parameters; the names `source`/`target`
    # only exist inside modify_source_pdf.
    modify_source_pdf(source_pdf, target_pdf)
    add_internal_link(source_pdf)
    add_internal_link(target_pdf)
    return source_pdf, target_pdf
411
+
412
def create_selflinking_pdf(pdf_file="SelfLinking.pdf"):
    """Create a PDF with a TOC on page 1 linking to a 1-24 list starting on page 2.

    Page 1 shows the ``WORDS_12`` entries under a "Table of Contents" title;
    page 2 shows ``WORDS_24`` as a numbered list. The file is then rewritten
    with an outline entry for the TOC plus one per ``WORDS_12`` word, and a
    clickable link from each TOC row to the matching list row on page 2.

    Returns:
        *pdf_file*.
    """
    # Vertical positions of the TOC rows (page 1) and list rows (page 2).
    toc_rows = [760 - (n * 20) for n in range(1, len(WORDS_12) + 1)]
    list_rows = [800 - (n * 20) for n in range(1, len(WORDS_24) + 1)]

    scratch = io.BytesIO()
    pdf = canvas.Canvas(scratch)

    # Page 1: Table of Contents
    pdf.setFont("Helvetica", 14)
    pdf.drawString(50, 800, "Table of Contents")
    pdf.setFont("Helvetica", 12)
    for label, row_y in zip(WORDS_12, toc_rows):
        pdf.drawString(50, row_y, f"{label}")
    pdf.showPage()

    # Page 2: Numbered list 1-24
    pdf.setFont("Helvetica", 12)
    for number, (label, row_y) in enumerate(zip(WORDS_24, list_rows), 1):
        pdf.drawString(50, row_y, f"{number}. {label}")
    pdf.showPage()

    # Save the initial PDF
    pdf.save()
    scratch.seek(0)
    with open(pdf_file, "wb") as out:
        out.write(scratch.getvalue())
    scratch.close()

    # Re-open the file to add outlines and links
    reader = PdfReader(pdf_file)
    writer = PdfWriter()
    for sheet in reader.pages:
        writer.add_page(sheet)

    # Outline: the TOC itself, then one entry per word pointing into page 2
    writer.add_outline_item("Table of Contents", 0, fit=Fit(fit_type="/Fit"))
    for label, row_y in zip(WORDS_12, list_rows):
        destination = Fit(fit_type="/XYZ", fit_args=[50, row_y, 0])
        writer.add_outline_item(label, 1, fit=destination)

    # Clickable TOC rows on page 1 that jump to the matching row on page 2
    for toc_y, list_y in zip(toc_rows, list_rows):
        jump = Link(
            rect=(50, toc_y - 10, 150, toc_y + 10),
            target_page_index=1,
            fit=Fit(fit_type="/XYZ", fit_args=[50, list_y, 0])
        )
        writer.add_annotation(page_number=0, annotation=jump)

    # Save the modified PDF
    with open(pdf_file, "wb") as out:
        writer.write(out)

    return pdf_file
474
+
475
+ # Streamlit UI
476
# Markdown files in the working directory (README.md excluded) drive the file picker.
md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]

with st.sidebar:
    st.markdown("### 📄 PDF Options")
    if md_options:
        selected_md = st.selectbox("Select Markdown File", options=md_options, index=0)
        with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
            st.session_state.markdown_content = f.read()
    else:
        st.warning("No markdown file found. Please add one to your folder.")
        selected_md = None
        st.session_state.markdown_content = ""

    available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
    selected_font_name = st.selectbox(
        "Select Emoji Font",
        options=list(available_font_files.keys()),
        index=list(available_font_files.keys()).index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0
    )
    base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)

    add_space_before_numbered = st.checkbox("Add Space Ahead of Numbered Lines", value=True)
    headings_to_fonts = st.checkbox(
        "Headings to Fonts",
        value=True,
        help="Convert Markdown headings (# Heading) to styled fonts"
    )
    auto_columns = st.checkbox("AutoColumns", value=True)

    # Calculate document stats. Defaults cover the empty-document case so the
    # column selector below always has a recommended value to start from.
    longest_line_words = 0
    total_lines = 0
    recommended_columns = 3
    adjusted_font_size_display = base_font_size
    if 'markdown_content' in st.session_state and st.session_state.markdown_content.strip():
        current_markdown = st.session_state.markdown_content
        non_blank_lines = [line for line in current_markdown.strip().split('\n') if line.strip()]
        total_lines = len(non_blank_lines)
        longest_line_words = max((len(line.split()) for line in non_blank_lines), default=0)
        if auto_columns:
            if longest_line_words > 38:
                recommended_columns = 2
            elif longest_line_words < 18 and total_lines < 20:
                recommended_columns = 4
            else:
                recommended_columns = 3
        # Adjust font size for one-page fit. The column selector has not been
        # created yet at this point, so the estimate uses the recommended
        # count (which is also the selector's default).
        estimated_lines_per_col = total_lines / max(recommended_columns, 1)
        if longest_line_words > 17 or estimated_lines_per_col > 20:
            font_scale = min(17 / max(longest_line_words, 17), 60 / max(estimated_lines_per_col, 20))
            adjusted_font_size_display = max(5, int(base_font_size * font_scale))

    st.markdown("**Document Stats**")
    st.write(f"- Longest Line: {longest_line_words} words")
    st.write(f"- Total Lines: {total_lines}")
    st.write(f"- Recommended Columns: {recommended_columns}")
    st.write(f"- Adjusted Font Size: {adjusted_font_size_display} points")

    column_options = [2, 3, 4]
    num_columns = st.selectbox(
        "Number of Columns",
        options=column_options,
        index=column_options.index(recommended_columns) if recommended_columns in column_options else 0
    )
    st.info("Font size and columns adjust to fit one page.")

    st.markdown("### ✍️ Edit Markdown")
    edited_markdown = st.text_area(
        "Input Markdown",
        value=st.session_state.markdown_content,
        height=200,
        key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}"
    )

    st.markdown("### 💾 Actions")
    col1, col2 = st.columns(2)
    with col1:
        if st.button("🔄 Update PDF"):
            # Persist the edit to disk; the file is re-read on the next run.
            st.session_state.markdown_content = edited_markdown
            if selected_md:
                with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                    f.write(edited_markdown)
            st.rerun()

    with col2:
        if st.button("✂️ Trim Emojis"):
            trimmed_content = trim_emojis_except_numbered(edited_markdown)
            st.session_state.markdown_content = trimmed_content
            if selected_md:
                with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                    f.write(trimmed_content)
            st.rerun()

    prefix = get_timestamp_prefix()
    st.download_button(
        label="💾 Save Markdown",
        data=st.session_state.markdown_content,
        file_name=f"{prefix} {selected_md}.md" if selected_md else f"{prefix} default.md",
        mime="text/markdown"
    )

    st.markdown("### 🔊 Text-to-Speech")
    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
    if st.button("Generate Audio"):
        cleaned_text = clean_for_speech(st.session_state.markdown_content)
        audio_filename = f"{prefix} {selected_md} {selected_voice}.mp3" if selected_md else f"{prefix} default {selected_voice}.mp3"
        audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, audio_filename))
        st.audio(audio_file)
        with open(audio_file, "rb") as f:
            audio_bytes = f.read()
        st.download_button(
            label="💾 Save Audio",
            data=audio_bytes,
            file_name=audio_filename,
            mime="audio/mpeg"
        )

    if st.button("📑 Create CrossFile PDFs"):
        with st.spinner("Creating cross-file linked PDFs..."):
            source_pdf, target_pdf = create_crossfile_pdfs()
            st.success(f"Created {source_pdf} and {target_pdf}")
            for pdf_file in [source_pdf, target_pdf]:
                with open(pdf_file, "rb") as f:
                    st.download_button(
                        label=f"💾 Download {pdf_file}",
                        data=f.read(),
                        file_name=pdf_file,
                        mime="application/pdf"
                    )

    if st.button("🧪 Create SelfLinking PDF"):
        with st.spinner("Generating self-linking PDF with TOC..."):
            pdf_file = create_selflinking_pdf()
            st.success(f"Generated {pdf_file}")
            with open(pdf_file, "rb") as f:
                pdf_bytes = f.read()
            images = pdf_to_image(pdf_bytes)
            if images:
                st.subheader(f"Preview of {pdf_file}")
                for i, img in enumerate(images):
                    st.image(img, caption=f"{pdf_file} Page {i+1}", use_container_width=True)
            with open(pdf_file, "rb") as f:
                st.download_button(
                    label=f"💾 Download {pdf_file}",
                    data=f.read(),
                    file_name=pdf_file,
                    mime="application/pdf"
                )

# Build the main document PDF from the current markdown on every run.
with st.spinner("Generating PDF..."):
    pdf_bytes = create_pdf(
        st.session_state.markdown_content,
        base_font_size,
        num_columns,
        add_space_before_numbered,
        headings_to_fonts,
        doc_title=selected_md if selected_md else "Untitled",
        longest_line_words=longest_line_words,
        total_lines=total_lines
    )

with st.container():
    st.markdown("### 📊 PDF Preview")
    pdf_images = pdf_to_image(pdf_bytes)
    if pdf_images:
        for img in pdf_images:
            st.image(img, use_container_width=True)
    else:
        st.info("Download the PDF to view it locally.")

with st.sidebar:
    st.download_button(
        label="💾 Save PDF",
        data=pdf_bytes if pdf_bytes else "",
        file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
        mime="application/pdf",
        disabled=pdf_bytes is None
    )