Spaces:

awacke1
/

PDF-Paper-Maker-AI-UI-UX

Running

App Files Files Community

awacke1 committed 24 days ago

Commit

b84aa61

verified ·

1 Parent(s): 703c2b5

Update app.py

Browse files

Files changed (1) hide show

app.py +255 -4

app.py CHANGED Viewed

@@ -158,19 +158,264 @@ def apply_emoji_font(text, emoji_font):
                     combined_text = before + f'<a href="{url}">{label}</a>' + after
     return combined_text
-def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered):
     lines = markdown_text.strip().split('\n')
     pdf_content = []
     number_pattern = re.compile(r'^\d+\.\s')
     # Track if we've seen the first numbered line already
     first_numbered_seen = False
     for line in lines:
         line = line.strip()
-        if not line or line.startswith('# '):
             continue
         # Check if this is a numbered line
         is_numbered_line = number_pattern.match(line) is not None
@@ -184,8 +429,14 @@ def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers,
                 first_numbered_seen = True
         line = detect_and_convert_links(line)
-        if render_with_bold:
-            line = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', line)
         if auto_bold_numbers and is_numbered_line:
             if not (line.startswith("<b>") and line.endswith("</b>")):
                 if "<b>" in line and "</b>" in line:

                     combined_text = before + f'<a href="{url}">{label}</a>' + after
     return combined_text
+def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered, headings_to_fonts):
     lines = markdown_text.strip().split('\n')
     pdf_content = []
     number_pattern = re.compile(r'^\d+\.\s')
+    heading_pattern = re.compile(r'^(#{1,4})\s+(.+)
+def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered, headings_to_fonts=False):
+    """Render markdown into a multi-column, table-based PDF and return its bytes.
+
+    The page is twice the A4 width by the A4 height with 36pt margins. The
+    list of lines comes from ``markdown_to_pdf_content``; the font size (and,
+    when ``num_columns`` is 0, the column count) is derived from how many
+    lines there are and how long they are on average.
+
+    Args:
+        markdown_text: Markdown source to render.
+        base_font_size: Accepted for interface compatibility; this function
+            computes the font size from the content and does not read it.
+        render_with_bold: Forwarded to ``markdown_to_pdf_content``.
+        auto_bold_numbers: Forwarded to ``markdown_to_pdf_content``.
+        enlarge_numbered: When true, numbered bold lines are set one point
+            larger than the body text.
+        num_columns: Number of columns; 0 selects a count between 1 and 6
+            from the number of lines (about 20 lines per column).
+        add_space_before_numbered: Forwarded to ``markdown_to_pdf_content``.
+        headings_to_fonts: Forwarded to ``markdown_to_pdf_content``. Defaults
+            to False so callers that pass only the first seven arguments
+            keep working.
+
+    Returns:
+        The PDF as bytes, or None when font registration fails (the failure
+        is reported through ``st.error``).
+    """
+    buffer = io.BytesIO()
+    page_width = A4[0] * 2
+    page_height = A4[1]
+    doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
+    styles = getSampleStyleSheet()
+    spacer_height = 10
+    pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered, headings_to_fonts)
+    # NotoEmoji-Bold is only registered when a matching .ttf is present, so
+    # styles must fall back to the always-registered body font otherwise.
+    emoji_font_name = "DejaVuSans"
+    try:
+        available_font_files = glob.glob("*.ttf")
+        if not available_font_files:
+            st.error("No .ttf font files found.")
+            return None
+        selected_font_path = next((f for f in available_font_files if "NotoEmoji-Bold" in f), None)
+        if selected_font_path:
+            pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", selected_font_path))
+            emoji_font_name = "NotoEmoji-Bold"
+        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
+    except Exception as e:
+        st.error(f"Font registration error: {e}")
+        return None
+    # Page geometry: 72 = top + bottom (or left + right) margins of 36pt each.
+    total_chars = sum(len(line) for line in pdf_content)
+    usable_height = page_height - 72 - spacer_height
+    usable_width = page_width - 72
+    avg_line_chars = total_chars / total_lines if total_lines > 0 else 50
+    ideal_lines_per_col = 20
+    suggested_columns = max(1, min(6, int(total_lines / ideal_lines_per_col) + 1))
+    num_columns = num_columns if num_columns != 0 else suggested_columns
+    col_width = usable_width / num_columns
+    min_font_size = 6
+    max_font_size = 16
+    lines_per_column = total_lines / num_columns if num_columns > 0 else total_lines
+    # First estimate the size from the vertical space available per line,
+    # then shrink it if the average line would be too wide for a column.
+    target_height_per_line = usable_height / lines_per_column if lines_per_column > 0 else usable_height
+    estimated_font_size = int(target_height_per_line / 1.5)
+    adjusted_font_size = max(min_font_size, min(max_font_size, estimated_font_size))
+    if avg_line_chars > col_width / adjusted_font_size * 10:
+        adjusted_font_size = int(col_width / (avg_line_chars / 10))
+        adjusted_font_size = max(min_font_size, adjusted_font_size)
+    item_style = ParagraphStyle(
+        'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
+        fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
+        linkUnderline=True
+    )
+    numbered_font_size = adjusted_font_size + 1 if enlarge_numbered else adjusted_font_size
+    numbered_bold_style = ParagraphStyle(
+        'NumberedBoldStyle', parent=styles['Normal'], fontName=emoji_font_name,
+        fontSize=numbered_font_size,
+        leading=numbered_font_size * 1.15, spaceAfter=1,
+        linkUnderline=True
+    )
+    section_style = ParagraphStyle(
+        'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
+        textColor=colors.darkblue, fontSize=adjusted_font_size * 1.1, leading=adjusted_font_size * 1.32, spaceAfter=2,
+        linkUnderline=True
+    )
+    # Distribute items across columns in order; the last column takes any
+    # overflow because the index never advances past num_columns - 1.
+    columns = [[] for _ in range(num_columns)]
+    current_line_count = 0
+    current_column = 0
+    number_pattern = re.compile(r'^\d+\.\s')
+    for item in pdf_content:
+        if current_line_count >= lines_per_column and current_column < num_columns - 1:
+            current_column += 1
+            current_line_count = 0
+        columns[current_column].append(item)
+        current_line_count += 1
+    # Convert each item to a Paragraph: fully bold lines become either a
+    # numbered-line paragraph or a section heading, everything else body text.
+    column_cells = [[] for _ in range(num_columns)]
+    for col_idx, column in enumerate(columns):
+        for item in column:
+            if isinstance(item, str) and item.startswith("<b>") and item.endswith("</b>"):
+                content = item[3:-4].strip()
+                if number_pattern.match(content):
+                    column_cells[col_idx].append(Paragraph(apply_emoji_font(content, emoji_font_name), numbered_bold_style))
+                else:
+                    column_cells[col_idx].append(Paragraph(apply_emoji_font(content, emoji_font_name), section_style))
+            else:
+                column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))
+    # Pad shorter columns so zip() yields complete rows; each pad cell gets
+    # its own Paragraph instead of one instance shared by many cells.
+    max_cells = max(len(cells) for cells in column_cells) if column_cells else 0
+    for cells in column_cells:
+        cells.extend(Paragraph("", item_style) for _ in range(max_cells - len(cells)))
+    table_data = list(zip(*column_cells)) if column_cells else []
+    story = [Spacer(1, spacer_height)]
+    # An empty table is rejected by ReportLab's default emptyTableAction, so
+    # only add the table when there is at least one row of content.
+    if table_data:
+        table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
+        table.setStyle(TableStyle([
+            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
+            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
+            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
+            ('GRID', (0, 0), (-1, -1), 0, colors.white),
+            ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
+            ('LEFTPADDING', (0, 0), (-1, -1), 2),
+            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
+            ('TOPPADDING', (0, 0), (-1, -1), 1),
+            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
+        ]))
+        story.append(table)
+    doc.build(story)
+    buffer.seek(0)
+    return buffer.getvalue()
+def pdf_to_image(pdf_bytes):
+    """Rasterize every page of a PDF into a list of PIL images.
+
+    Args:
+        pdf_bytes: The PDF document as bytes. None or empty input yields
+            None without attempting to open a document.
+
+    Returns:
+        A list with one RGB image per page (rendered at 2x scale), or None
+        when there is nothing to render or rendering fails. Failures are
+        reported through ``st.error``.
+    """
+    if not pdf_bytes:
+        return None
+    try:
+        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+        try:
+            images = []
+            for page in doc:
+                pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
+                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+                images.append(img)
+            return images
+        finally:
+            # Close the document even when rendering a page raises.
+            doc.close()
+    except Exception as e:
+        st.error(f"Failed to render PDF preview: {e}")
+        return None
+# Discover markdown files in the working directory (README.md excluded) and
+# expose their base names, without extension, as the selectable options.
+md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
+md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]
+with st.sidebar:
+    st.markdown("### PDF Options")
+    # Load the selected file into session state on every run; with no files
+    # available the content is reset to an empty string.
+    if md_options:
+        selected_md = st.selectbox("Select Markdown File", options=md_options, index=0)
+        with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
+            st.session_state.markdown_content = f.read()
+    else:
+        st.warning("No markdown file found. Please add one to your folder.")
+        selected_md = None
+        st.session_state.markdown_content = ""
+    # Map font base name -> path for every .ttf in the working directory;
+    # NotoEmoji-Bold is preselected when present.
+    available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
+    selected_font_name = st.selectbox("Select Emoji Font", options=list(available_font_files.keys()),
+                                      index=list(available_font_files.keys()).index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0)
+    base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
+    render_with_bold = st.checkbox("Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold")
+    auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
+    enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
+    add_space_before_numbered = st.checkbox("Add Space Ahead of Numbered Lines", value=False, key="add_space_before_numbered")
+    # Add AutoColumns option to automatically determine column count based on line length
+    auto_columns = st.checkbox("AutoColumns", value=False, key="auto_columns")
+    # Auto-determine column count based on longest line if AutoColumns is checked
+    if auto_columns and 'markdown_content' in st.session_state:
+        current_markdown = st.session_state.markdown_content
+        lines = current_markdown.strip().split('\n')
+        longest_line_words = 0
+        for line in lines:
+            if line.strip():  # Skip empty lines
+                word_count = len(line.split())
+                longest_line_words = max(longest_line_words, word_count)
+        # Set recommended columns based on word count
+        if longest_line_words > 25:
+            recommended_columns = 1  # Very long lines need a single column
+        elif longest_line_words >= 18:
+            recommended_columns = 2  # Long lines need 2 columns
+        elif longest_line_words >= 11:
+            recommended_columns = 3  # Medium lines can use 3 columns
+        else:
+            recommended_columns = "Auto"  # Default to auto for shorter lines
+        st.info(f"Longest line has {longest_line_words} words. Recommending {recommended_columns} columns.")
+    else:
+        recommended_columns = "Auto"
+    # The recommendation only preselects the entry; the user's selectbox
+    # choice is what is used. "Auto" is encoded as 0 for create_pdf.
+    column_options = ["Auto"] + list(range(1, 7))
+    num_columns = st.selectbox("Number of Columns", options=column_options,
+                              index=0 if recommended_columns == "Auto" else column_options.index(recommended_columns))
+    num_columns = 0 if num_columns == "Auto" else int(num_columns)
+    st.info("Font size and columns adjust to fit one page.")
+    # Changed label from "Modify the markdown content below:" to "Input Markdown"
+    # The widget key includes the file, font and column selections, so a
+    # change to any of them produces a differently keyed text area.
+    edited_markdown = st.text_area("Input Markdown", value=st.session_state.markdown_content, height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")
+    # Added emoji to "Update PDF" button and created a two-column layout for buttons
+    col1, col2 = st.columns(2)
+    with col1:
+        # Persist the edited text to session state and back to the selected
+        # file, then rerun the script so the PDF is regenerated.
+        if st.button("🔄📄 Update PDF"):
+            st.session_state.markdown_content = edited_markdown
+            if selected_md:
+                with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
+                    f.write(edited_markdown)
+            st.rerun()
+    # Added "Trim Emojis" button in second column
+    with col2:
+        # Same flow as "Update PDF", but the text first passes through
+        # trim_emojis_except_numbered (defined outside this section).
+        if st.button("✂️ Trim Emojis"):
+            trimmed_content = trim_emojis_except_numbered(edited_markdown)
+            st.session_state.markdown_content = trimmed_content
+            if selected_md:
+                with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
+                    f.write(trimmed_content)
+            st.rerun()
+    # prefix comes from get_timestamp_prefix (defined outside this section)
+    # and is reused for the markdown, audio and PDF download file names.
+    prefix = get_timestamp_prefix()
+    st.download_button(
+        label="💾📝 Save Markdown",
+        data=st.session_state.markdown_content,
+        file_name=f"{prefix} {selected_md}.md" if selected_md else f"{prefix} default.md",
+        mime="text/markdown"
+    )
+    st.markdown("### Text-to-Speech")
+    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
+    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
+    if st.button("Generate Audio"):
+        # clean_for_speech and generate_audio are defined outside this
+        # section; generate_audio is awaited via asyncio.run and its result
+        # is used as the path of the audio file to play and offer for download.
+        cleaned_text = clean_for_speech(st.session_state.markdown_content)
+        audio_filename = f"{prefix} {selected_md} {selected_voice}.mp3" if selected_md else f"{prefix} default {selected_voice}.mp3"
+        audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, audio_filename))
+        st.audio(audio_file)
+        with open(audio_file, "rb") as f:
+            audio_bytes = f.read()
+        st.download_button(
+            label="💾🔊 Save Audio",
+            data=audio_bytes,
+            file_name=audio_filename,
+            mime="audio/mpeg"
+        )
+# Build the PDF from the current markdown and the sidebar settings.
+with st.spinner("Generating PDF..."):
+    # create_pdf takes headings_to_fonts as its final argument, but no sidebar
+    # control for it is defined in this section. Read it from session state
+    # (where a widget keyed "headings_to_fonts" would store its value) and
+    # default to False so the call always matches the signature.
+    headings_to_fonts = st.session_state.get("headings_to_fonts", False)
+    pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered, headings_to_fonts)
+# Show a rendered preview of every page, or a hint when no preview exists.
+with st.container():
+    pdf_images = pdf_to_image(pdf_bytes)
+    if pdf_images:
+        for img in pdf_images:
+            st.image(img, use_container_width=True)
+    else:
+        st.info("Download the PDF to view it locally.")
+with st.sidebar:
+    # create_pdf returns None when font registration fails; only offer the
+    # download when there is actual PDF data to hand to the button.
+    if pdf_bytes is not None:
+        st.download_button(
+            label="💾📄 Save PDF",
+            data=pdf_bytes,
+            file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
+            mime="application/pdf"
+        )
     # Track if we've seen the first numbered line already
     first_numbered_seen = False
     for line in lines:
         line = line.strip()
+        if not line:
             continue
+        # Process headings if headings_to_fonts is enabled
+        if headings_to_fonts and line.startswith('#'):
+            heading_match = heading_pattern.match(line)
+            if heading_match:
+                level = len(heading_match.group(1))  # Number of # symbols
+                heading_text = heading_match.group(2).strip()
+                # Convert the heading to bold with appropriate formatting
+                formatted_heading = f"<h{level}>{heading_text}</h{level}>"
+                pdf_content.append(formatted_heading)
+                continue
         # Check if this is a numbered line
         is_numbered_line = number_pattern.match(line) is not None
                 first_numbered_seen = True
         line = detect_and_convert_links(line)
+        # Process bold text (*word* or **word**)
+        if render_with_bold or headings_to_fonts:
+            # Handle both *word* and **word** patterns
+            line = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', line)
+            if headings_to_fonts:
+                line = re.sub(r'\*([^*]+?)\*', r'<b>\1</b>', line)
         if auto_bold_numbers and is_numbered_line:
             if not (line.startswith("<b>") and line.endswith("</b>")):
                 if "<b>" in line and "</b>" in line: