File size: 4,520 Bytes
f1bd7db
 
 
 
 
 
 
23f2b60
f1bd7db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23f2b60
f1bd7db
 
 
 
 
 
 
 
 
 
 
 
4ae33f0
 
f1bd7db
 
 
 
 
 
 
 
 
 
 
4ae33f0
f1bd7db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ae33f0
 
 
f1bd7db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ae33f0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# report_structure.py
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_JUSTIFY, TA_LEFT, TA_CENTER
from reportlab.lib import colors
import re
import time

def generate_report(research_output: str, filename="research_report.pdf"):
    """
    Generates a structured PDF research report from the raw research output.

    The document consists of a title page (title, generator credit, current
    date), a flat table of contents listing every heading found by
    ``extract_toc``, and one heading-plus-paragraphs block per section
    returned by ``split_into_sections``. Every page is stamped through
    ``add_page_number``.

    All text taken from ``research_output`` is XML-escaped before it is handed
    to ``Paragraph``, because ReportLab parses paragraph text as markup and a
    literal ``<``, ``>`` or ``&`` in the research text would otherwise be
    misinterpreted. Whitespace-only paragraphs are skipped.

    Args:
        research_output: The raw string output from the Open Deep Research agent.
        filename: The desired filename for the PDF.

    Returns:
        The research report as a string (same as the input to this function, but it
        will also have generated the PDF as a side-effect).
    """
    # Imported locally so this function does not depend on the module's
    # import block providing it.
    from xml.sax.saxutils import escape

    doc = SimpleDocTemplate(filename, pagesize=letter,
                            rightMargin=72, leftMargin=72,
                            topMargin=72, bottomMargin=18)
    styles = getSampleStyleSheet()
    story = []

    # Custom Styles
    title_style = ParagraphStyle(
        'TitleStyle',
        parent=styles['Title'],
        fontSize=24,
        alignment=TA_CENTER,
        spaceAfter=24
    )
    h1_style = ParagraphStyle(
        'H1Style',
        parent=styles['Heading1'],
        fontSize=18,
        spaceBefore=12,
        spaceAfter=6
    )
    normal_style = ParagraphStyle(
        'NormalStyle',
        parent=styles['Normal'],
        fontSize=12,
        alignment=TA_JUSTIFY
    )

    # --- Title Page ---
    title = extract_title(research_output)
    story.append(Paragraph(escape(title), title_style))
    story.append(Spacer(1, 36))
    story.append(Paragraph("Generated by MyPod Research Agent", styles['Normal']))
    story.append(Spacer(1, 12))
    story.append(Paragraph(f"Date: {time.strftime('%Y-%m-%d')}", styles['Normal']))
    story.append(PageBreak())

    # --- Table of Contents (Placeholder) ---
    # Flat list: the heading level reported by extract_toc is not used here.
    story.append(Paragraph("Table of Contents", h1_style))
    for _level, title_text in extract_toc(research_output):
        story.append(Paragraph(escape(title_text), styles['Normal']))
    story.append(PageBreak())

    # --- Main Content ---
    sections = split_into_sections(research_output)
    for section_title, section_content in sections.items():
        story.append(Paragraph(escape(section_title), h1_style))
        # Paragraphs are separated by blank lines in the source text.
        for para in section_content.split('\n\n'):
            body = para.strip()
            if not body:
                # Nothing to render; avoid empty paragraphs and stray spacing.
                continue
            story.append(Paragraph(escape(body), normal_style))
            story.append(Spacer(1, 12))

    doc.build(story, onFirstPage=add_page_number, onLaterPages=add_page_number)
    return research_output

def extract_title(text):
    """Extracts the title from the research output (simple heuristic).

    The title is the first line that still has content after surrounding
    whitespace and a leading Markdown heading marker (one to six ``#``
    followed by whitespace) are removed. Lines that are blank, or that
    consist only of a heading marker, are skipped.

    Args:
        text: The raw research output.

    Returns:
        The extracted title, or "Research Report" when no usable line exists.
    """
    for line in text.split("\n"):
        # Drop "# ", "## ", ... so the marker does not show up in the title;
        # "#tag"-style text (no whitespace after the hashes) is left intact.
        candidate = re.sub(r"^#{1,6}(?:\s+|$)", "", line.strip()).strip()
        if candidate:
            return candidate
    return "Research Report"  # Default title

def extract_toc(text):
    """Builds a simple table of contents from Markdown-style headings (heuristic).

    A line counts as a heading when it starts with "# " (level 1) or "## "
    (level 2) and contains at most three "." characters.

    Args:
        text: The raw research output.

    Returns:
        A list of ``(level, heading_text)`` tuples in order of appearance,
        with the marker removed and the heading text stripped.
    """
    markers = (("# ", 1), ("## ", 2))
    entries = []
    for row in text.split("\n"):
        # Lines with more than three dots are never treated as headings.
        if row.count(".") > 3:
            continue
        for marker, depth in markers:
            if row.startswith(marker):
                entries.append((depth, row[len(marker):].strip()))
                break
    return entries

def split_into_sections(text):
    """Splits the research output into sections based on headings.

    Lines starting with "# " or "## " open a section named after the heading
    text; every other line is appended (with a trailing newline) to the
    section that is currently open. Text before the first heading goes into a
    default "Introduction" section, which is always present in the result.

    When a heading name occurs more than once (or a heading is itself named
    "Introduction"), later text is appended to the existing section rather
    than replacing what was already collected.

    Args:
        text: The raw research output.

    Returns:
        A dict mapping section title to section body, in order of first
        appearance.
    """
    current_section = "Introduction"  # Default section
    # Collect lines per section and join once at the end.
    collected = {current_section: []}

    for line in text.split("\n"):
        if line.startswith("# "):
            current_section = line[2:].strip()
            # setdefault keeps content gathered under an earlier same-named heading.
            collected.setdefault(current_section, [])
        elif line.startswith("## "):
            current_section = line[3:].strip()
            collected.setdefault(current_section, [])
        else:
            collected[current_section].append(line + "\n")

    return {name: "".join(parts) for name, parts in collected.items()}

def add_page_number(canvas, doc):
    """Draws "Page N" near the bottom-right of the current page.

    Intended as a page callback: the text is drawn in 9 pt Times-Roman,
    starting 72 points left of the letter page's right edge and 54 points
    above the bottom edge. The canvas state is saved before and restored
    after drawing.

    Args:
        canvas: The canvas to draw on.
        doc: The document being built; its ``page`` attribute supplies the number.
    """
    canvas.saveState()
    canvas.setFont('Times-Roman', 9)
    x_pos = letter[0] - 72  # page width minus a 72-point inset
    y_pos = 0.75 * 72
    label = "Page %d" % doc.page
    canvas.drawString(x_pos, y_pos, label)
    canvas.restoreState()