Spaces:
Sleeping
Sleeping
Create report_structure.py
Browse files- report_structure.py +136 -0
report_structure.py
ADDED
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# report_structure.py
|
2 |
+
from reportlab.lib.pagesizes import letter
|
3 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle
|
4 |
+
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
5 |
+
from reportlab.lib.enums import TA_JUSTIFY, TA_LEFT, TA_CENTER
|
6 |
+
from reportlab.lib import colors
|
7 |
+
import re
|
8 |
+
|
9 |
+
def generate_report(research_output: str, filename="research_report.pdf"):
    """
    Generates a structured PDF research report from the raw research output.

    The PDF is laid out as a title page, a flat table-of-contents page, and
    then one heading per section returned by ``split_into_sections`` followed
    by that section's paragraphs (chunks separated by blank lines).

    Args:
        research_output: The raw string output from the Open Deep Research agent.
        filename: The desired filename for the PDF.

    Returns:
        The research report as a string (same as the input to this function, but it
        will also have generated the PDF as a side-effect).
    """
    # Imported locally because the module's import block does not bring in
    # `time`; without this the date line below raises NameError.
    import time

    doc = SimpleDocTemplate(filename, pagesize=letter,
                            rightMargin=72, leftMargin=72,
                            topMargin=72, bottomMargin=18)
    styles = getSampleStyleSheet()
    story = []

    # Custom Styles
    title_style = ParagraphStyle(
        'TitleStyle',
        parent=styles['Title'],
        fontSize=24,
        alignment=TA_CENTER,
        spaceAfter=24
    )
    h1_style = ParagraphStyle(
        'H1Style',
        parent=styles['Heading1'],
        fontSize=18,
        spaceBefore=12,
        spaceAfter=6
    )
    normal_style = ParagraphStyle(
        'NormalStyle',
        parent=styles['Normal'],
        fontSize=12,
        alignment=TA_JUSTIFY
    )

    # --- Title Page ---
    title = extract_title(research_output)
    story.append(Paragraph(title, title_style))
    story.append(Spacer(1, 36))
    story.append(Paragraph("Generated by MyPod Research Agent", styles['Normal']))
    story.append(Spacer(1, 12))
    story.append(Paragraph(f"Date: {time.strftime('%Y-%m-%d')}", styles['Normal']))
    story.append(PageBreak())

    # --- Table of Contents (Placeholder) ---
    story.append(Paragraph("Table of Contents", h1_style))
    # The heading level is not used yet: every entry is rendered the same way.
    # A distinct loop name keeps the report `title` above from being rebound.
    for _level, entry_title in extract_toc(research_output):
        story.append(Paragraph(entry_title, styles['Normal']))
    story.append(PageBreak())

    # --- Main Content ---
    # NOTE(review): Paragraph parses its text as reportlab inline markup, so a
    # raw '<' or '&' in the research output may fail to parse - TODO confirm
    # whether the text should be escaped before it is added to the story.
    sections = split_into_sections(research_output)
    for section_title, section_content in sections.items():
        story.append(Paragraph(section_title, h1_style))
        for para in section_content.split('\n\n'):  # Split by double newlines
            para_text = para.strip()
            if not para_text:
                # Blank chunks would only add an empty paragraph and a spacer.
                continue
            story.append(Paragraph(para_text, normal_style))
            story.append(Spacer(1, 12))

    doc.build(story)
    return research_output
|
88 |
+
|
89 |
+
|
90 |
+
|
91 |
+
def extract_title(text):
    """Extracts the title from the research output (simple heuristic).

    The title is the first line that is not blank. If that line is a
    Markdown-style heading (one to six ``#`` characters followed by
    whitespace), the heading marker is removed so it does not show up
    literally in the rendered title.

    Args:
        text: The raw research output.

    Returns:
        The stripped title line, or "Research Report" when the text has no
        non-blank line.
    """
    for line in text.split("\n"):
        candidate = line.strip()
        if not candidate:
            continue
        # Only a marker followed by whitespace counts as a heading, so a line
        # such as "#hashtag" is returned unchanged.
        return re.sub(r"^#{1,6}\s+", "", candidate)
    return "Research Report"  # Default title
|
98 |
+
|
99 |
+
def extract_toc(text):
    """Builds a simple table of contents from Markdown-style headings.

    A line counts as a heading when it starts with "# " (level 1) or "## "
    (level 2) and contains at most three "." characters.

    Args:
        text: The raw research output.

    Returns:
        A list of ``(level, title)`` tuples in document order, where the
        title is the heading text with the marker and surrounding whitespace
        removed.
    """
    heading_prefixes = (("# ", 1), ("## ", 2))
    entries = []
    for raw_line in text.split("\n"):
        # Lines with more than three dots are never treated as headings.
        if raw_line.count(".") > 3:
            continue
        for prefix, depth in heading_prefixes:
            if raw_line.startswith(prefix):
                entries.append((depth, raw_line[len(prefix):].strip()))
                break
    return entries
|
109 |
+
|
110 |
+
|
111 |
+
def split_into_sections(text):
    """Splits the research output into sections based on headings.

    Lines starting with "# " or "## " open a section named after the heading
    text; every other line is appended (with a trailing newline) to the
    section that is currently open. Text before the first heading goes into
    a default "Introduction" section, which is always present.

    If the same heading text occurs more than once, the later lines are
    appended to the existing section instead of replacing what was already
    collected under that name.

    Args:
        text: The raw research output.

    Returns:
        A dict mapping section title to section body, in order of first
        appearance.
    """
    current_section = "Introduction"  # Default section
    collected = {current_section: []}

    for line in text.split("\n"):
        if line.startswith("# "):
            current_section = line[2:].strip()
        elif line.startswith("## "):
            current_section = line[3:].strip()
        else:
            collected[current_section].append(line)
            continue
        # setdefault keeps the lines of an earlier section with the same title.
        collected.setdefault(current_section, [])

    # Join once per section rather than growing a string line by line.
    return {
        section_title: "".join(f"{body_line}\n" for body_line in body_lines)
        for section_title, body_lines in collected.items()
    }
|
129 |
+
|
130 |
+
|
131 |
+
def add_page_number(canvas, doc):
    """Draws a "Page N" label near the bottom-right of the current page.

    Args:
        canvas: The canvas to draw on; its state is saved before drawing and
            restored afterwards.
        doc: The document being built; ``doc.page`` supplies the page number.
    """
    canvas.saveState()
    canvas.setFont('Times-Roman', 9)
    # The text starts 72 units in from the right edge of a letter-sized page
    # and 0.75 * 72 units up from the bottom.
    x_position = letter[0] - 72
    y_position = 0.75 * 72
    label = "Page %d" % doc.page
    canvas.drawString(x_position, y_position, label)
    canvas.restoreState()
|