siddhartharyaai commited on
Commit
f1bd7db
·
verified ·
1 Parent(s): 905e8ce

Create report_structure.py

Browse files
Files changed (1) hide show
  1. report_structure.py +136 -0
report_structure.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # report_structure.py
2
+ from reportlab.lib.pagesizes import letter
3
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle
4
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
5
+ from reportlab.lib.enums import TA_JUSTIFY, TA_LEFT, TA_CENTER
6
+ from reportlab.lib import colors
7
+ import re
8
+
9
def generate_report(research_output: str, filename="research_report.pdf"):
    """
    Generates a structured PDF research report from the raw research output.

    The document consists of a title page (title, generator credit, current
    date), a table-of-contents page listing the detected headings, and the
    main content, where every section gets a heading followed by its
    paragraphs (blocks separated by blank lines).

    Args:
        research_output: The raw string output from the Open Deep Research agent.
        filename: The desired filename for the PDF.

    Returns:
        The research report as a string (same as the input to this function, but it
        will also have generated the PDF as a side-effect).
    """
    # Imported locally so this function does not depend on the module's import
    # block: `time` is needed for the date stamp, and `escape` to neutralise
    # markup characters before they reach Paragraph.
    import time
    from xml.sax.saxutils import escape

    doc = SimpleDocTemplate(filename, pagesize=letter,
                            rightMargin=72, leftMargin=72,
                            topMargin=72, bottomMargin=18)
    styles = getSampleStyleSheet()
    story = []

    # Custom Styles
    title_style = ParagraphStyle(
        'TitleStyle',
        parent=styles['Title'],
        fontSize=24,
        alignment=TA_CENTER,
        spaceAfter=24
    )
    h1_style = ParagraphStyle(
        'H1Style',
        parent=styles['Heading1'],
        fontSize=18,
        spaceBefore=12,
        spaceAfter=6
    )
    normal_style = ParagraphStyle(
        'NormalStyle',
        parent=styles['Normal'],
        fontSize=12,
        alignment=TA_JUSTIFY
    )
    # TOC entries: level-2 headings are indented under level-1 headings.
    toc_styles = {
        1: ParagraphStyle('TOCLevel1Style', parent=styles['Normal']),
        2: ParagraphStyle('TOCLevel2Style', parent=styles['Normal'], leftIndent=18),
    }

    # --- Title Page ---
    # Paragraph interprets its text as XML-like inline markup, so any text
    # taken from the research output is escaped ("<", ">", "&") first.
    report_title = extract_title(research_output)
    story.append(Paragraph(escape(report_title), title_style))
    story.append(Spacer(1, 36))
    story.append(Paragraph("Generated by MyPod Research Agent", styles['Normal']))
    story.append(Spacer(1, 12))
    report_date = time.strftime("%Y-%m-%d")
    story.append(Paragraph(f"Date: {report_date}", styles['Normal']))
    story.append(PageBreak())

    # --- Table of Contents (Placeholder) ---
    story.append(Paragraph("Table of Contents", h1_style))
    for level, heading in extract_toc(research_output):
        entry_style = toc_styles.get(level, toc_styles[1])
        story.append(Paragraph(escape(heading), entry_style))
    story.append(PageBreak())

    # --- Main Content ---
    sections = split_into_sections(research_output)
    for section_title, section_content in sections.items():
        story.append(Paragraph(escape(section_title), h1_style))
        for para in section_content.split('\n\n'):  # Split by double newlines
            para = para.strip()
            if not para:
                # Runs of blank lines yield empty chunks; don't emit
                # empty paragraphs (and their spacers) for them.
                continue
            story.append(Paragraph(escape(para), normal_style))
            story.append(Spacer(1, 12))

    doc.build(story)
    return research_output
88
+
89
+
90
+
91
def extract_title(text):
    """Extracts the title from the research output (simple heuristic).

    The title is the first non-empty line of ``text``. If that line is a
    Markdown heading (one or more ``#`` followed by whitespace), the heading
    markers are removed so they do not show up in the rendered title. Lines
    consisting only of ``#`` markers are skipped.

    Args:
        text: The raw research output.

    Returns:
        The extracted title, or "Research Report" when no usable line exists.
    """
    for line in text.split("\n"):
        candidate = line.strip()
        without_hashes = candidate.lstrip("#")
        # Only treat leading '#' as heading markers when followed by
        # whitespace (or nothing); "#hashtag" style text is left untouched.
        if without_hashes != candidate and (
            not without_hashes or without_hashes[0].isspace()
        ):
            candidate = without_hashes.strip()
        if candidate:
            return candidate
    return "Research Report"  # Default title
98
+
99
+ def extract_toc(text):
100
+ """Extracts a simple table of contents (heuristic). Looks for lines starting with #."""
101
+ toc = []
102
+ lines = text.split("\n")
103
+ for i,line in enumerate(lines):
104
+ if line.startswith("# ") and line.count(".")<=3:
105
+ toc.append((1, line[2:].strip())) # Level 1 heading
106
+ elif line.startswith("## ")and line.count(".")<=3:
107
+ toc.append((2, line[3:].strip())) # Level 2 heading
108
+ return toc
109
+
110
+
111
def split_into_sections(text):
    """Splits the research output into sections based on headings.

    Lines starting with "# " or "## " open a section named after the heading
    text; every other line is appended (with a trailing newline) to the
    section currently open. Text before the first heading is collected under
    the default "Introduction" section.

    If the same heading text occurs more than once, the later content is
    appended to the existing section rather than replacing it, so no text is
    lost.

    Args:
        text: The raw research output.

    Returns:
        A dict mapping section title to section content, in order of first
        appearance.
    """
    current_section = "Introduction"  # Default section
    # Collect lines per section and join once at the end, instead of
    # repeatedly concatenating strings.
    collected = {current_section: []}

    for line in text.split("\n"):
        if line.startswith("# "):
            current_section = line[2:].strip()
        elif line.startswith("## "):
            current_section = line[3:].strip()
        else:
            collected[current_section].append(line + "\n")
            continue
        # A heading line: open the section, keeping any content already
        # gathered under an identical title.
        collected.setdefault(current_section, [])

    return {title: "".join(parts) for title, parts in collected.items()}
129
+
130
+
131
def add_page_number(canvas, doc):
    """Draws a "Page N" label near the bottom-right of the current page.

    Args:
        canvas: The canvas to draw on; its state is saved before drawing and
            restored afterwards.
        doc: The document being built; ``doc.page`` supplies the page number.
    """
    # One inch (72 pt) in from the right edge of a letter page,
    # three quarters of an inch up from the bottom.
    x_pos = letter[0] - 72
    y_pos = 0.75 * 72

    canvas.saveState()
    canvas.setFont('Times-Roman', 9)
    label = "Page %d" % doc.page
    canvas.drawString(x_pos, y_pos, label)
    canvas.restoreState()