SearchPod1.0 / report_structure.py
siddhartharyaai's picture
Update report_structure.py
4ae33f0 verified
# report_structure.py
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_JUSTIFY, TA_LEFT, TA_CENTER
from reportlab.lib import colors
import re
import time
def generate_report(research_output: str, filename="research_report.pdf"):
    """
    Generates a structured PDF research report from the raw research output.

    The PDF consists of a title page, a simple table of contents (heading
    titles only, no page numbers) and one heading per section followed by
    its paragraphs.

    Args:
        research_output: The raw string output from the Open Deep Research agent.
        filename: The desired filename for the PDF.

    Returns:
        The research report as a string (same as the input to this function, but it
        will also have generated the PDF as a side-effect).
    """
    # Imported locally so this function brings its own dependency into scope.
    from xml.sax.saxutils import escape

    doc = SimpleDocTemplate(filename, pagesize=letter,
                            rightMargin=72, leftMargin=72,
                            topMargin=72, bottomMargin=18)
    styles = getSampleStyleSheet()
    story = []

    # Custom Styles
    title_style = ParagraphStyle(
        'TitleStyle',
        parent=styles['Title'],
        fontSize=24,
        alignment=TA_CENTER,
        spaceAfter=24
    )
    h1_style = ParagraphStyle(
        'H1Style',
        parent=styles['Heading1'],
        fontSize=18,
        spaceBefore=12,
        spaceAfter=6
    )
    normal_style = ParagraphStyle(
        'NormalStyle',
        parent=styles['Normal'],
        fontSize=12,
        alignment=TA_JUSTIFY
    )
    # One style per TOC level so second-level headings are visibly nested.
    toc_styles = {
        1: ParagraphStyle('TOCLevel1', parent=styles['Normal']),
        2: ParagraphStyle('TOCLevel2', parent=styles['Normal'], leftIndent=18),
    }

    # Paragraph() parses its text as XML-like markup, so every piece of
    # free-form research text is escaped before being handed to it.

    # --- Title Page ---
    title = extract_title(research_output)
    story.append(Paragraph(escape(title), title_style))
    story.append(Spacer(1, 36))
    story.append(Paragraph("Generated by MyPod Research Agent", styles['Normal']))
    story.append(Spacer(1, 12))
    story.append(Paragraph("Date: {}".format(time.strftime("%Y-%m-%d")), styles['Normal']))
    story.append(PageBreak())

    # --- Table of Contents (titles only, no page numbers) ---
    story.append(Paragraph("Table of Contents", h1_style))
    for level, title_text in extract_toc(research_output):
        entry_style = toc_styles.get(level, toc_styles[1])
        story.append(Paragraph(escape(title_text), entry_style))
    story.append(PageBreak())

    # --- Main Content ---
    sections = split_into_sections(research_output)
    for section_title, section_content in sections.items():
        story.append(Paragraph(escape(section_title), h1_style))
        # Paragraphs are separated by blank lines (double newlines).
        for para in section_content.split('\n\n'):
            para = para.strip()
            if not para:
                # Skip blank chunks so they do not add empty paragraphs
                # and stray spacers to the page.
                continue
            story.append(Paragraph(escape(para), normal_style))
            story.append(Spacer(1, 12))

    doc.build(story, onFirstPage=add_page_number, onLaterPages=add_page_number)
    return research_output
def extract_title(text):
    """Extracts the title from the research output (simple heuristic).

    The title is the first non-blank line of ``text``. A leading Markdown
    heading marker (one to six ``#`` characters followed by whitespace) is
    removed so the marker does not show up in the rendered title.

    Args:
        text: The raw research output.

    Returns:
        The title string, or "Research Report" when ``text`` contains no
        usable line.
    """
    # Only strip '#' runs that are followed by whitespace (or are the whole
    # line); a title such as "#1 Trends" is kept as written.
    heading_marker = re.compile(r"^#{1,6}(?:\s+|$)")
    for line in text.split("\n"):
        candidate = heading_marker.sub("", line.strip()).strip()
        if candidate:
            return candidate
    return "Research Report"  # Default title
def extract_toc(text):
    """Builds a simple table of contents from Markdown-style headings (heuristic).

    A line starting with "# " yields a level-1 entry and a line starting
    with "## " yields a level-2 entry. Lines containing more than three
    "." characters are ignored.

    Args:
        text: The raw research output.

    Returns:
        A list of ``(level, title)`` tuples in order of appearance.
    """
    heading_prefixes = (("# ", 1), ("## ", 2))
    entries = []
    for raw_line in text.split("\n"):
        if raw_line.count(".") > 3:
            continue
        for prefix, depth in heading_prefixes:
            if raw_line.startswith(prefix):
                entries.append((depth, raw_line[len(prefix):].strip()))
                break
    return entries
def _unique_section_key(chunks, heading):
    """Returns a key for ``heading`` that will not discard existing section content."""
    existing = chunks.get(heading)
    if existing is None:
        return heading
    if not any(part.strip() for part in existing):
        # The earlier section with this title holds nothing but blank lines
        # (e.g. the default "Introduction" placeholder); drop it so the key
        # is reused at the current position in the document order.
        del chunks[heading]
        return heading
    # The earlier section has real content: keep it and number this one.
    suffix = 2
    while f"{heading} ({suffix})" in chunks:
        suffix += 1
    return f"{heading} ({suffix})"


def split_into_sections(text):
    """Splits the research output into sections based on headings.

    Lines starting with "# " or "## " open a new section named after the
    heading text. Every other line is appended, followed by a newline, to
    the section that is currently open. Text before the first heading goes
    into a default "Introduction" section.

    A heading whose title repeats an earlier section that already has
    content is stored under "<title> (2)", "<title> (3)", ... so the earlier
    content is not overwritten.

    Args:
        text: The raw research output.

    Returns:
        A dict mapping section title to section content, in document order.
    """
    current_section = "Introduction"  # Default section
    chunks = {current_section: []}
    for line in text.split("\n"):
        if line.startswith("# "):
            heading = line[2:].strip()
        elif line.startswith("## "):
            heading = line[3:].strip()
        else:
            chunks[current_section].append(line + "\n")
            continue
        current_section = _unique_section_key(chunks, heading)
        chunks[current_section] = []
    # Join once at the end instead of growing strings line by line.
    return {title: "".join(parts) for title, parts in chunks.items()}
def add_page_number(canvas, doc):
    """Draws a "Page N" label in the footer of the current page.

    Args:
        canvas: The canvas to draw on; its graphics state is saved before
            drawing and restored afterwards.
        doc: The document being built; only its ``page`` attribute is read.
    """
    footer_x = letter[0] - 72  # 72 units in from the right edge of a letter-size page
    footer_y = 0.75 * 72
    canvas.saveState()
    canvas.setFont('Times-Roman', 9)
    page_label = "Page %d" % doc.page
    canvas.drawString(footer_x, footer_y, page_label)
    canvas.restoreState()