# DOCSUM / app.py
# adil9858's picture
# Update app.py
# d4664d1 verified
import base64
import io
import mimetypes
import os
import tempfile

import fitz  # PyMuPDF
import gradio as gr
from openai import OpenAI
from PIL import Image
# --- OPENAI CLIENT SETUP ---
# SECURITY: the OpenRouter API key is read from the environment and must never
# be committed to source control. Any key that was previously hard-coded in
# this file is public and should be revoked and replaced.
_OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
if not _OPENROUTER_API_KEY:
    # Fail at startup with an actionable message instead of surfacing an
    # opaque authentication error on the first model call.
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set. Export it (or configure it as a "
        "secret of the hosting environment) before starting the app."
    )

# OpenRouter exposes an OpenAI-compatible API, so the stock OpenAI client is
# simply pointed at its base URL.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=_OPENROUTER_API_KEY,
)
def convert_pdf_to_images(pdf_file):
    """Render every page of a PDF to a PIL image.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or a
            binary file-like object exposing ``read()``.

    Returns:
        A list with one ``PIL.Image`` per page, in page order. Failures are
        not raised: the string ``"Error converting PDF: <reason>"`` is
        returned instead, which callers detect with ``isinstance(..., str)``.
    """
    images = []
    try:
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_document = fitz.open(os.fspath(pdf_file))
        else:
            # Open straight from memory: no temporary file is created, so
            # nothing can be left behind on disk when rendering fails.
            pdf_document = fitz.open(stream=pdf_file.read(), filetype="pdf")

        # The context manager closes the document even if a page fails.
        with pdf_document:
            for page in pdf_document:
                # NOTE(review): relies on get_pixmap()'s defaults producing
                # RGB samples without an alpha channel, which is what the
                # "RGB" raw mode below expects.
                pix = page.get_pixmap()
                img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
                images.append(img)
    except Exception as e:
        # Deliberate boundary: callers expect an error string, not an exception.
        return f"Error converting PDF: {e}"
    return images
def image_to_base64(image):
    """Serialize *image* as PNG and return the bytes as a base64 text string.

    The image only needs a PIL-style ``save(fp, format=...)`` method.
    """
    png_buffer = io.BytesIO()
    try:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()
    finally:
        png_buffer.close()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode("utf-8")
def generate_summary(extracted_texts):
    """Ask the model for one consolidated summary of the extracted page texts.

    ``extracted_texts`` is interpolated verbatim into the prompt. Returns the
    content of the model's first choice, or the string
    ``"Error generating summary: <reason>"`` if anything raises.
    """
    try:
        # Assembled line by line; the result starts and ends with a newline.
        prompt_lines = [
            "",
            "You are an expert document analyst. Below are the extracted contents from multiple pages of a document.",
            "Please provide a comprehensive, detailed summary that:",
            "1. Organizes all key information logically",
            "2. Identifies relationships between data points",
            "3. Highlights important figures, dates, names",
            "4. Presents the information in a clear, structured format",
            "Extracted contents from pages:",
            f"{extracted_texts}",
            "Comprehensive Summary:",
            "",
        ]
        summary_prompt = "\n".join(prompt_lines)

        system_message = {
            "role": "system",
            "content": "You are Dalton, an expert in analyzing and summarizing document contents.",
        }
        user_message = {"role": "user", "content": summary_prompt}

        completion = client.chat.completions.create(
            model="opengvlab/internvl3-14b:free",
            messages=[system_message, user_message],
            max_tokens=2048,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        return f"Error generating summary: {e}"
def analyze_images(images, user_prompt, selected_pages=None):
    """Run the vision model over the selected pages and build a Markdown report.

    Args:
        images: List of page images (one per page), or an error string
            produced upstream, which is passed through unchanged.
        user_prompt: Instruction sent to the model alongside each page image.
        selected_pages: 1-based page numbers to analyze, in the order given.
            ``None`` means every page.

    Returns:
        A ``(report, summary)`` tuple on every path. ``report`` is the
        Markdown report (or a plain message). ``summary`` is the cross-page
        summary when more than one page was analyzed successfully, otherwise
        ``None``.
    """
    # Every exit returns a 2-tuple because callers unpack ``result, summary``.
    if not images:
        return "No images provided for analysis.", None
    if isinstance(images, str):  # upstream error message
        return images, None

    if selected_pages is None:
        selected_pages = list(range(1, len(images) + 1))

    all_results = []
    extracted_texts = []
    for page_number in selected_pages:
        try:
            page_index = int(page_number) - 1
        except (TypeError, ValueError):
            page_index = -1
        # Reject 0 and negatives as well: a negative index would silently
        # wrap around and analyze the wrong page.
        if not 0 <= page_index < len(images):
            all_results.append(
                f"An error occurred analyzing page {page_number}: "
                f"page number is out of range (document has {len(images)} page(s))."
            )
            continue

        try:
            image_base64 = image_to_base64(images[page_index])
            response = client.chat.completions.create(
                model="opengvlab/internvl3-14b:free",
                messages=[
                    {"role": "system", "content": "You are Dalton, an expert in understanding images that can analyze images and provide detailed descriptions."},
                    {"role": "user", "content": [
                        {"type": "text", "text": user_prompt},
                        {"type": "image_url", "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        }},
                    ]},
                ],
                max_tokens=1024,
            )
            result = response.choices[0].message.content
        except Exception as e:
            # One failing page must not abort the remaining pages.
            all_results.append(f"An error occurred analyzing page {page_number}: {e}")
            continue

        if not result:
            # A missing reply would otherwise put None into the joined report.
            all_results.append(
                f"An error occurred analyzing page {page_number}: the model returned an empty response."
            )
            continue

        extracted_texts.append(f"=== Page {page_number} ===\n{result}\n")
        all_results.append(f"### 📄 Page {page_number} Result:")
        all_results.append(result)
        all_results.append("---")

    full_result = "\n".join(all_results)

    if not extracted_texts:
        # Keep the per-page error details so the user can see why nothing worked.
        message = "No valid results generated."
        if all_results:
            message = f"{message}\n\n{full_result}"
        return message, None

    if len(extracted_texts) == 1:
        # A single page needs no cross-page summary.
        return full_result, None

    summary = generate_summary("\n".join(extracted_texts))
    if not summary:
        return full_result, None
    full_result += "\n\n## 📝 Comprehensive Document Summary\n"
    full_result += summary
    return full_result, summary
def _upload_path(file):
    """Return the filesystem path behind an upload, or ``None`` if there is none."""
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    name = getattr(file, "name", None)
    return name if isinstance(name, str) else None


def _upload_mime_type(file):
    """Return the upload's MIME type, or ``""`` when it cannot be determined."""
    declared = getattr(file, "type", None)
    if isinstance(declared, str) and declared:
        return declared
    # Uploads given as a path (or an object with only ``.name``) carry no MIME
    # type of their own, so fall back to guessing from the file extension.
    path = _upload_path(file)
    guessed = mimetypes.guess_type(path)[0] if path else None
    return guessed or ""


def process_input(file, user_prompt, page_numbers):
    """Dispatch an uploaded document to the analysis pipeline.

    Args:
        file: The upload: a filesystem path, an object with a ``.name`` path,
            or a file-like object with ``.read()`` (optionally ``.type``).
            ``None`` means nothing was uploaded.
        user_prompt: Instruction forwarded to the model for every page.
        page_numbers: 1-based PDF pages to analyze; empty/``None`` means all.
            Ignored for image uploads.

    Returns:
        A ``(report, summary)`` tuple on every path; ``summary`` is ``None``
        when no cross-page summary was produced or an error occurred.
    """
    if file is None:
        return "Please upload a file.", None

    mime_type = _upload_mime_type(file)
    has_stream = hasattr(file, "read")

    if mime_type == "application/pdf":
        if has_stream:
            images = convert_pdf_to_images(file)
        else:
            path = _upload_path(file)
            if path is None:
                return "Error converting PDF: uploaded file has no readable path.", None
            try:
                # Pass an open binary stream so the converter can read() it.
                with open(path, "rb") as pdf_stream:
                    images = convert_pdf_to_images(pdf_stream)
            except OSError as e:
                return f"Error converting PDF: {e}", None
        if isinstance(images, str):  # error message
            return images, None
        pages = page_numbers or list(range(1, len(images) + 1))
    elif mime_type.startswith("image/"):
        source = file if has_stream else _upload_path(file)
        try:
            images = [Image.open(source)]
        except OSError as e:
            return f"Error opening image: {e}", None
        pages = None
    else:
        return "Unsupported file type. Please upload a JPG/PNG/PDF.", None

    outcome = analyze_images(images, user_prompt, pages)
    # Guard this function's 2-tuple contract even if a bare message comes back.
    return (outcome, None) if isinstance(outcome, str) else outcome
# --- GRADIO INTERFACE ---
with gr.Blocks(title="DocSum - Document Summarizer") as demo:
    gr.Markdown(
        "\n"
        '<h1 style="text-align:center;">🧾 DocSum</h1>\n'
        '<p style="text-align:center;">Document Summarizer Powered by VLM • '
        "Developed by <a href='https://koshurai.com' target='_blank'>Koshur AI</a></p>\n"
    )

    with gr.Row():
        with gr.Column():
            file_upload = gr.File(
                label="Upload a document (JPG/PNG/PDF)",
                file_types=[".jpg", ".jpeg", ".png", ".pdf"],
            )
            prompt = gr.Textbox(label="📝 Enter Your Prompt", value="Extract all content structurally")
            page_selector = gr.CheckboxGroup(label="Select Pages (for PDFs only)", choices=[], visible=False)

            def update_page_selector(file):
                """Offer one checkbox per page for PDF uploads; hide the selector otherwise."""
                # value=[] also clears selections left over from a previous upload.
                hidden = gr.update(choices=[], value=[], visible=False)
                if file is None:
                    return hidden
                # The upload arrives as a path string or as an object whose
                # ``.name`` is the path; neither form carries a MIME type.
                path = file if isinstance(file, str) else getattr(file, "name", None)
                if not isinstance(path, str) or not path.lower().endswith(".pdf"):
                    return hidden
                try:
                    # Open the uploaded file in place: no temporary copy to clean up.
                    with fitz.open(path) as doc:
                        num_pages = len(doc)
                except Exception:
                    # An unreadable PDF is reported when the user runs the
                    # analysis; here the selector just stays hidden.
                    return hidden
                return gr.update(choices=list(range(1, num_pages + 1)), value=[], visible=True)

            file_upload.change(fn=update_page_selector, inputs=file_upload, outputs=page_selector)
            submit_btn = gr.Button("🔍 Analyze Document")

        with gr.Column():
            output_box = gr.Markdown(label="Analysis Output")
            summary_download = gr.File(label="Download Summary", visible=False)

    def handle_submit(file, prompt, pages):
        """Run the analysis and expose the summary as a downloadable text file."""
        outcome = process_input(file, prompt, pages)
        # Tolerate a bare message as well as the usual (result, summary) pair.
        if isinstance(outcome, str):
            result, summary = outcome, None
        else:
            result, summary = outcome

        if not summary:
            return result, gr.update(value=None, visible=False)

        # delete=False: the file must outlive this handler so it can be served.
        # UTF-8 is explicit because the summary may contain non-ASCII text.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".txt", encoding="utf-8"
        ) as tmpfile:
            tmpfile.write(summary)
        # The download component starts hidden, so it is revealed here.
        return result, gr.update(value=tmpfile.name, visible=True)

    submit_btn.click(
        fn=handle_submit,
        inputs=[file_upload, prompt, page_selector],
        outputs=[output_box, summary_download],
    )
    gr.Markdown("<footer>© 2025 Koshur AI. All rights reserved.</footer>")

# Launch Gradio App
demo.launch()