|
import base64
import io
import mimetypes
import os
import tempfile

import fitz
import gradio as gr
from openai import OpenAI
from PIL import Image
|
|
|
|
|
# OpenRouter client shared by every model call in this module.
# The credential is read from the OPENROUTER_API_KEY environment variable so
# that no secret lives in source control. Any key that was ever committed to
# this file should be treated as compromised and rotated.
# If the variable is unset, ``api_key`` is None and the OpenAI client falls
# back to its own default lookup (and fails fast when nothing is configured).
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ.get("OPENROUTER_API_KEY"),
)
|
|
|
def convert_pdf_to_images(pdf_file):
    """Render every page of a PDF to a PIL image.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or a
            binary file-like object whose ``read()`` returns the PDF bytes.

    Returns:
        A list with one RGB ``PIL.Image`` per page, in page order. On any
        failure a string of the form ``"Error converting PDF: <reason>"`` is
        returned instead of raising; callers detect this case with
        ``isinstance(result, str)``.
    """
    images = []
    try:
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_document = fitz.open(os.fspath(pdf_file))
        else:
            # Open straight from memory: no temporary copy is written, so
            # nothing is left behind on disk when rendering fails.
            pdf_document = fitz.open(stream=pdf_file.read(), filetype="pdf")
        try:
            for page in pdf_document:
                # alpha=False guarantees 3 bytes per pixel, matching "RGB".
                pix = page.get_pixmap(alpha=False)
                images.append(
                    Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
                )
        finally:
            # Release the document even when a page fails to render.
            pdf_document.close()
    except Exception as e:
        # Deliberate boundary: the UI shows this message instead of crashing.
        return f"Error converting PDF: {e}"
    return images
|
|
|
def image_to_base64(image):
    """Encode an image as a base64 PNG string.

    Args:
        image: A PIL ``Image`` (or any object exposing ``mode``,
            ``convert(mode)`` and ``save(fp, format=...)`` the same way).

    Returns:
        The PNG-encoded bytes of ``image`` as a base64 ``str``.
    """
    # PNG cannot store every PIL mode (e.g. CMYK, which is common in
    # print-oriented JPEGs); saving such an image raises. Convert those to
    # RGB first and leave PNG-compatible modes untouched.
    png_modes = {"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"}
    if getattr(image, "mode", "RGB") not in png_modes:
        image = image.convert("RGB")

    with io.BytesIO() as buffer:
        image.save(buffer, format="PNG")
        return base64.b64encode(buffer.getvalue()).decode("utf-8")
|
|
|
def generate_summary(extracted_texts):
    """Ask the model for one consolidated summary of the extracted pages.

    Args:
        extracted_texts: The per-page extraction results, already joined into
            a single string by the caller.

    Returns:
        The summary text produced by the model. If the request fails, or the
        model answers with no content, a string starting with
        ``"Error generating summary:"`` is returned instead; this function
        never returns ``None`` and never raises.
    """
    # Built line by line so the prompt does not pick up whatever indentation
    # the surrounding code happens to have.
    summary_prompt = (
        "\n"
        "You are an expert document analyst. Below are the extracted contents from multiple pages of a document.\n"
        "Please provide a comprehensive, detailed summary that:\n"
        "1. Organizes all key information logically\n"
        "2. Identifies relationships between data points\n"
        "3. Highlights important figures, dates, names\n"
        "4. Presents the information in a clear, structured format\n"
        "\n"
        "Extracted contents from pages:\n"
        f"{extracted_texts}\n"
        "\n"
        "Comprehensive Summary:\n"
    )

    try:
        response = client.chat.completions.create(
            model="opengvlab/internvl3-14b:free",
            messages=[
                {"role": "system", "content": "You are Dalton, an expert in analyzing and summarizing document contents."},
                {"role": "user", "content": summary_prompt},
            ],
            max_tokens=2048,
        )
        summary = response.choices[0].message.content
    except Exception as e:
        # Deliberate boundary: report the failure as text for the UI.
        return f"Error generating summary: {e}"

    # ``content`` is optional in the API response; callers concatenate the
    # result into a string, so never hand back None.
    if not summary:
        return "Error generating summary: the model returned an empty response."
    return summary
|
|
|
def analyze_images(images, user_prompt, selected_pages=None):
    """Run the vision model on the selected pages and assemble a report.

    Args:
        images: List of page images. A plain string is treated as an error
            message produced upstream and is passed through unchanged.
        user_prompt: Instruction sent to the model together with each page.
        selected_pages: 1-based page numbers to analyse. ``None`` means every
            page. Numbers outside ``1..len(images)`` are reported in the
            output and skipped.

    Returns:
        A ``(report, summary)`` tuple in every case. ``report`` is Markdown
        text (or an error message). ``summary`` is the document-level summary
        when more than one page was analysed successfully, otherwise ``None``.
    """
    # Every exit returns a 2-tuple because callers unpack the result.
    if not images:
        return "No images provided for analysis.", None
    if isinstance(images, str):
        return images, None

    total_pages = len(images)
    if selected_pages is None:
        selected_pages = list(range(1, total_pages + 1))

    all_results = []
    extracted_texts = []

    for requested in selected_pages:
        try:
            page_number = int(requested)
        except (TypeError, ValueError):
            page_number = None
        # Reject 0 and negatives explicitly: list indexing would otherwise
        # wrap around and silently analyse the wrong page.
        if page_number is None or not 1 <= page_number <= total_pages:
            all_results.append(
                f"Skipped page {requested!r}: valid page numbers are 1-{total_pages}."
            )
            continue

        try:
            image_base64 = image_to_base64(images[page_number - 1])
            response = client.chat.completions.create(
                model="opengvlab/internvl3-14b:free",
                messages=[
                    {"role": "system", "content": "You are Dalton, an expert in understanding images that can analyze images and provide detailed descriptions."},
                    {"role": "user", "content": [
                        {"type": "text", "text": user_prompt},
                        {"type": "image_url", "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        }},
                    ]},
                ],
                max_tokens=1024,
            )
            result = response.choices[0].message.content
        except Exception as e:
            # One failing page must not abort the remaining pages.
            all_results.append(f"An error occurred analyzing page {page_number}: {e}")
            continue

        if not result:
            all_results.append(
                f"An error occurred analyzing page {page_number}: the model returned an empty response."
            )
            continue

        extracted_texts.append(f"=== Page {page_number} ===\n{result}\n")
        all_results.extend([f"### 📄 Page {page_number} Result:", result, "---"])

    full_result = "\n".join(all_results)

    if not extracted_texts:
        # Keep the per-page error details visible instead of discarding them.
        if full_result:
            return f"No valid results generated.\n\n{full_result}", None
        return "No valid results generated.", None

    if len(extracted_texts) == 1:
        return full_result, None

    summary = generate_summary("\n".join(extracted_texts))
    if not summary:
        return full_result, None
    full_result += "\n\n## 📝 Comprehensive Document Summary\n" + summary
    return full_result, summary
|
|
|
def _detect_mime_type(file):
    """Return the MIME type of an uploaded file, or None when it is unknown."""
    # Some upload objects carry an explicit ``type``; trust it when present.
    declared = getattr(file, "type", None)
    if isinstance(declared, str) and declared:
        return declared
    # Otherwise guess from the file name / path extension.
    if isinstance(file, (str, os.PathLike)):
        name = os.fspath(file)
    else:
        name = getattr(file, "name", None)
    if not isinstance(name, str):
        return None
    return mimetypes.guess_type(name)[0]


def process_input(file, user_prompt, page_numbers):
    """Dispatch an uploaded file to PDF or image analysis.

    Args:
        file: The upload: a filesystem path, or an object exposing ``name``
            and/or ``type`` (and ``read()`` for PDFs). ``None`` means nothing
            was uploaded.
        user_prompt: Instruction forwarded to the model for each page.
        page_numbers: 1-based pages to analyse for PDFs; an empty or missing
            selection means all pages. Ignored for single images.

    Returns:
        A ``(report, summary)`` tuple. ``summary`` is ``None`` unless a
        multi-page summary was generated.
    """
    if file is None:
        return "Please upload a file.", None

    mime_type = _detect_mime_type(file)

    if mime_type == "application/pdf":
        if isinstance(file, (str, os.PathLike)):
            # Hand over an open binary stream so the converter only needs
            # ``read()``, whatever form the upload arrived in.
            try:
                with open(file, "rb") as handle:
                    images = convert_pdf_to_images(handle)
            except OSError as e:
                return f"Error converting PDF: {e}", None
        else:
            images = convert_pdf_to_images(file)
        if isinstance(images, str):
            # The converter reports failures as a message string.
            return images, None
        if not page_numbers:
            page_numbers = list(range(1, len(images) + 1))
        outcome = analyze_images(images, user_prompt, page_numbers)
    elif mime_type and mime_type.startswith("image/"):
        try:
            images = [Image.open(file)]
        except OSError as e:
            return f"Error opening image: {e}", None
        outcome = analyze_images(images, user_prompt)
    else:
        return "Unsupported file type. Please upload a JPG/PNG/PDF.", None

    # Normalise a bare message into the (report, summary) shape callers unpack.
    if isinstance(outcome, str):
        return outcome, None
    return outcome
|
|
|
|
|
def _upload_path(file):
    """Return the filesystem path of an uploaded file, or None if it has none."""
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    name = getattr(file, "name", None)
    return name if isinstance(name, str) else None


# Build the Gradio UI: upload + prompt + page picker on the left, the
# analysis report and an optional summary download on the right.
with gr.Blocks(title="DocSum - Document Summarizer") as demo:
    gr.Markdown(
        '<h1 style="text-align:center;">🧾 DocSum</h1>\n'
        '<p style="text-align:center;">Document Summarizer Powered by VLM • Developed by '
        "<a href='https://koshurai.com' target='_blank'>Koshur AI</a></p>"
    )

    with gr.Row():
        with gr.Column():
            file_upload = gr.File(
                label="Upload a document (JPG/PNG/PDF)",
                file_types=[".jpg", ".jpeg", ".png", ".pdf"],
            )
            prompt = gr.Textbox(
                label="📝 Enter Your Prompt",
                value="Extract all content structurally",
            )
            page_selector = gr.CheckboxGroup(
                label="Select Pages (for PDFs only)",
                choices=[],
                visible=False,
            )

            def update_page_selector(file):
                """Show one checkbox per page for PDFs; hide the picker otherwise."""
                hidden = gr.update(choices=[], value=[], visible=False)
                path = _upload_path(file)
                if not path or not path.lower().endswith(".pdf"):
                    return hidden
                try:
                    doc = fitz.open(path)
                    try:
                        num_pages = len(doc)
                    finally:
                        doc.close()
                except Exception:
                    # An unreadable PDF just hides the picker here; the actual
                    # error is reported to the user when they submit.
                    return hidden
                # Reset ``value`` so a selection made for a previous document
                # cannot leak into this one.
                return gr.update(
                    choices=list(range(1, num_pages + 1)),
                    value=[],
                    visible=True,
                )

            file_upload.change(
                fn=update_page_selector,
                inputs=file_upload,
                outputs=page_selector,
            )

            submit_btn = gr.Button("🔍 Analyze Document")

        with gr.Column():
            output_box = gr.Markdown(label="Analysis Output")
            summary_download = gr.File(label="Download Summary", visible=False)

    def handle_submit(file, prompt, pages):
        """Run the analysis and expose the summary as a downloadable file."""
        outcome = process_input(file, prompt, pages)
        # Tolerate a bare message as well as the usual (report, summary) pair.
        if isinstance(outcome, tuple):
            result, summary = outcome
        else:
            result, summary = outcome, None

        if not summary:
            return result, gr.update(value=None, visible=False)

        # delete=False: Gradio serves the file after this function returns.
        # UTF-8 is explicit because summaries can contain non-ASCII text.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".txt", encoding="utf-8"
        ) as tmpfile:
            tmpfile.write(summary)
        # The component starts hidden, so it must be made visible here.
        return result, gr.update(value=tmpfile.name, visible=True)

    submit_btn.click(
        fn=handle_submit,
        inputs=[file_upload, prompt, page_selector],
        outputs=[output_box, summary_download],
    )

    gr.Markdown("<footer>© 2025 Koshur AI. All rights reserved.</footer>")
|
|
|
|
|
# Start the web server only when this file is run as a script, so that
# importing the module (tests, tooling, reload helpers) has no side effects.
if __name__ == "__main__":
    demo.launch()