# File size: 7,558 Bytes
# (File-viewer residue removed here: the per-line revision hashes and the 1-196 line-number gutter were not part of the program.)
import base64
import io
import mimetypes
import os
import tempfile

import fitz  # PyMuPDF
import gradio as gr
from openai import OpenAI
from PIL import Image
# --- OPENAI CLIENT SETUP ---
# The OpenRouter credential is read from the environment so that it never
# lives in source control. NOTE(review): a literal key used to be committed
# on this line; treat that key as leaked and revoke it.
_OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
if not _OPENROUTER_API_KEY:
    # Fail at start-up with an actionable message instead of on the first request.
    raise RuntimeError(
        "Set the OPENROUTER_API_KEY environment variable to your OpenRouter API key."
    )

# OpenAI-compatible client pointed at the OpenRouter endpoint.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=_OPENROUTER_API_KEY,
)
def convert_pdf_to_images(pdf_file):
    """Render every page of a PDF into a PIL image.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or a
            binary file-like object exposing ``read()``. File-like input is
            spooled to a temporary ``.pdf`` file that is always removed again.

    Returns:
        A list with one ``PIL.Image`` per page, in page order. On any failure
        the string ``"Error converting PDF: <reason>"`` is returned instead of
        raising; callers detect that case with ``isinstance(result, str)``.
    """
    images = []
    tmp_file_path = None
    try:
        if isinstance(pdf_file, (str, os.PathLike)):
            # Already on disk: open it in place, nothing to clean up.
            pdf_path = os.fspath(pdf_file)
        else:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                # Record the name first so the finally-clause can delete the
                # file even when read()/write() fails.
                tmp_file_path = tmp_file.name
                tmp_file.write(pdf_file.read())
            pdf_path = tmp_file_path

        pdf_document = fitz.open(pdf_path)
        try:
            for page_num in range(len(pdf_document)):
                pix = pdf_document.load_page(page_num).get_pixmap()
                # Assumes the default pixmap is 3-channel RGB without alpha,
                # which is what the "RGB" mode below requires - TODO confirm
                # if get_pixmap() is ever called with non-default arguments.
                images.append(
                    Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                )
        finally:
            # Close the document even if rendering a page raised.
            pdf_document.close()
    except Exception as e:
        return f"Error converting PDF: {e}"
    finally:
        # Runs on success and on failure, so the temp copy never leaks.
        if tmp_file_path is not None and os.path.exists(tmp_file_path):
            os.unlink(tmp_file_path)
    return images
def image_to_base64(image):
    """Serialize *image* as PNG and return the bytes base64-encoded as text.

    ``image`` only needs a PIL-style ``save(fp, format=...)`` method. The
    result is a ``str`` decoded as UTF-8.
    """
    png_stream = io.BytesIO()
    try:
        image.save(png_stream, format="PNG")
        raw_png = png_stream.getvalue()
    finally:
        png_stream.close()
    encoded = base64.b64encode(raw_png)
    return encoded.decode("utf-8")
def generate_summary(extracted_texts):
    """Ask the model for one consolidated summary of the extracted page texts.

    The prompt embeds ``extracted_texts`` verbatim between a fixed instruction
    header and a "Comprehensive Summary:" trailer. Returns the content of the
    first choice of the completion, or the string
    ``"Error generating summary: <reason>"`` if anything raises.
    """
    try:
        # Joined with "\n"; the empty first and last entries reproduce the
        # leading and trailing newline of the prompt.
        prompt_lines = (
            "",
            "You are an expert document analyst. Below are the extracted contents from multiple pages of a document.",
            "Please provide a comprehensive, detailed summary that:",
            "1. Organizes all key information logically",
            "2. Identifies relationships between data points",
            "3. Highlights important figures, dates, names",
            "4. Presents the information in a clear, structured format",
            "Extracted contents from pages:",
            f"{extracted_texts}",
            "Comprehensive Summary:",
            "",
        )
        summary_prompt = "\n".join(prompt_lines)

        conversation = [
            {"role": "system", "content": "You are Dalton, an expert in analyzing and summarizing document contents."},
            {"role": "user", "content": summary_prompt},
        ]
        completion = client.chat.completions.create(
            model="opengvlab/internvl3-14b:free",
            messages=conversation,
            max_tokens=2048,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        return f"Error generating summary: {err}"
def analyze_images(images, user_prompt, selected_pages=None):
    """Run the vision model over the selected pages and summarize the results.

    Args:
        images: List of page images, or an error string produced upstream
            (which is passed through unchanged as the result text).
        user_prompt: Instruction sent to the model together with each page.
        selected_pages: 1-based page numbers to analyze; ``None`` means every
            page. Entries that are not valid page numbers for ``images`` are
            reported in the output and skipped.

    Returns:
        A ``(result_markdown, summary)`` tuple on every path. ``summary`` is
        the text from ``generate_summary`` when more than one page produced
        output, otherwise ``None``.
    """
    # Every exit returns a 2-tuple: callers unpack the result, so a bare
    # string here would raise at the call site.
    if not images:
        return "No images provided for analysis.", None
    if isinstance(images, str):  # error message
        return images, None

    page_count = len(images)
    if selected_pages is None:
        selected_pages = list(range(1, page_count + 1))

    all_results = []
    extracted_texts = []
    for raw_page in selected_pages:
        try:
            page_number = int(raw_page)
        except (TypeError, ValueError):
            page_number = None
        # Reject 0, negatives and numbers past the end: a blind images[n - 1]
        # would either raise IndexError or silently pick a page from the end.
        if page_number is None or not 1 <= page_number <= page_count:
            all_results.append(
                f"Skipped page {raw_page}: not a valid page number "
                f"(document has {page_count} page(s))."
            )
            continue
        try:
            image_base64 = image_to_base64(images[page_number - 1])
            response = client.chat.completions.create(
                model="opengvlab/internvl3-14b:free",
                messages=[
                    {"role": "system", "content": "You are Dalton, an expert in understanding images that can analyze images and provide detailed descriptions."},
                    {"role": "user", "content": [
                        {"type": "text", "text": user_prompt},
                        {"type": "image_url", "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        }},
                    ]},
                ],
                max_tokens=1024,
            )
            result = response.choices[0].message.content
            if result is None:
                # A None entry would break the "\n".join() below, outside this
                # try; record it as a per-page failure instead.
                raise ValueError("the model returned no content")
            extracted_texts.append(f"=== Page {page_number} ===\n{result}\n")
            # NOTE(review): the leading "π" in the two headings of this
            # function looks like a mis-decoded emoji; the original glyph
            # cannot be recovered from here, so it is left untouched.
            all_results.append(f"### π Page {page_number} Result:")
            all_results.append(result)
            all_results.append("---")
        except Exception as e:
            all_results.append(f"An error occurred analyzing page {page_number}: {e}")

    full_result = "\n".join(all_results)
    if len(extracted_texts) > 1:
        summary = generate_summary("\n".join(extracted_texts))
        if summary is not None:
            full_result += "\n\n## π Comprehensive Document Summary\n"
            full_result += summary
        return full_result, summary
    if len(extracted_texts) == 1:
        return full_result, None
    if all_results:
        # Nothing succeeded: keep the per-page reasons so the user can see why.
        return "No valid results generated.\n\n" + full_result, None
    return "No valid results generated.", None
def _describe_upload(file):
    """Return ``(path, mime_type)`` for an upload given as a path or an object.

    ``path`` is a readable filesystem path or ``None``; ``mime_type`` is a
    string and is empty when the type cannot be determined.
    """
    if isinstance(file, (str, os.PathLike)):
        name = os.fspath(file)
    else:
        declared_type = getattr(file, "type", None)
        if isinstance(declared_type, str) and declared_type:
            # The object declares its own MIME type: trust it and read the
            # object directly rather than guessing a path from its name.
            return None, declared_type
        name = getattr(file, "name", None)
    if not isinstance(name, str) or not name:
        return None, ""
    guessed_type = mimetypes.guess_type(name)[0]
    path = name if os.path.isfile(name) else None
    return path, guessed_type or ""


def process_input(file, user_prompt, page_numbers):
    """Dispatch an uploaded PDF or image to the analysis pipeline.

    Args:
        file: The upload. Accepted forms are a filesystem path, an object
            whose ``name`` attribute is such a path, or a file-like object
            that carries its MIME type in ``type``.
        user_prompt: Instruction forwarded to ``analyze_images``.
        page_numbers: 1-based pages to analyze for PDFs; empty or ``None``
            selects every page. Ignored for single images.

    Returns:
        Always a ``(result_text, summary_or_None)`` tuple, including for the
        "no file", "unsupported type" and conversion-error cases.
    """
    if file is None:
        return "Please upload a file.", None

    path, mime_type = _describe_upload(file)

    if mime_type == "application/pdf":
        if path is not None:
            # Pass a binary handle so the converter can read() the content.
            with open(path, "rb") as pdf_handle:
                images = convert_pdf_to_images(pdf_handle)
        else:
            images = convert_pdf_to_images(file)
        if isinstance(images, str):  # error message
            return images, None
        if not page_numbers:
            page_numbers = list(range(1, len(images) + 1))
        outcome = analyze_images(images, user_prompt, page_numbers)
    elif mime_type.startswith("image/"):
        images = [Image.open(path if path is not None else file)]
        outcome = analyze_images(images, user_prompt)
    else:
        return "Unsupported file type. Please upload a JPG/PNG/PDF.", None

    # Callers unpack two values; wrap a bare message defensively.
    return outcome if isinstance(outcome, tuple) else (outcome, None)
# --- GRADIO INTERFACE ---
# Layout: a header, then two columns (inputs on the left, results on the
# right) and a footer. The two callbacks below are wired to the file upload
# and to the submit button.
with gr.Blocks(title="DocSum - Document Summarizer") as demo:
    # Built from single-line literals so no source indentation leaks into the
    # rendered Markdown/HTML.
    gr.Markdown(
        "\n"
        "<h1 style=\"text-align:center;\">🧾 DocSum</h1>\n"
        "<p style=\"text-align:center;\">Document Summarizer Powered by VLM • Developed by "
        "<a href='https://koshurai.com' target='_blank'>Koshur AI</a></p>\n"
    )
    with gr.Row():
        with gr.Column():
            file_upload = gr.File(
                label="Upload a document (JPG/PNG/PDF)",
                file_types=[".jpg", ".jpeg", ".png", ".pdf"],
            )
            # NOTE(review): the leading "π" in this label and in the button
            # label looks like a mis-decoded emoji; the original glyph cannot
            # be recovered from here, so both are left untouched.
            prompt = gr.Textbox(label="π Enter Your Prompt", value="Extract all content structurally")
            page_selector = gr.CheckboxGroup(label="Select Pages (for PDFs only)", choices=[], visible=False)

            def update_page_selector(file):
                """Show one checkbox per page for an uploaded PDF; hide the selector otherwise.

                ``file`` is whatever the upload component passes: a path
                string, or an object whose ``name`` attribute is the path.
                """
                path = file if isinstance(file, str) else getattr(file, "name", None)
                if not isinstance(path, str) or not path.lower().endswith(".pdf"):
                    # Also clear the value so ticks from an earlier PDF do not linger.
                    return gr.update(choices=[], value=[], visible=False)
                # The upload is already on disk, so open it in place.
                doc = fitz.open(path)
                try:
                    num_pages = len(doc)
                finally:
                    doc.close()
                return gr.update(choices=list(range(1, num_pages + 1)), value=[], visible=True)

            file_upload.change(fn=update_page_selector, inputs=file_upload, outputs=page_selector)
            submit_btn = gr.Button("π Analyze Document")
        with gr.Column():
            output_box = gr.Markdown(label="Analysis Output")
            summary_download = gr.File(label="Download Summary", visible=False)

    def handle_submit(file, prompt, pages):
        """Run the analysis and return the result text plus a download update.

        When a summary was produced it is written to a temporary ``.txt`` file
        and the download component is made visible; otherwise the component is
        cleared and hidden.
        """
        outcome = process_input(file, prompt, pages)
        # Tolerate a bare message string as well as the usual 2-tuple.
        if isinstance(outcome, tuple):
            result, summary = outcome
        else:
            result, summary = outcome, None

        summary_file = None
        if summary:
            # Explicit UTF-8 so non-ASCII summaries do not depend on the
            # platform's default encoding.
            with tempfile.NamedTemporaryFile(
                mode="w", delete=False, suffix=".txt", encoding="utf-8"
            ) as tmpfile:
                tmpfile.write(summary)
                summary_file = tmpfile.name
        # The download component starts hidden, so visibility has to be
        # switched together with the value or the file is never offered.
        return result, gr.update(value=summary_file, visible=summary_file is not None)

    submit_btn.click(fn=handle_submit, inputs=[file_upload, prompt, page_selector], outputs=[output_box, summary_download])
    gr.Markdown("<footer>© 2025 Koshur AI. All rights reserved.</footer>")

# Launch Gradio App
demo.launch()