Spaces:

adil9858
/

DOCSUM

Running

App Files Files Community

DOCSUM / app.py

adil9858

Update app.py

d4664d1 verified 7 days ago

raw

history blame contribute delete

7.56 kB

	import base64
	import io
	import mimetypes
	import os
	import tempfile

	import fitz # PyMuPDF
	import gradio as gr
	from openai import OpenAI
	from PIL import Image

	# --- OPENAI CLIENT SETUP ---
	# The OpenRouter key is a secret and must never be committed to source
	# control: it is read from the environment (e.g. a Space secret) instead.
	# NOTE(review): the key that was previously hard-coded here has been
	# published and should be revoked.
	_openrouter_api_key = os.environ.get("OPENROUTER_API_KEY")
	if not _openrouter_api_key:
	    raise RuntimeError(
	        "Set the OPENROUTER_API_KEY environment variable before starting the app."
	    )

	client = OpenAI(
	    base_url="https://openrouter.ai/api/v1",
	    api_key=_openrouter_api_key,
	)

	def convert_pdf_to_images(pdf_file):
	    """Render every page of a PDF to a PIL image.

	    Args:
	        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or a
	            binary file-like object exposing ``read()``.

	    Returns:
	        A list of RGB PIL images, one per page in page order, or a string
	        starting with ``"Error converting PDF:"`` if anything fails.
	    """
	    images = []
	    try:
	        if isinstance(pdf_file, (str, os.PathLike)):
	            pdf_document = fitz.open(os.fspath(pdf_file))
	        else:
	            # Read first, then open from memory: no temporary file is
	            # created, so there is nothing to leak if opening fails.
	            pdf_bytes = pdf_file.read()
	            pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")

	        try:
	            for page_num in range(len(pdf_document)):
	                page = pdf_document.load_page(page_num)
	                # alpha=False keeps the samples at 3 bytes per pixel, which
	                # is what the "RGB" mode below expects.
	                pix = page.get_pixmap(alpha=False)
	                img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
	                images.append(img)
	        finally:
	            # Close the document even when a page fails to render.
	            pdf_document.close()
	    except Exception as e:
	        return f"Error converting PDF: {e}"
	    return images

	def image_to_base64(image):
	    """Serialise an image as PNG and return the bytes as base64 text.

	    Args:
	        image: Object with a PIL-style ``save(fp, format=...)`` method.

	    Returns:
	        The PNG data, base64-encoded and decoded to a UTF-8 ``str``.
	    """
	    png_stream = io.BytesIO()
	    try:
	        image.save(png_stream, format="PNG")
	        raw_png = png_stream.getvalue()
	    finally:
	        png_stream.close()
	    encoded = base64.b64encode(raw_png)
	    return encoded.decode("utf-8")

	def generate_summary(extracted_texts):
	    """Ask the chat model for one consolidated summary of the extracted text.

	    Args:
	        extracted_texts: Text gathered from the analysed pages; it is
	            interpolated into the summary prompt as-is.

	    Returns:
	        The content of the first completion choice, or a string starting
	        with ``"Error generating summary:"`` if the request fails.
	    """
	    try:
	        prompt_body = f"""
	You are an expert document analyst. Below are the extracted contents from multiple pages of a document.
	Please provide a comprehensive, detailed summary that:
	1. Organizes all key information logically
	2. Identifies relationships between data points
	3. Highlights important figures, dates, names
	4. Presents the information in a clear, structured format

	Extracted contents from pages:
	{extracted_texts}

	Comprehensive Summary:
	"""

	        system_turn = {
	            "role": "system",
	            "content": "You are Dalton, an expert in analyzing and summarizing document contents.",
	        }
	        user_turn = {"role": "user", "content": prompt_body}

	        completion = client.chat.completions.create(
	            model="opengvlab/internvl3-14b:free",
	            messages=[system_turn, user_turn],
	            max_tokens=2048,
	        )

	        first_choice = completion.choices[0]
	        return first_choice.message.content
	    except Exception as err:
	        return f"Error generating summary: {err}"

	def analyze_images(images, user_prompt, selected_pages=None):
	    """Run the vision model over the selected pages and summarise them.

	    Args:
	        images: List of page images, or an error string from an earlier
	            step, which is passed through unchanged.
	        user_prompt: Instruction sent to the model alongside each image.
	        selected_pages: 1-based page numbers to analyse; ``None`` means
	            every page. Out-of-range numbers are skipped and reported.

	    Returns:
	        Always a ``(markdown_report, summary)`` tuple. ``summary`` is
	        ``None`` unless more than one page was analysed successfully.
	    """
	    # Every exit returns a 2-tuple because the caller unpacks two values.
	    if not images:
	        return "No images provided for analysis.", None

	    if isinstance(images, str): # error message
	        return images, None

	    page_count = len(images)
	    if selected_pages is None:
	        selected_pages = list(range(1, page_count + 1))

	    # Reject out-of-range numbers up front: 0 would silently wrap to the
	    # last page and anything above page_count would raise IndexError.
	    valid_pages = [p for p in selected_pages if 1 <= p <= page_count]
	    skipped_pages = [p for p in selected_pages if not 1 <= p <= page_count]
	    if not valid_pages:
	        return "No valid pages selected for analysis.", None

	    all_results = []
	    extracted_texts = []

	    if skipped_pages:
	        skipped = ", ".join(str(p) for p in skipped_pages)
	        all_results.append(f"Skipped out-of-range page number(s): {skipped}")

	    for page_number in valid_pages:
	        try:
	            image_base64 = image_to_base64(images[page_number - 1])

	            response = client.chat.completions.create(
	                model="opengvlab/internvl3-14b:free",
	                messages=[
	                    {"role": "system", "content": "You are Dalton, an expert in understanding images that can analyze images and provide detailed descriptions."},
	                    {"role": "user", "content": [
	                        {"type": "text", "text": user_prompt},
	                        {"type": "image_url", "image_url": {
	                            "url": f"data:image/png;base64,{image_base64}"
	                        }}
	                    ]}
	                ],
	                max_tokens=1024
	            )

	            result = response.choices[0].message.content
	        except Exception as e:
	            all_results.append(f"An error occurred analyzing page {page_number}: {e}")
	            continue

	        if not result:
	            # A None/empty answer would otherwise break the join below.
	            all_results.append(
	                f"An error occurred analyzing page {page_number}: the model returned an empty response"
	            )
	            continue

	        extracted_texts.append(f"=== Page {page_number} ===\n{result}\n")
	        all_results.append(f"### 📄 Page {page_number} Result:")
	        all_results.append(result)
	        all_results.append("---")

	    full_result = "\n".join(all_results)

	    if not extracted_texts:
	        # Keep the per-page error messages so the user can see what failed.
	        return f"No valid results generated.\n\n{full_result}", None

	    if len(extracted_texts) == 1:
	        return full_result, None

	    summary = generate_summary("\n".join(extracted_texts))
	    if not summary:
	        return full_result, None

	    full_result += "\n\n## 📝 Comprehensive Document Summary\n"
	    full_result += summary
	    return full_result, summary

	def process_input(file, user_prompt, page_numbers):
	    """Dispatch an uploaded file to PDF or image analysis.

	    Args:
	        file: The upload. May be a path (``str`` / ``os.PathLike``), a
	            file-like object with ``read()``, or an object exposing a MIME
	            string as ``.type``. ``None`` means nothing was uploaded.
	        user_prompt: Instruction forwarded to the model for each page.
	        page_numbers: 1-based PDF pages to analyse; empty or ``None``
	            selects every page. Ignored for images.

	    Returns:
	        Always a ``(markdown_report, summary_or_None)`` tuple.
	    """
	    if file is None:
	        return "Please upload a file.", None

	    is_path = isinstance(file, (str, os.PathLike))
	    file_path = os.fspath(file) if is_path else getattr(file, "name", None)

	    # Uploads that arrive as a path or temp-file wrapper have no `.type`
	    # attribute, so fall back to guessing the MIME type from the name.
	    mime_type = getattr(file, "type", None)
	    if not isinstance(mime_type, str):
	        guessed = mimetypes.guess_type(file_path)[0] if isinstance(file_path, str) else None
	        mime_type = guessed or ""

	    if mime_type == "application/pdf":
	        try:
	            if is_path:
	                # Hand over an open binary handle so the converter can read it.
	                with open(file_path, "rb") as pdf_handle:
	                    images = convert_pdf_to_images(pdf_handle)
	            else:
	                images = convert_pdf_to_images(file)
	        except OSError as e:
	            return f"Error reading file: {e}", None
	        if isinstance(images, str): # error message
	            return images, None
	        if not page_numbers:
	            page_numbers = list(range(1, len(images) + 1))
	        outcome = analyze_images(images, user_prompt, page_numbers)
	    elif mime_type.startswith("image/"):
	        try:
	            images = [Image.open(file)]
	        except OSError as e:
	            return f"Error opening image: {e}", None
	        outcome = analyze_images(images, user_prompt)
	    else:
	        return "Unsupported file type. Please upload a JPG/PNG/PDF.", None

	    # Guard against a bare message coming back instead of a 2-tuple.
	    if isinstance(outcome, str):
	        return outcome, None
	    return outcome

	# --- GRADIO INTERFACE ---
	# Two-column layout: upload/prompt/page controls on the left, analysis
	# output and the optional summary download on the right.
	with gr.Blocks(title="DocSum - Document Summarizer") as demo:
	    gr.Markdown("""
	<h1 style="text-align:center;">🧾 DocSum</h1>
	<p style="text-align:center;">Document Summarizer Powered by VLM • Developed by <a href='https://koshurai.com' target='_blank'>Koshur AI</a></p>
	""")

	    with gr.Row():
	        with gr.Column():
	            file_upload = gr.File(label="Upload a document (JPG/PNG/PDF)", file_types=[".jpg", ".jpeg", ".png", ".pdf"])
	            prompt = gr.Textbox(label="📝 Enter Your Prompt", value="Extract all content structurally")
	            page_selector = gr.CheckboxGroup(label="Select Pages (for PDFs only)", choices=[], visible=False)

	            def update_page_selector(file):
	                """Show one checkbox per page for a PDF upload; hide the selector otherwise.

	                Args:
	                    file: The uploaded file (path or file-like object), or
	                        ``None`` when the upload was cleared.

	                Returns:
	                    A ``gr.update`` for the page selector. The selection is
	                    always reset so choices from a previous file do not
	                    carry over.
	                """
	                hidden = gr.update(choices=[], value=[], visible=False)
	                if not file:
	                    return hidden

	                is_path = isinstance(file, (str, os.PathLike))
	                file_path = os.fspath(file) if is_path else getattr(file, "name", None)

	                # Uploads without a `.type` attribute are classified by name.
	                mime_type = getattr(file, "type", None)
	                if not isinstance(mime_type, str):
	                    mime_type = mimetypes.guess_type(file_path)[0] if isinstance(file_path, str) else None
	                if mime_type != "application/pdf":
	                    return hidden

	                try:
	                    if is_path:
	                        doc = fitz.open(file_path)
	                    else:
	                        # Open from memory; no temporary file to clean up.
	                        pdf_bytes = file.read()
	                        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
	                    try:
	                        num_pages = len(doc)
	                    finally:
	                        doc.close()
	                except Exception:
	                    # An unreadable PDF gets no page choices here.
	                    return hidden

	                return gr.update(choices=list(range(1, num_pages + 1)), value=[], visible=True)

	            file_upload.change(fn=update_page_selector, inputs=file_upload, outputs=page_selector)

	            submit_btn = gr.Button("🔍 Analyze Document")

	        with gr.Column():
	            output_box = gr.Markdown(label="Analysis Output")
	            summary_download = gr.File(label="Download Summary", visible=False)

	            def handle_submit(file, prompt, pages):
	                """Analyse the upload and expose the summary as a download.

	                Args:
	                    file: The uploaded file, or ``None``.
	                    prompt: Instruction text from the prompt box.
	                    pages: Page numbers ticked in the page selector.

	                Returns:
	                    ``(markdown_report, download_update)``; the download
	                    component is only made visible when a summary exists.
	                """
	                outcome = process_input(file, prompt, pages)
	                # Tolerate a bare message as well as a (report, summary) pair.
	                if isinstance(outcome, str):
	                    result, summary = outcome, None
	                else:
	                    result, summary = outcome

	                if not summary:
	                    return result, gr.update(value=None, visible=False)

	                # delete=False: the file must outlive this call so it can be
	                # downloaded. UTF-8 is explicit because the text may contain
	                # characters the locale encoding cannot represent.
	                with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", delete=False, suffix=".txt") as tmpfile:
	                    tmpfile.write(summary)
	                return result, gr.update(value=tmpfile.name, visible=True)

	            submit_btn.click(fn=handle_submit, inputs=[file_upload, prompt, page_selector], outputs=[output_box, summary_download])

	    gr.Markdown("<footer>© 2025 Koshur AI. All rights reserved.</footer>")

	# Start the Gradio server for the `demo` Blocks app; this runs at module
	# level, so it executes whenever this file is run or imported.
	demo.launch()