PDF_Summarizer_large_file

Running

File size: 2,013 Bytes

9008d0c
e741bed
 
9384e9a
e741bed
9008d0c
7e18da0
0c8b033
4afb96f
7e18da0
590eab9
9008d0c
 
 
e741bed
9008d0c
 
 
 
 
 
3bf3fb4
 
 
 
9008d0c
3bf3fb4
 
 
 
 
 
 
 
9008d0c
3bf3fb4
9008d0c
3bf3fb4
 
9008d0c
e741bed
3bf3fb4
 
e741bed
 
9008d0c
 
 
3bf3fb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9008d0c
 
 
 
3bf3fb4
 
9008d0c

import os
import pdfplumber
import gradio as gr
from dotenv import load_dotenv
from groq import Groq

# Pull variables from a local .env file (if one exists) into the environment
# before reading the key.
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Never echo the secret itself: anything written to stdout can end up in
# console history or captured logs. Report only whether a key was found.
print("Groq API Key loaded:", bool(GROQ_API_KEY))

# Shared client used by summarize_chunk for every request.
client = Groq(api_key=GROQ_API_KEY)

def extract_text_from_pdf(pdf_file):
    """Return the extractable text of every page in a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            object that exposes the path through a ``.name`` attribute, such
            as an uploaded-file wrapper.

    Returns:
        The page texts joined by newlines. Pages for which
        ``extract_text()`` returns nothing are skipped, so the result is
        ``""`` when no page yields text.
    """
    # A path is used as-is; anything else is expected to carry it in ``.name``.
    # (Checked explicitly because ``pathlib.Path.name`` is only the basename.)
    if isinstance(pdf_file, (str, os.PathLike)):
        path = pdf_file
    else:
        path = pdf_file.name

    page_texts = []
    with pdfplumber.open(path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)

    # Join with a newline so the last word of one page and the first word of
    # the next are not fused into a single token.
    return "\n".join(page_texts)

def split_text_into_chunks(text, max_chars=2000):
    """Split text into whitespace-normalized chunks of at most ``max_chars``.

    Words are separated on any whitespace and re-joined with single spaces.
    Chunks break on word boundaries; a single token longer than ``max_chars``
    is hard-split into ``max_chars``-sized pieces so the limit always holds.

    Args:
        text: The text to split.
        max_chars: Maximum length of each returned chunk; must be >= 1.

    Returns:
        A list of non-empty chunk strings, in original order. Empty or
        whitespace-only input yields ``[]``.

    Raises:
        ValueError: If ``max_chars`` is less than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")

    chunks = []
    current = []     # words of the chunk being built
    current_len = 0  # len(" ".join(current)), tracked incrementally

    for word in text.split():
        if len(word) > max_chars:
            # The token can never fit in one chunk: flush what we have, emit
            # all full-size pieces, and keep the remainder open so following
            # words can still be packed after it.
            if current:
                chunks.append(" ".join(current))
            pieces = [
                word[start:start + max_chars]
                for start in range(0, len(word), max_chars)
            ]
            chunks.extend(pieces[:-1])
            current = [pieces[-1]]
            current_len = len(pieces[-1])
            continue

        # A separating space is only needed when the chunk already has words.
        needed = len(word) + (1 if current else 0)
        if current_len + needed <= max_chars:
            current.append(word)
            current_len += needed
        else:
            chunks.append(" ".join(current))
            current = [word]
            current_len = len(word)

    if current:
        chunks.append(" ".join(current))

    return chunks

def summarize_chunk(chunk, model="llama3-8b-8192"):
    """Summarize one chunk of PDF text with a Groq chat-completion request.

    Args:
        chunk: Text to summarize; it is embedded verbatim in the prompt.
        model: Groq model identifier to use. The default is the id this
            function previously hard-coded.
            NOTE(review): hosted model ids get retired over time; confirm
            the default is still served before relying on it.

    Returns:
        The stripped content of the first completion choice, or a string
        starting with ``"Error during summarization: "`` if anything in the
        request or the response handling raises.
    """
    prompt = f"Summarize the following PDF section:\n\n{chunk}"
    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=model,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        # Deliberate best-effort boundary: the failure is returned as text
        # instead of raised, so a caller looping over chunks keeps going.
        return f"Error during summarization: {e}"

def summarize_pdf(pdf_file):
    """Produce a section-by-section summary of an uploaded PDF.

    The PDF text is extracted, split into chunks of at most 2000 characters,
    and each chunk is summarized separately.

    Args:
        pdf_file: The uploaded file as handed over by the UI, or ``None``
            when nothing was uploaded.

    Returns:
        A single string with one "Section N Summary" entry per chunk, or a
        short explanatory message when there is no file or no extractable
        text.
    """
    # Guard against being invoked with no file (e.g. the form was submitted
    # before an upload); otherwise extraction fails on a missing attribute.
    if pdf_file is None:
        return "Please upload a PDF file."

    text = extract_text_from_pdf(pdf_file)
    if not text.strip():
        return "No extractable text found in the PDF."

    # Drop blank chunks so no request is spent on empty text.
    chunks = [c for c in split_text_into_chunks(text, max_chars=2000) if c]

    summaries = [
        f"🔹 **Section {number} Summary:**\n{summarize_chunk(chunk)}\n"
        for number, chunk in enumerate(chunks, start=1)
    ]
    return "\n".join(summaries)

# Gradio UI definition: one PDF upload field wired to summarize_pdf, with the
# result shown as plain text.
iface = gr.Interface(
    title="📄 PDF Summarizer with Groq",
    description="Upload a large PDF and get section-wise AI summaries using Groq's LLaMA3 model.",
    fn=summarize_pdf,
    inputs=gr.File(file_types=[".pdf"], label="Upload PDF"),
    outputs="text",
)

# Launch the interface only when this file is executed as a script, so that
# importing the module does not start it.
if __name__ == "__main__":
    iface.launch()