File size: 2,209 Bytes
9008d0c e741bed 9384e9a e741bed 9008d0c 3bf3fb4 7e18da0 0c8b033 7e18da0 3bf3fb4 590eab9 9008d0c 3bf3fb4 9008d0c e741bed 9008d0c 3bf3fb4 9008d0c 3bf3fb4 9008d0c 3bf3fb4 9008d0c 3bf3fb4 9008d0c e741bed 3bf3fb4 e741bed 9008d0c 3bf3fb4 9008d0c 3bf3fb4 9008d0c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import os
import pdfplumber
import gradio as gr
from dotenv import load_dotenv
from groq import Groq
# Load environment variables (via python-dotenv) before reading the API key
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Module-level Groq client shared by the summarization code in this file
client = Groq(api_key=GROQ_API_KEY)
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of all pages in a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object that exposes the path of the PDF as its ``.name``
            attribute (the form the original implementation required).

    Returns:
        The text of every page that produced any text, joined with newlines.
        An empty string is returned when no page yields text.
    """
    # A pathlib.Path also has a ``.name`` attribute (the final component
    # only), so real paths must be detected first and passed through intact.
    if isinstance(pdf_file, (str, os.PathLike)):
        path = pdf_file
    else:
        path = pdf_file.name

    with pdfplumber.open(path) as pdf:
        page_texts = [page.extract_text() for page in pdf.pages]

    # Join with a newline so the last word of one page is not fused with the
    # first word of the next; pages without text (None / "") are skipped.
    return "\n".join(page_text for page_text in page_texts if page_text)
# Split text into manageable chunks (by character count)
def split_text_into_chunks(text, max_chars=2000):
    """Split *text* into whitespace-normalized chunks of at most *max_chars*.

    Words are packed greedily, separated by single spaces. A single token
    longer than ``max_chars`` (e.g. text with no whitespace at all) is cut
    into ``max_chars``-sized pieces so that every chunk respects the limit.

    Args:
        text: The text to split. Any run of whitespace counts as one separator.
        max_chars: Maximum length of each returned chunk; must be >= 1.

    Returns:
        A list of non-empty strings, each at most ``max_chars`` characters
        long. Empty or whitespace-only input yields an empty list.

    Raises:
        ValueError: If ``max_chars`` is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")

    chunks = []
    current = []      # words collected for the chunk being built
    current_len = 0   # length of " ".join(current)

    for word in text.split():
        # Cut over-long tokens so that no piece can exceed the limit by itself.
        pieces = [word[i:i + max_chars] for i in range(0, len(word), max_chars)]
        for piece in pieces:
            # A separating space is only needed when the chunk already has words.
            added = len(piece) + (1 if current else 0)
            if current and current_len + added > max_chars:
                chunks.append(" ".join(current))
                current = [piece]
                current_len = len(piece)
            else:
                current.append(piece)
                current_len += added

    if current:
        chunks.append(" ".join(current))
    return chunks
# Summarize a single chunk using Groq
def summarize_chunk(chunk, model="llama3-8b-8192"):
    """Ask the Groq chat-completions API for a summary of one text chunk.

    Args:
        chunk: The text to summarize; it is embedded verbatim in the prompt.
        model: Identifier of the Groq model to query. Defaults to the model
            this function previously had hard-coded.

    Returns:
        The stripped message content of the first returned choice, or a
        string starting with ``"Error during summarization:"`` if anything
        goes wrong. This function never raises.
    """
    prompt = f"Summarize the following PDF section:\n\n{chunk}"
    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=model,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        # Deliberately broad: a failure for one chunk is reported in place of
        # its summary instead of aborting the whole request.
        return f"Error during summarization: {e}"
# Main summarization function
def summarize_pdf(pdf_file):
    """Summarize a PDF chunk by chunk and return one combined report.

    Args:
        pdf_file: The uploaded PDF in whatever form ``extract_text_from_pdf``
            accepts, or ``None`` when nothing was provided.

    Returns:
        A string with one ``Section N Summary`` entry per 2000-character
        chunk of extracted text, or a short human-readable message when no
        file was given, the PDF could not be read, or it contains no
        extractable text.
    """
    # Guard against being invoked without a file (presumably possible when
    # the form is submitted before anything is uploaded).
    if pdf_file is None:
        return "Please upload a PDF file."

    try:
        text = extract_text_from_pdf(pdf_file)
    except Exception as e:
        # Report unreadable or corrupt files as text, mirroring how
        # summarization errors are reported, rather than raising into the UI.
        return f"Error reading PDF: {e}"

    if not text.strip():
        return "No extractable text found in the PDF."

    chunks = split_text_into_chunks(text, max_chars=2000)
    summaries = [
        f"🔹 **Section {number} Summary:**\n{summarize_chunk(chunk)}\n"
        for number, chunk in enumerate(chunks, start=1)
    ]
    return "\n".join(summaries)
# Gradio interface: a single PDF upload wired to summarize_pdf.
# NOTE(review): summarize_pdf emits "**...**" markers while the output uses
# the plain "text" shortcut — confirm this is the intended rendering.
_pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
_interface_options = {
    "fn": summarize_pdf,
    "inputs": _pdf_upload,
    "outputs": "text",
    "title": "📄 PDF Summarizer with Groq",
    "description": "Upload a large PDF and get section-wise AI summaries using Groq's LLaMA3 model.",
}
iface = gr.Interface(**_interface_options)

# Only start the web server when this file is run as a script.
if __name__ == "__main__":
    iface.launch()
|