Manasa1's picture
Update app.py
4afb96f verified
import os
import pdfplumber
import gradio as gr
from dotenv import load_dotenv
from groq import Groq
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Never echo the secret itself (stdout ends up in hosting logs); only report
# whether a key was found so misconfiguration is still easy to spot.
print("Groq API Key loaded:", bool(GROQ_API_KEY))
# Shared Groq client used by the summarization helpers below.
client = Groq(api_key=GROQ_API_KEY)
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_file: The uploaded file. Either an object exposing the file path
            as ``.name`` or a plain path string is accepted.

    Returns:
        The text of all pages that yielded any text, joined with newlines.
        An empty string if no page produced text.
    """
    # NOTE(review): depending on the Gradio version/config the upload arrives
    # as a tempfile-like object (with ``.name``) or as a path string; handle
    # both rather than assuming ``.name`` exists.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    page_texts = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            # extract_text() can yield nothing for image-only pages; skip those.
            if page_text:
                page_texts.append(page_text)

    # Join with a newline so the last word of one page is not fused with the
    # first word of the next page.
    return "\n".join(page_texts)
def split_text_into_chunks(text, max_chars=2000):
    """Split text into whitespace-delimited chunks of at most ``max_chars``.

    Words are never broken: a single word longer than ``max_chars`` becomes a
    chunk of its own (and is the only case where a chunk exceeds the limit).

    Args:
        text: The text to split. Any run of whitespace separates words.
        max_chars: Maximum length of each chunk, counting the single spaces
            that join the words.

    Returns:
        A list of non-empty chunk strings, in order. Empty or
        whitespace-only input yields an empty list.
    """
    chunks = []
    current_words = []
    current_len = 0  # length of " ".join(current_words)

    for word in text.split():
        # A joining space is only needed when the chunk already has a word.
        added = len(word) + (1 if current_words else 0)
        if current_words and current_len + added > max_chars:
            chunks.append(" ".join(current_words))
            current_words = [word]
            current_len = len(word)
        else:
            # Also taken for an oversized first word, so no empty chunk is
            # ever flushed ahead of it.
            current_words.append(word)
            current_len += added

    if current_words:
        chunks.append(" ".join(current_words))
    return chunks
def summarize_chunk(chunk, model="llama3-8b-8192"):
    """Ask the Groq chat-completions API to summarize one chunk of text.

    Args:
        chunk: The text to summarize.
        model: Groq model identifier to use. Defaults to the model this
            function has always used.
            NOTE(review): confirm this model id is still offered by Groq.

    Returns:
        The summary with surrounding whitespace stripped. On any failure
        (request error, no choices, empty content) a string starting with
        "Error during summarization:" is returned instead of raising.
    """
    prompt = f"Summarize the following PDF section:\n\n{chunk}"
    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=model,
        )
        content = response.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort: report the failure as text rather than raise.
        return f"Error during summarization: {e}"

    # The message content may be missing; report that explicitly instead of
    # failing on ``None.strip()``.
    if not content:
        return "Error during summarization: empty response from model."
    return content.strip()
def summarize_pdf(pdf_file):
    """Summarize an uploaded PDF section by section.

    The PDF text is extracted, split into chunks of at most 2000 characters,
    and each chunk is summarized independently.

    Args:
        pdf_file: The uploaded file as passed in by the UI, or ``None`` when
            nothing was uploaded.

    Returns:
        A string with one numbered "Section N Summary" entry per chunk, or a
        short explanatory message when there is no file or no extractable
        text.
    """
    # Submitting the form without a file passes None; answer with a message
    # instead of failing inside the extraction step.
    if pdf_file is None:
        return "Please upload a PDF file."

    text = extract_text_from_pdf(pdf_file)
    if not text.strip():
        return "No extractable text found in the PDF."

    # Guard against empty chunks so no request is sent for blank text.
    chunks = [
        chunk
        for chunk in split_text_into_chunks(text, max_chars=2000)
        if chunk
    ]
    summaries = [
        f"🔹 **Section {number} Summary:**\n{summarize_chunk(chunk)}\n"
        for number, chunk in enumerate(chunks, start=1)
    ]
    return "\n".join(summaries)
# Web UI: one PDF upload in, the section-wise summary out.
iface = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.File(label="Upload PDF", file_types=[".pdf"]),
    # The summary contains Markdown (bold "Section N Summary" headings), so
    # render it as Markdown instead of showing the raw asterisks in a textbox.
    outputs="markdown",
    title="📄 PDF Summarizer with Groq",
    description="Upload a large PDF and get section-wise AI summaries using Groq's LLaMA3 model.",
)

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()