# Spaces: Sleeping
import os

import pdfplumber
import gradio as gr
from dotenv import load_dotenv
from groq import Groq

# Load environment variables
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Report only whether the key was found. The secret itself must never be
# echoed: stdout ends up in hosting logs that other people can read.
print("Groq API key loaded:", bool(GROQ_API_KEY))

# Instantiate Groq client
client = Groq(api_key=GROQ_API_KEY)
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object that exposes the path through a ``.name`` attribute, such
            as an uploaded temporary-file wrapper.

    Returns:
        The text of all pages that yielded any text, separated by newlines.
        An empty string is returned when no page yields text.
    """
    # Check for real paths first: ``pathlib.Path`` also has a ``.name``
    # attribute, but it holds only the base name, not the full path.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = pdf_file
    else:
        pdf_path = pdf_file.name

    page_texts = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            # Pages without extractable text give a falsy result; skip them.
            if page_text:
                page_texts.append(page_text)

    # Join with a newline so the last word of one page is not fused with the
    # first word of the next page.
    return "\n".join(page_texts)
# Split text into manageable chunks (by character count)
def split_text_into_chunks(text, max_chars=2000):
    """Split text into whitespace-normalized chunks of at most ``max_chars``.

    Words are packed greedily, joined by single spaces. A single token longer
    than ``max_chars`` (for example a long URL, or text in a script that does
    not use spaces) is hard-split so that no chunk ever exceeds the limit.

    Args:
        text: The text to split. Any run of whitespace acts as a separator.
        max_chars: Maximum length of each returned chunk; must be >= 1.

    Returns:
        A list of non-empty chunk strings, in document order. Empty or
        whitespace-only input produces an empty list.

    Raises:
        ValueError: If ``max_chars`` is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")

    chunks = []
    current_words = []  # words of the chunk currently being assembled
    current_len = 0     # length of " ".join(current_words)

    for word in text.split():
        # Break oversized tokens into pieces that each fit in a chunk.
        pieces = [word[i:i + max_chars] for i in range(0, len(word), max_chars)]
        for piece in pieces:
            if current_words:
                needed = current_len + 1 + len(piece)  # +1 for the joining space
            else:
                needed = len(piece)

            if current_words and needed > max_chars:
                # The piece does not fit: close the chunk and start a new one.
                chunks.append(" ".join(current_words))
                current_words = [piece]
                current_len = len(piece)
            else:
                current_words.append(piece)
                current_len = needed

    if current_words:
        chunks.append(" ".join(current_words))
    return chunks
# Summarize a single chunk using Groq
def summarize_chunk(chunk):
    """Ask the Groq chat model for a summary of one text chunk.

    Args:
        chunk: The text of the section to summarize.

    Returns:
        The model's reply with surrounding whitespace stripped. If the
        request or the handling of its response raises, the exception is not
        propagated; a string starting with "Error during summarization:" is
        returned instead.
    """
    request_messages = [
        {
            "role": "user",
            "content": f"Summarize the following PDF section:\n\n{chunk}",
        }
    ]
    try:
        completion = client.chat.completions.create(
            messages=request_messages,
            model="llama3-8b-8192",
        )
        reply = completion.choices[0].message.content
        return reply.strip()
    except Exception as err:
        # Best-effort: report the failure in place of this section's summary.
        return f"Error during summarization: {err}"
# Main summarization function
def summarize_pdf(pdf_file):
    """Summarize an uploaded PDF section by section.

    The PDF text is extracted, split into chunks of at most 2000 characters,
    and each chunk is summarized separately.

    Args:
        pdf_file: The uploaded PDF as handed over by the UI, or ``None`` when
            nothing was uploaded.

    Returns:
        One numbered summary per chunk, joined into a single string; or a
        short explanatory message when no file was supplied or the PDF
        contains no extractable text.
    """
    # Submitting the form without a file passes None; answer with a message
    # instead of failing inside the extraction step.
    if pdf_file is None:
        return "Please upload a PDF file."

    text = extract_text_from_pdf(pdf_file)
    if not text.strip():
        return "No extractable text found in the PDF."

    chunks = split_text_into_chunks(text, max_chars=2000)
    summaries = [
        f"🔹 **Section {index} Summary:**\n{summarize_chunk(chunk)}\n"
        for index, chunk in enumerate(chunks, start=1)
    ]
    return "\n".join(summaries)
# Gradio interface
# Upload widget restricted to files with a .pdf extension.
_pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])

# Wire the upload widget to summarize_pdf and show its result as text.
iface = gr.Interface(
    title="📄 PDF Summarizer with Groq",
    description="Upload a large PDF and get section-wise AI summaries using Groq's LLaMA3 model.",
    fn=summarize_pdf,
    inputs=_pdf_upload,
    outputs="text",
)

# Launch the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()