"""PDF summarizer: extract text with pdfplumber, split it into chunks, and
summarize each chunk with Groq's LLaMA3 model behind a Gradio interface."""

import os

import gradio as gr
import pdfplumber
from dotenv import load_dotenv
from groq import Groq

# Load GROQ_API_KEY from a .env file (or the environment). Fail fast instead
# of printing the secret to the console.
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError("GROQ_API_KEY is not set; add it to your .env file.")

client = Groq(api_key=GROQ_API_KEY)


def extract_text_from_pdf(pdf_file):
    """Concatenate the extractable text of every page in the uploaded PDF."""
    # gr.File may pass a filepath string (newer Gradio) or a tempfile-like
    # object with a .name attribute (older Gradio); handle both.
    path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    text = ""
    with pdfplumber.open(path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text
    return text


def split_text_into_chunks(text, max_chars=2000):
    """Greedily pack whitespace-separated words into chunks of at most max_chars."""
    words = text.split()
    chunks = []
    chunk = ""
    for word in words:
        if len(chunk) + len(word) + 1 <= max_chars:
            chunk += " " + word
        else:
            chunks.append(chunk.strip())
            chunk = word
    if chunk:
        chunks.append(chunk.strip())
    return chunks


def summarize_chunk(chunk):
    """Ask the Groq chat model to summarize a single chunk of PDF text."""
    prompt = f"Summarize the following PDF section:\n\n{chunk}"
    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-8b-8192",
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Error during summarization: {e}"


def summarize_pdf(pdf_file):
    """End-to-end pipeline: extract, chunk, summarize, and join section summaries."""
    text = extract_text_from_pdf(pdf_file)
    if not text.strip():
        return "No extractable text found in the PDF."
    chunks = split_text_into_chunks(text, max_chars=2000)
    summaries = []
    for i, chunk in enumerate(chunks):
        summary = summarize_chunk(chunk)
        summaries.append(f"🔹 **Section {i+1} Summary:**\n{summary}\n")
    return "\n".join(summaries)


iface = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.File(label="Upload PDF", file_types=[".pdf"]),
    outputs="text",
    title="📄 PDF Summarizer with Groq",
    description="Upload a large PDF and get section-wise AI summaries using Groq's LLaMA3 model.",
)

if __name__ == "__main__":
    iface.launch()
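
A minimal sanity check of the chunking helper, assuming the script above is saved as app.py (the filename is an assumption; importing it runs the module-level setup, so GROQ_API_KEY must be set):

# Hypothetical quick check: chunks never exceed the character budget,
# since the loop only appends a word when len(chunk) + len(word) + 1
# still fits. (Assumes the script above is saved as app.py.)
from app import split_text_into_chunks

sample = "lorem ipsum dolor sit amet " * 200
chunks = split_text_into_chunks(sample, max_chars=200)
assert all(len(c) <= 200 for c in chunks)
print(f"{len(chunks)} chunks, longest is {max(len(c) for c in chunks)} chars")

Note that the budget holds only when every word is shorter than max_chars; a single oversized word is kept whole, so one chunk can exceed the limit in that edge case.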