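# PDF summarizer: extracts text with pdfplumber, splits it into chunks, and
# summarizes each chunk with Groq's chat API behind a Gradio upload UI.
#
# Assumed dependencies (usual pip package names for these imports; adjust if
# your environment differs):
#   pip install pdfplumber gradio python-dotenv groq
# Expects a .env file containing GROQ_API_KEY=<your key>.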
import os

import pdfplumber
import gradio as gr
from dotenv import load_dotenv
from groq import Groq

# Load the Groq API key from the .env file and create the client.
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError("GROQ_API_KEY is not set; add it to your .env file.")

client = Groq(api_key=GROQ_API_KEY)
def extract_text_from_pdf(pdf_file):
    """Extract text from every page of the uploaded PDF."""
    # Newer Gradio versions pass a file path (str); older ones pass a file object.
    pdf_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file
    text = ""
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text + "\n"
    return text
def split_text_into_chunks(text, max_chars=2000):
    """Split text on word boundaries into chunks of at most max_chars characters."""
    words = text.split()
    chunks = []
    chunk = ""
    for word in words:
        if len(chunk) + len(word) + 1 <= max_chars:
            chunk += " " + word
        else:
            chunks.append(chunk.strip())
            chunk = word
    if chunk:
        chunks.append(chunk.strip())
    return chunks
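# Illustration (not used by the app): with a small limit the splitter breaks
# on word boundaries, e.g.
#   split_text_into_chunks("word word word word word", max_chars=10)
#   -> ['word word', 'word word', 'word']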
def summarize_chunk(chunk):
    """Ask the Groq model to summarize a single chunk of PDF text."""
    prompt = f"Summarize the following PDF section:\n\n{chunk}"
    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-8b-8192",
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Error during summarization: {e}"
def summarize_pdf(pdf_file):
    """Extract, chunk, and summarize an uploaded PDF section by section."""
    text = extract_text_from_pdf(pdf_file)
    if not text.strip():
        return "No extractable text found in the PDF."
    chunks = split_text_into_chunks(text, max_chars=2000)
    summaries = []
    for i, chunk in enumerate(chunks):
        summary = summarize_chunk(chunk)
        summaries.append(f"🔹 **Section {i + 1} Summary:**\n{summary}\n")
    return "\n".join(summaries)
# Gradio UI: upload a PDF, get the section-wise summaries rendered as Markdown.
iface = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.File(label="Upload PDF", file_types=[".pdf"]),
    outputs=gr.Markdown(label="Summary"),
    title="📄 PDF Summarizer with Groq",
    description="Upload a large PDF and get section-wise AI summaries using Groq's LLaMA3 model.",
)

if __name__ == "__main__":
    iface.launch()
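    # Note: iface.launch(share=True) would also serve a temporary public URL
    # (standard Gradio option) if you want to share the demo.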