Manasa1 committed on
Commit
3bf3fb4
·
verified ·
1 Parent(s): e741bed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -18
app.py CHANGED
@@ -4,13 +4,14 @@ import gradio as gr
4
  from dotenv import load_dotenv
5
  from groq import Groq
6
 
7
- # Load environment variables from a .env file
8
  load_dotenv()
9
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
10
 
11
- # Instantiate the Groq client
12
  client = Groq(api_key=GROQ_API_KEY)
13
 
 
14
  def extract_text_from_pdf(pdf_file):
15
  text = ""
16
  with pdfplumber.open(pdf_file.name) as pdf:
@@ -20,37 +21,58 @@ def extract_text_from_pdf(pdf_file):
20
  text += page_text
21
  return text
22
 
23
- def summarize_pdf(pdf_file):
24
- text = extract_text_from_pdf(pdf_file)
25
- if not text.strip():
26
- return "No extractable text found in the PDF."
 
27
 
28
- # Optional: Limit the text if needed for token limits
29
- text = text[:15000]
 
 
 
 
 
 
30
 
31
- prompt = f"Summarize the following PDF content:\n\n{text}"
32
 
 
 
 
33
  try:
34
  response = client.chat.completions.create(
35
- messages=[
36
- {
37
- "role": "user",
38
- "content": prompt
39
- }
40
- ],
41
- model="llama3-8b-8192", # Replace with your desired model ID
42
  )
43
  return response.choices[0].message.content.strip()
44
  except Exception as e:
45
  return f"Error during summarization: {e}"
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  # Gradio interface
48
  iface = gr.Interface(
49
  fn=summarize_pdf,
50
  inputs=gr.File(label="Upload PDF", file_types=[".pdf"]),
51
  outputs="text",
52
- title="PDF Summarizer with Groq",
53
- description="Upload a PDF and get a summary using Groq's generative AI API."
54
  )
55
 
56
  if __name__ == "__main__":
 
4
  from dotenv import load_dotenv
5
  from groq import Groq
6
 
7
+ # Load environment variables
8
  load_dotenv()
9
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
10
 
11
+ # Instantiate Groq client
12
  client = Groq(api_key=GROQ_API_KEY)
13
 
14
+ # Function to extract text from PDF
15
  def extract_text_from_pdf(pdf_file):
16
  text = ""
17
  with pdfplumber.open(pdf_file.name) as pdf:
 
21
  text += page_text
22
  return text
23
 
24
+ # Split text into manageable chunks (by character count)
25
def split_text_into_chunks(text, max_chars=2000):
    """Split *text* into whitespace-delimited chunks of at most *max_chars*.

    Words are packed greedily and joined with single spaces, so runs of
    whitespace (including newlines) in the input are collapsed. A single
    word longer than *max_chars* is hard-split into *max_chars*-sized
    pieces so that no returned chunk ever exceeds the limit.

    Args:
        text: The text to split.
        max_chars: Maximum length of each returned chunk, in characters.

    Returns:
        A list of non-empty chunk strings; empty if *text* contains no
        non-whitespace characters.

    Raises:
        ValueError: If *max_chars* is less than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")

    chunks = []
    current_words = []  # words of the chunk currently being built
    current_len = 0     # length of " ".join(current_words)

    for word in text.split():
        # Break up tokens that could never fit in a chunk on their own.
        pieces = [word[i:i + max_chars] for i in range(0, len(word), max_chars)]
        for piece in pieces:
            # A separating space is only needed when the chunk already has content.
            separator = 1 if current_words else 0
            if current_len + separator + len(piece) <= max_chars:
                current_words.append(piece)
                current_len += separator + len(piece)
            else:
                # Only reachable with a non-empty chunk: a lone piece always
                # fits, so an empty chunk is never emitted here.
                chunks.append(" ".join(current_words))
                current_words = [piece]
                current_len = len(piece)

    if current_words:
        chunks.append(" ".join(current_words))

    return chunks
40
 
41
+ # Summarize a single chunk using Groq
42
def summarize_chunk(chunk, model="llama3-8b-8192"):
    """Summarize one section of PDF text via the Groq chat-completions API.

    Args:
        chunk: Text of the section to summarize.
        model: Groq model ID to request. Defaults to the model this app
            previously hard-coded, so single-argument callers are unaffected.

    Returns:
        The summary text with surrounding whitespace stripped, or a string
        starting with "Error during summarization:" if anything raised
        while making or reading the request.
    """
    prompt = f"Summarize the following PDF section:\n\n{chunk}"
    try:
        # `client` is the module-level Groq client instance.
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=model,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        # Best-effort: report the failure as text rather than raising to the caller.
        return f"Error during summarization: {e}"
52
 
53
+ # Main summarization function
54
def summarize_pdf(pdf_file):
    """Summarize an uploaded PDF section by section.

    The PDF's text is extracted, split into chunks of at most 2000
    characters, and each chunk is summarized separately.

    Args:
        pdf_file: The uploaded file object passed on to
            ``extract_text_from_pdf``, or ``None`` when nothing was uploaded.

    Returns:
        The per-section summaries, each under a numbered heading, joined by
        newlines; or a short message when no file was supplied or the PDF
        has no extractable text.
    """
    # Guard against submitting the form with no file attached, which would
    # otherwise fail on attribute access inside the extractor.
    if pdf_file is None:
        return "Please upload a PDF file."

    text = extract_text_from_pdf(pdf_file)
    if not text.strip():
        return "No extractable text found in the PDF."

    # Drop blank chunks so no API request is spent on an empty section.
    chunks = [
        chunk
        for chunk in split_text_into_chunks(text, max_chars=2000)
        if chunk.strip()
    ]

    summaries = [
        f"🔹 **Section {index} Summary:**\n{summarize_chunk(chunk)}\n"
        for index, chunk in enumerate(chunks, start=1)
    ]
    return "\n".join(summaries)
68
+
69
  # Gradio interface
70
  iface = gr.Interface(
71
  fn=summarize_pdf,
72
  inputs=gr.File(label="Upload PDF", file_types=[".pdf"]),
73
  outputs="text",
74
+ title="📄 PDF Summarizer with Groq",
75
+ description="Upload a large PDF and get section-wise AI summaries using Groq's LLaMA3 model."
76
  )
77
 
78
  if __name__ == "__main__":