mehakkhan committed on
Commit
667e863
·
verified ·
1 Parent(s): 01f1b29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -18
app.py CHANGED
@@ -6,19 +6,37 @@ from langchain_community.document_loaders import PyPDFLoader
6
  from langchain_groq import ChatGroq
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain.chains import RetrievalQA
 
9
  from tempfile import NamedTemporaryFile
 
10
 
11
  # Load Groq API Key securely
12
  os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
13
 
14
- # Function to process PDFs
15
- def process_pdfs(files):
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  all_docs = []
17
  for file in files:
18
- with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
19
- temp_file.write(file.read())
20
- loader = PyPDFLoader(temp_file.name)
21
- all_docs.extend(loader.load())
 
 
 
22
 
23
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
24
  chunks = splitter.split_documents(all_docs)
@@ -31,31 +49,29 @@ def process_pdfs(files):
31
  qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
32
  return qa_chain
33
 
34
- # Global variable
35
  qa_chain = None
36
 
37
- # Upload handler
38
- def upload_pdfs(files):
39
  global qa_chain
40
- qa_chain = process_pdfs(files)
41
- return "βœ… PDFs uploaded and processed. Now ask your questions."
42
 
43
- # Question handler
44
  def ask_question(query):
45
  if qa_chain is None:
46
- return "❌ Please upload Kaggle notebooks/competition PDFs first."
47
  return qa_chain.run(query)
48
 
49
- # ✅ Gradio UI (fixed)
50
  with gr.Blocks() as app:
51
- gr.Markdown("## πŸ€– Kaggle Study Assistant\nUpload PDFs from Kaggle and ask intelligent questions.")
52
 
53
  with gr.Row():
54
- upload = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Kaggle PDFs")
55
- btn_upload = gr.Button("πŸ“₯ Process PDFs")
56
 
57
  upload_output = gr.Textbox(label="Upload Status")
58
- btn_upload.click(fn=upload_pdfs, inputs=upload, outputs=upload_output)
59
 
60
  question = gr.Textbox(label="Ask a question about uploaded notebooks")
61
  answer = gr.Textbox(label="Assistant Answer", interactive=False)
 
6
  from langchain_groq import ChatGroq
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain.chains import RetrievalQA
9
+ from langchain.docstore.document import Document
10
  from tempfile import NamedTemporaryFile
11
+ import nbformat
12
 
13
  # Load Groq API Key securely
14
  os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
15
 
16
+ # Helper: Read .ipynb file and extract text
17
+ def load_ipynb(file):
18
+ with NamedTemporaryFile(delete=False, suffix=".ipynb") as temp_file:
19
+ temp_file.write(file.read())
20
+ temp_file.flush()
21
+ with open(temp_file.name, "r", encoding="utf-8") as f:
22
+ nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)
23
+ text = ""
24
+ for cell in nb.cells:
25
+ if cell.cell_type in ["markdown", "code"]:
26
+ text += cell.source + "\n\n"
27
+ return [Document(page_content=text)]
28
+
29
+ # Helper: Read PDF or IPYNB and build retriever chain
30
+ def process_files(files):
31
  all_docs = []
32
  for file in files:
33
+ if file.name.endswith(".pdf"):
34
+ with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
35
+ temp_file.write(file.read())
36
+ loader = PyPDFLoader(temp_file.name)
37
+ all_docs.extend(loader.load())
38
+ elif file.name.endswith(".ipynb"):
39
+ all_docs.extend(load_ipynb(file))
40
 
41
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
42
  chunks = splitter.split_documents(all_docs)
 
49
  qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
50
  return qa_chain
51
 
52
+ # Global chain
53
  qa_chain = None
54
 
55
+ def upload_docs(files):
 
56
  global qa_chain
57
+ qa_chain = process_files(files)
58
+ return "βœ… PDFs or Notebooks uploaded and processed. Now ask your questions."
59
 
 
60
  def ask_question(query):
61
  if qa_chain is None:
62
+ return "❌ Please upload PDFs or Kaggle Notebooks first."
63
  return qa_chain.run(query)
64
 
65
+ # Gradio UI
66
  with gr.Blocks() as app:
67
+ gr.Markdown("## πŸ€– Kaggle Study Assistant\nUpload Kaggle `.pdf` or `.ipynb` files and ask intelligent questions.")
68
 
69
  with gr.Row():
70
+ upload = gr.File(file_types=[".pdf", ".ipynb"], file_count="multiple", label="Upload Kaggle Files")
71
+ btn_upload = gr.Button("πŸ“₯ Process Files")
72
 
73
  upload_output = gr.Textbox(label="Upload Status")
74
+ btn_upload.click(fn=upload_docs, inputs=upload, outputs=upload_output)
75
 
76
  question = gr.Textbox(label="Ask a question about uploaded notebooks")
77
  answer = gr.Textbox(label="Assistant Answer", interactive=False)