Update app.py
Browse files
app.py
CHANGED
@@ -6,19 +6,37 @@ from langchain_community.document_loaders import PyPDFLoader
|
|
6 |
from langchain_groq import ChatGroq
|
7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
8 |
from langchain.chains import RetrievalQA
|
|
|
9 |
from tempfile import NamedTemporaryFile
|
|
|
10 |
|
11 |
# Load Groq API Key securely
|
12 |
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
|
13 |
|
14 |
-
#
|
15 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
all_docs = []
|
17 |
for file in files:
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
22 |
|
23 |
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
24 |
chunks = splitter.split_documents(all_docs)
|
@@ -31,31 +49,29 @@ def process_pdfs(files):
|
|
31 |
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
|
32 |
return qa_chain
|
33 |
|
34 |
-
# Global
|
35 |
qa_chain = None
|
36 |
|
37 |
-
|
38 |
-
def upload_pdfs(files):
|
39 |
global qa_chain
|
40 |
-
qa_chain =
|
41 |
-
return "β
PDFs uploaded and processed. Now ask your questions."
|
42 |
|
43 |
-
# Question handler
|
44 |
def ask_question(query):
|
45 |
if qa_chain is None:
|
46 |
-
return "β Please upload Kaggle
|
47 |
return qa_chain.run(query)
|
48 |
|
49 |
-
#
|
50 |
with gr.Blocks() as app:
|
51 |
-
gr.Markdown("## π€ Kaggle Study Assistant\nUpload
|
52 |
|
53 |
with gr.Row():
|
54 |
-
upload = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Kaggle
|
55 |
-
btn_upload = gr.Button("π₯ Process
|
56 |
|
57 |
upload_output = gr.Textbox(label="Upload Status")
|
58 |
-
btn_upload.click(fn=
|
59 |
|
60 |
question = gr.Textbox(label="Ask a question about uploaded notebooks")
|
61 |
answer = gr.Textbox(label="Assistant Answer", interactive=False)
|
|
|
6 |
from langchain_groq import ChatGroq
|
7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
8 |
from langchain.chains import RetrievalQA
|
9 |
+
from langchain.docstore.document import Document
|
10 |
from tempfile import NamedTemporaryFile
|
11 |
+
import nbformat
|
12 |
|
13 |
# Load Groq API key securely: it is only ever read from the process
# environment, never hard-coded. Fail fast with a clear message when it is
# missing -- assigning os.getenv()'s None result back into os.environ would
# raise an opaque "str expected, not NoneType" TypeError instead.
if not os.getenv("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set. Set it in the environment before starting the app."
    )
|
15 |
|
16 |
+
# Helper: Read .ipynb file and extract text
|
17 |
+
def load_ipynb(file):
    """Extract the markdown and code cells of a notebook as a single Document.

    Args:
        file: The uploaded notebook. Either an object with a ``read()`` method
            returning the notebook contents, or a filesystem path / object
            whose ``.name`` attribute is the path.
            NOTE(review): which of the two the UI passes depends on the
            installed Gradio version -- confirm against the pinned release.

    Returns:
        A one-element list containing a ``Document`` whose ``page_content`` is
        the source of every markdown and code cell, each followed by a blank
        line.
    """
    if hasattr(file, "read"):
        raw = file.read()
    else:
        # No read(): treat the upload as a path (or a path exposed via .name).
        with open(getattr(file, "name", file), "rb") as handle:
            raw = handle.read()

    if isinstance(raw, bytes):
        raw = raw.decode("utf-8")

    # Parse straight from memory: no temporary file is created, so nothing is
    # left behind on disk. as_version=4 upgrades older notebooks so that the
    # top-level ``cells`` list read below always exists.
    nb = nbformat.reads(raw, as_version=4)

    text = "".join(
        f"{cell.source}\n\n"
        for cell in nb.cells
        if cell.cell_type in ("markdown", "code")
    )
    return [Document(page_content=text)]
|
28 |
+
|
29 |
+
# Helper: Read PDF or IPYNB and build retriever chain
|
30 |
+
def process_files(files):
|
31 |
all_docs = []
|
32 |
for file in files:
|
33 |
+
if file.name.endswith(".pdf"):
|
34 |
+
with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
|
35 |
+
temp_file.write(file.read())
|
36 |
+
loader = PyPDFLoader(temp_file.name)
|
37 |
+
all_docs.extend(loader.load())
|
38 |
+
elif file.name.endswith(".ipynb"):
|
39 |
+
all_docs.extend(load_ipynb(file))
|
40 |
|
41 |
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
42 |
chunks = splitter.split_documents(all_docs)
|
|
|
49 |
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
|
50 |
return qa_chain
|
51 |
|
52 |
+
# Global chain
|
53 |
qa_chain = None
|
54 |
|
55 |
+
def upload_docs(files):
    """Build the global QA chain from the uploaded files and report the outcome.

    Args:
        files: The uploaded files, forwarded unchanged to ``process_files``.
            ``None`` or an empty list is treated as "nothing uploaded".

    Returns:
        A status message string for display in the UI.
    """
    global qa_chain
    if not files:
        # Nothing to index: leave any previously built chain untouched instead
        # of failing inside process_files on a None/empty upload.
        return "❌ Please upload PDFs or Kaggle Notebooks first."
    qa_chain = process_files(files)
    return "✅ PDFs or Notebooks uploaded and processed. Now ask your questions."
|
59 |
|
|
|
60 |
def ask_question(query):
    """Answer a question using the chain built from the uploaded documents.

    Args:
        query: The question text entered by the user.

    Returns:
        The chain's answer, or a short instruction message when no documents
        have been processed yet or the question is blank.
    """
    if qa_chain is None:
        return "❌ Please upload PDFs or Kaggle Notebooks first."
    if not query or not query.strip():
        # Avoid sending an empty prompt to the chain.
        return "❌ Please enter a question."
    # NOTE(review): Chain.run is deprecated in newer LangChain releases in
    # favour of invoke -- confirm against the pinned version before upgrading.
    return qa_chain.run(query)
|
64 |
|
65 |
+
# Gradio UI
|
66 |
with gr.Blocks() as app:
|
67 |
+
gr.Markdown("## π€ Kaggle Study Assistant\nUpload Kaggle `.pdf` or `.ipynb` files and ask intelligent questions.")
|
68 |
|
69 |
with gr.Row():
|
70 |
+
upload = gr.File(file_types=[".pdf", ".ipynb"], file_count="multiple", label="Upload Kaggle Files")
|
71 |
+
btn_upload = gr.Button("π₯ Process Files")
|
72 |
|
73 |
upload_output = gr.Textbox(label="Upload Status")
|
74 |
+
btn_upload.click(fn=upload_docs, inputs=upload, outputs=upload_output)
|
75 |
|
76 |
question = gr.Textbox(label="Ask a question about uploaded notebooks")
|
77 |
answer = gr.Textbox(label="Assistant Answer", interactive=False)
|