import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
import nbformat
# Load the Groq API key securely from the environment (e.g., a Space secret)
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError("GROQ_API_KEY environment variable is not set.")
os.environ["GROQ_API_KEY"] = groq_api_key
# Helper: Read .ipynb file and extract text
def load_ipynb(file):
    try:
        with open(file.name, "r", encoding="utf-8") as f:
            nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)
        text = ""
        for cell in nb.cells:
            if cell.cell_type in ["markdown", "code"]:
                text += cell.source + "\n\n"
        return [Document(page_content=text)]
    except Exception as e:
        print("Error loading .ipynb:", e)
        return []
# Helper: Read PDF or IPYNB files and build retriever chain
def process_files(files):
    try:
        all_docs = []
        for file in files:
            if file.name.endswith(".pdf"):
                loader = PyPDFLoader(file.name)
                all_docs.extend(loader.load())
            elif file.name.endswith(".ipynb"):
                all_docs.extend(load_ipynb(file))
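        # Chunk the loaded documents, embed them, and index the chunks in FAISS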
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = splitter.split_documents(all_docs)
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = FAISS.from_documents(chunks, embeddings)
        retriever = vectorstore.as_retriever()
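        # Wire the retriever to a Groq-hosted Llama 3 model via a RetrievalQA chain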
        llm = ChatGroq(model_name="llama3-70b-8192", temperature=0)
        qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
        return qa_chain
    except Exception as e:
        print("Error in processing files:", e)
        return None
# Global chain
qa_chain = None
def upload_docs(files):
    global qa_chain
    qa_chain = process_files(files)
    if qa_chain is None:
        return "❌ Error processing files. Please make sure the file format is correct."
    return "✅ PDFs or Notebooks uploaded and processed. Now ask your questions."
def ask_question(query):
    if qa_chain is None:
        return "❌ Please upload PDFs or Kaggle Notebooks first."
    try:
        return qa_chain.run(query)
    except Exception as e:
        return f"⚠ Error answering question: {e}"
# Gradio UI
with gr.Blocks() as app:
    gr.Markdown("## 🤖 Kaggle Study Assistant\nUpload Kaggle `.pdf` or `.ipynb` files and ask intelligent questions.")
    with gr.Row():
        upload = gr.File(file_types=[".pdf", ".ipynb"], file_count="multiple", label="Upload Kaggle Files")
        btn_upload = gr.Button("📥 Process Files")
    upload_output = gr.Textbox(label="Upload Status")
    btn_upload.click(fn=upload_docs, inputs=upload, outputs=upload_output)
    question = gr.Textbox(label="Ask a question about uploaded notebooks")
    answer = gr.Textbox(label="Assistant Answer", interactive=False)
    question.submit(fn=ask_question, inputs=question, outputs=answer)

app.launch()