# Kaggle Study Assistant — Hugging Face Space entry script.
# (The original "Spaces: / Sleeping / Sleeping" lines were page-scrape residue, not code.)
# Standard library
import os

# Third-party
import gradio as gr
import nbformat
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
# Load the Groq API key from the environment (e.g. a Space secret).
# BUG FIX: the original did `os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")`
# unconditionally, which raises TypeError at import time when the variable is
# unset (os.environ values must be str, and os.getenv returns None).
_groq_api_key = os.getenv("GROQ_API_KEY")
if _groq_api_key:
    os.environ["GROQ_API_KEY"] = _groq_api_key
# Helper: Read .ipynb file and extract text | |
# Helper: read a Jupyter/Kaggle notebook upload and flatten it into documents.
def load_ipynb(file):
    """Extract markdown and code cell text from an uploaded .ipynb file.

    Returns a single-element list containing one Document with all cell
    sources concatenated (each followed by a blank line), or an empty
    list if the notebook cannot be read (best-effort, error is printed).
    """
    try:
        with open(file.name, "r", encoding="utf-8") as handle:
            notebook = nbformat.read(handle, as_version=nbformat.NO_CONVERT)
        # Each cell's source is followed by "\n\n", matching the original
        # `text += cell.source + "\n\n"` accumulation exactly (including
        # the trailing blank line after the last cell).
        body = "".join(
            cell.source + "\n\n"
            for cell in notebook.cells
            if cell.cell_type in ("markdown", "code")
        )
        return [Document(page_content=body)]
    except Exception as err:
        print("Error loading .ipynb:", err)
        return []
# Helper: Read PDF or IPYNB and build retriever chain | |
# Helper: load PDFs / notebooks and build a retrieval QA chain over them.
def process_files(files):
    """Load, chunk, embed and index the uploads.

    Returns a RetrievalQA chain on success, or None if anything fails
    (the error is printed; the caller reports the failure to the user).
    """
    try:
        documents = []
        for upload in files:
            path = upload.name
            if path.endswith(".pdf"):
                documents.extend(PyPDFLoader(path).load())
            elif path.endswith(".ipynb"):
                documents.extend(load_ipynb(upload))
            # Any other extension is silently ignored, as before.

        # 500-character chunks with 50-character overlap, as in the original.
        chunks = RecursiveCharacterTextSplitter(
            chunk_size=500, chunk_overlap=50
        ).split_documents(documents)

        index = FAISS.from_documents(
            chunks,
            HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
        )

        return RetrievalQA.from_chain_type(
            llm=ChatGroq(model_name="llama3-70b-8192", temperature=0),
            retriever=index.as_retriever(),
        )
    except Exception as err:
        # Best-effort: log and signal failure to the caller with None
        # (this also covers the empty-upload case, where FAISS raises).
        print("Error in processing files:", err)
        return None
# Module-level QA chain: populated by upload_docs() and read by ask_question();
# None until the user has successfully processed at least one upload.
qa_chain = None
def upload_docs(files):
    """Gradio callback: (re)build the global QA chain from the uploaded files.

    Returns a human-readable status string for the "Upload Status" textbox.
    """
    # NOTE(review): the "β" prefixes below look like mojibake of the original
    # emoji (likely a cross/check mark); preserved byte-for-byte here.
    global qa_chain
    qa_chain = process_files(files)
    if qa_chain is not None:
        return "β PDFs or Notebooks uploaded and processed. Now ask your questions."
    return "β Error processing files. Please make sure the file format is correct."
def ask_question(query):
    """Gradio callback: answer *query* against the indexed uploads.

    Returns the chain's answer, or a user-facing error string when no
    documents have been processed yet or the chain raises.
    """
    # NOTE(review): the "β" prefixes look like mojibake of the original emoji;
    # preserved byte-for-byte here.
    if qa_chain is not None:
        try:
            return qa_chain.run(query)
        except Exception as exc:
            return f"β Error answering question: {exc}"
    return "β Please upload PDFs or Kaggle Notebooks first."
# ---- Gradio UI ----
# NOTE(review): the "π€" / "π₯" sequences look like mojibake of the original
# emoji in the scraped source; preserved byte-for-byte here.
with gr.Blocks() as app:
    gr.Markdown(
        "## π€ Kaggle Study Assistant\nUpload Kaggle `.pdf` or `.ipynb` files and ask intelligent questions."
    )

    # Upload row: file picker plus the button that triggers indexing.
    with gr.Row():
        files_input = gr.File(
            file_types=[".pdf", ".ipynb"],
            file_count="multiple",
            label="Upload Kaggle Files",
        )
        process_btn = gr.Button("π₯ Process Files")

    status_box = gr.Textbox(label="Upload Status")
    process_btn.click(fn=upload_docs, inputs=files_input, outputs=status_box)

    # Q&A: pressing Enter in the question box runs the QA chain.
    question_box = gr.Textbox(label="Ask a question about uploaded notebooks")
    answer_box = gr.Textbox(label="Assistant Answer", interactive=False)
    question_box.submit(fn=ask_question, inputs=question_box, outputs=answer_box)

app.launch()