|
import os |
|
import gradio as gr |
|
from langchain_community.vectorstores import FAISS |
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
from langchain_community.document_loaders import PyPDFLoader |
|
from langchain_groq import ChatGroq |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain.chains import RetrievalQA |
|
from langchain.docstore.document import Document |
|
import nbformat |
|
|
|
|
|
# Propagate the Groq credential into the process environment for langchain_groq.
# The original `os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")` raised an
# opaque `TypeError: str expected, not NoneType` at import time when the variable
# was unset — warn with a clear message instead and let the app still start.
_groq_api_key = os.getenv("GROQ_API_KEY")
if _groq_api_key:
    os.environ["GROQ_API_KEY"] = _groq_api_key
else:
    print("Warning: GROQ_API_KEY is not set; ChatGroq requests will fail.")
|
|
|
|
|
def load_ipynb(file):
    """Read a Jupyter/Kaggle notebook and wrap its content as LangChain documents.

    Args:
        file: An uploaded-file object exposing a ``.name`` path attribute
            (as provided by Gradio's file component).

    Returns:
        A single-element list containing one ``Document`` whose text is every
        markdown and code cell joined with blank lines, or an empty list when
        the notebook cannot be read.
    """
    try:
        with open(file.name, "r", encoding="utf-8") as handle:
            notebook = nbformat.read(handle, as_version=nbformat.NO_CONVERT)
        # Keep only human-readable cells; raw/output cells are skipped.
        sections = [
            cell.source
            for cell in notebook.cells
            if cell.cell_type in ("markdown", "code")
        ]
        combined = "".join(f"{src}\n\n" for src in sections)
        return [Document(page_content=combined)]
    except Exception as exc:
        # Best-effort loader: report and return nothing rather than crash the UI.
        print("Error loading .ipynb:", exc)
        return []
|
|
|
|
|
def process_files(files):
    """Build a RetrievalQA chain over the uploaded PDF / notebook files.

    Args:
        files: Iterable of uploaded-file objects, each exposing a ``.name``
            path attribute (as provided by Gradio's file component).

    Returns:
        A ``RetrievalQA`` chain ready to answer questions, or ``None`` when
        no supported documents could be extracted or any step fails.
    """
    try:
        all_docs = []
        for file in files:
            # Case-insensitive extension check so "NOTEBOOK.IPYNB" etc. work too.
            name = file.name.lower()
            if name.endswith(".pdf"):
                all_docs.extend(PyPDFLoader(file.name).load())
            elif name.endswith(".ipynb"):
                all_docs.extend(load_ipynb(file))

        # Guard: FAISS.from_documents fails confusingly on an empty corpus
        # (e.g. only unsupported file types were uploaded, or loading failed).
        if not all_docs:
            print("No supported documents were extracted from the upload.")
            return None

        # Chunk the documents so retrieval returns focused passages.
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = splitter.split_documents(all_docs)

        # Embed chunks and index them for similarity search.
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = FAISS.from_documents(chunks, embeddings)
        retriever = vectorstore.as_retriever()

        # Groq-hosted Llama 3 with temperature 0 for deterministic answers.
        llm = ChatGroq(model_name="llama3-70b-8192", temperature=0)
        return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    except Exception as e:
        # Top-level boundary for the upload handler: report and signal failure.
        print("Error in processing files:", e)
        return None
|
|
|
|
|
# Module-level QA chain shared between the upload and question handlers;
# None until process_files() succeeds via upload_docs().
qa_chain = None
|
|
|
def upload_docs(files):
    """Gradio handler: build the QA chain from uploaded files.

    Args:
        files: List of uploaded-file objects from the Gradio File component.

    Returns:
        A user-facing status string; also sets the module-level ``qa_chain``.
    """
    global qa_chain
    qa_chain = process_files(files)
    if qa_chain is None:
        # NOTE: status emoji restored — the source had mojibake ("β") from
        # UTF-8 emoji mis-decoded as ISO-8859-7, which even split the success
        # string literal across two lines (a syntax error).
        return "❌ Error processing files. Please make sure the file format is correct."
    return "✅ PDFs or Notebooks uploaded and processed. Now ask your questions."
|
|
|
def ask_question(query):
    """Gradio handler: answer *query* using the prepared QA chain.

    Args:
        query: The user's question as a string.

    Returns:
        The chain's answer, or a user-facing error string when no documents
        have been processed yet or the chain raises.
    """
    # Emoji restored from mojibake ("β" = truncated UTF-8 for ❌).
    if qa_chain is None:
        return "❌ Please upload PDFs or Kaggle Notebooks first."
    try:
        return qa_chain.run(query)
    except Exception as e:
        return f"❌ Error answering question: {e}"
|
|
|
|
|
# --- Gradio UI -----------------------------------------------------------
# Emoji in the header/labels restored from mojibake: the source contained
# UTF-8 emoji mis-decoded as ISO-8859-7 ("π€" = 🤖, "π₯" = 📥).
with gr.Blocks() as app:
    gr.Markdown("## 🤖 Kaggle Study Assistant\nUpload Kaggle `.pdf` or `.ipynb` files and ask intelligent questions.")

    with gr.Row():
        upload = gr.File(file_types=[".pdf", ".ipynb"], file_count="multiple", label="Upload Kaggle Files")
        btn_upload = gr.Button("📥 Process Files")

    # Processing status shown after the upload handler runs.
    upload_output = gr.Textbox(label="Upload Status")
    btn_upload.click(fn=upload_docs, inputs=upload, outputs=upload_output)

    # Question box submits on Enter; the answer box is read-only.
    question = gr.Textbox(label="Ask a question about uploaded notebooks")
    answer = gr.Textbox(label="Assistant Answer", interactive=False)
    question.submit(fn=ask_question, inputs=question, outputs=answer)

app.launch()
|
|