# NOTE(review): removed web-scrape residue (file-size banner, git blame hashes,
# and a line-number gutter) that was not part of the source and broke parsing.
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from tempfile import NamedTemporaryFile
import nbformat
# Load the Groq API key from the environment.
# Guard against an unset variable: the original `os.environ[k] = os.getenv(k)`
# raises a confusing `TypeError: str expected, not NoneType` when the key is
# missing. With the guard, a missing key surfaces later as a clear Groq auth
# error instead.
_groq_api_key = os.getenv("GROQ_API_KEY")
if _groq_api_key:
    os.environ["GROQ_API_KEY"] = _groq_api_key
# Helper: Read .ipynb file and extract text
def load_ipynb(file):
    """Extract markdown and code cell text from an uploaded notebook.

    Parameters:
        file: a binary file-like object supporting ``.read()`` (as provided
            by the Gradio ``File`` component — assumed to yield bytes; the
            original wrote them to a binary temp file).

    Returns:
        A single-element list with one ``Document`` whose ``page_content``
        is every markdown/code cell's source, each followed by a blank line.
    """
    raw = file.read()
    # A notebook file is just JSON text, so parse it in memory with
    # nbformat.reads. The original wrote a NamedTemporaryFile(delete=False)
    # and never removed it, leaking one temp file per call.
    if isinstance(raw, bytes):
        raw = raw.decode("utf-8")
    nb = nbformat.reads(raw, as_version=nbformat.NO_CONVERT)
    text = "".join(
        cell.source + "\n\n"
        for cell in nb.cells
        if cell.cell_type in ("markdown", "code")
    )
    return [Document(page_content=text)]
# Helper: Read PDF or IPYNB and build retriever chain
def process_files(files):
    """Build a RetrievalQA chain over the uploaded PDF / notebook files.

    Parameters:
        files: iterable of uploaded file objects; each must expose ``.name``
            and ``.read()``. Files with other extensions are ignored.

    Returns:
        A ``RetrievalQA`` chain backed by a FAISS index of 500-character
        chunks embedded with all-MiniLM-L6-v2 and answered by Groq Llama 3.

    Raises:
        ValueError: if no ``.pdf`` or ``.ipynb`` files were provided (the
            original crashed later inside FAISS with an opaque error).
    """
    all_docs = []
    for file in files:
        if file.name.endswith(".pdf"):
            # PyPDFLoader needs a path on disk, so persist the upload to a
            # temp file. Close it BEFORE loading (so the bytes are flushed)
            # and unlink afterwards — the original used delete=False and
            # leaked one temp file per PDF.
            with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
                temp_file.write(file.read())
                temp_path = temp_file.name
            try:
                all_docs.extend(PyPDFLoader(temp_path).load())
            finally:
                os.unlink(temp_path)
        elif file.name.endswith(".ipynb"):
            all_docs.extend(load_ipynb(file))
    if not all_docs:
        raise ValueError("No supported files (.pdf or .ipynb) were uploaded.")
    # Chunk, embed, index, and wire the retriever to the Groq LLM.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(all_docs)
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(chunks, embeddings)
    retriever = vectorstore.as_retriever()
    llm = ChatGroq(model_name="llama3-70b-8192", temperature=0)
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
# Global chain shared between the upload handler and the question handler.
qa_chain = None

def upload_docs(files):
    """Process the uploaded files and store the QA chain in the module global.

    Returns a status string for the Gradio ``Upload Status`` textbox.
    """
    global qa_chain
    qa_chain = process_files(files)
    # Restore the success message that was mojibake-garbled in the source
    # ("β" followed by a spurious line break).
    return "✅ PDFs or Notebooks uploaded and processed. Now ask your questions."
def ask_question(query):
    """Answer *query* against the uploaded documents.

    Returns an instructional error string when no documents have been
    processed yet (``qa_chain`` is still ``None``).
    """
    if qa_chain is None:
        # "❌" restores the emoji that was mojibake-garbled ("β") in the source.
        return "❌ Please upload PDFs or Kaggle Notebooks first."
    return qa_chain.run(query)
# Gradio UI: a two-step page — process uploads, then ask questions.
# Emoji in the labels ("🤖", "📥") restore text that was mojibake-garbled
# ("π€", "π₯") in the source.
with gr.Blocks() as app:
    gr.Markdown(
        "## 🤖 Kaggle Study Assistant\n"
        "Upload Kaggle `.pdf` or `.ipynb` files and ask intelligent questions."
    )
    with gr.Row():
        upload = gr.File(
            file_types=[".pdf", ".ipynb"],
            file_count="multiple",
            label="Upload Kaggle Files",
        )
        btn_upload = gr.Button("📥 Process Files")
    upload_output = gr.Textbox(label="Upload Status")
    btn_upload.click(fn=upload_docs, inputs=upload, outputs=upload_output)
    # Submitting the question textbox (Enter key) triggers the QA chain.
    question = gr.Textbox(label="Ask a question about uploaded notebooks")
    answer = gr.Textbox(label="Assistant Answer", interactive=False)
    question.submit(fn=ask_question, inputs=question, outputs=answer)
app.launch()