File size: 2,301 Bytes
f597613
 
f084f9a
f597613
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92fdd1d
f597613
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatGroq
from tempfile import NamedTemporaryFile

# The Groq API key is supplied through the GROQ_API_KEY environment variable
# (e.g. a Hugging Face secret). Re-assigning os.environ from os.getenv() is a
# no-op when the key exists and raises an opaque TypeError ("str expected, not
# NoneType") when it does not, so check explicitly and fail with a clear message.
if os.getenv("GROQ_API_KEY") is None:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Define it as an environment variable "
        "(or Hugging Face secret) before starting the app."
    )

# Helper to process uploaded PDFs and build vectorstore
def _resolve_pdf_path(file):
    """Return ``(path, is_temporary)`` for one uploaded item.

    Accepts a filesystem path, an object whose ``name`` attribute points at an
    existing file, or a readable binary file-like object. Only in the last
    case is a temporary copy written; the caller must delete it.
    """
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file), False

    # Prefer the on-disk path when the wrapper exposes one: reading from the
    # wrapper itself is not guaranteed to yield the uploaded bytes.
    name = getattr(file, "name", None)
    if isinstance(name, str) and os.path.isfile(name):
        return name, False

    # Read first so a failing read() does not leave a stray temp file behind.
    data = file.read()
    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(data)
    # The file is closed (and therefore flushed) here, before anyone reads it.
    return temp_file.name, True


def process_pdfs(files):
    """Build a RetrievalQA chain over the text of the given PDF uploads.

    Args:
        files: Iterable of uploaded PDFs, each a path, an object with a
            ``name`` path attribute, or a readable binary file-like object.

    Returns:
        A ``RetrievalQA`` chain backed by a FAISS index of 500-character
        chunks (50 overlap) embedded with ``all-MiniLM-L6-v2``.

    Raises:
        ValueError: If no files are given, or no text could be extracted.
    """
    if not files:
        raise ValueError("No PDF files were provided.")

    all_docs = []
    for file in files:
        pdf_path, is_temporary = _resolve_pdf_path(file)
        try:
            all_docs.extend(PyPDFLoader(pdf_path).load())
        finally:
            # Temp copies are created with delete=False, so remove them here.
            if is_temporary:
                os.remove(pdf_path)

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(all_docs)
    if not chunks:
        # Building a FAISS index from zero documents fails with an obscure
        # error; report the real cause (e.g. image-only PDFs) instead.
        raise ValueError("No extractable text was found in the uploaded PDFs.")

    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(chunks, embeddings)
    retriever = vectorstore.as_retriever()

    llm = ChatGroq(model_name="llama3-70b-8192", temperature=0)
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return qa_chain

# Global chain (reset with new uploads)
# Shared module-level state: stays None until upload_pdfs() builds a chain,
# and ask_question() refuses to answer while it is still None.
qa_chain = None

# Upload + Process PDFs
def upload_pdfs(files):
    """Index the uploaded PDFs and store the resulting chain globally.

    Args:
        files: The uploads from the file component; ``None`` or an empty
            list when the user pressed the button without selecting files.

    Returns:
        A status message for display in the UI.
    """
    global qa_chain
    # Nothing selected: report it instead of crashing, and keep any chain
    # built from a previous upload intact.
    if not files:
        return "❌ No PDFs selected. Please upload at least one PDF first."
    qa_chain = process_pdfs(files)
    return "✅ PDFs uploaded and processed. Now ask your questions."

# Ask a question
def ask_question(query):
    """Answer a question using the chain built from the uploaded PDFs.

    Args:
        query: The user's question text.

    Returns:
        The chain's answer, or a message explaining what the user must do
        first (enter a question, or upload PDFs).
    """
    # Reject blank input up front so no retrieval/LLM call is spent on it.
    if not query or not query.strip():
        return "❌ Please enter a question."
    if qa_chain is None:
        return "❌ Please upload Kaggle notebooks/competition PDFs first."
    return qa_chain.run(query)

# Gradio UI
# Components are instantiated inside the Blocks context: a component created
# outside of it is not attached to the layout, so it would neither be rendered
# nor be usable as an event input/output of this app.
with gr.Blocks() as app:
    gr.Markdown("## 🤖 Kaggle Study Assistant\nUpload PDFs from Kaggle and ask intelligent questions.")

    # Upload section: pick PDFs, press the button, see the status message.
    upload = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Kaggle PDFs")
    btn_upload = gr.Button("Process PDFs")
    upload_output = gr.Textbox(visible=True)

    # Q&A section: pressing Enter in the question box triggers the answer.
    question = gr.Textbox(label="Ask a question about uploaded notebooks")
    answer = gr.Textbox(label="Assistant Answer")

    btn_upload.click(fn=upload_pdfs, inputs=upload, outputs=upload_output)
    question.submit(fn=ask_question, inputs=question, outputs=answer)

app.launch()