import streamlit as st
from groq import Groq
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
import tempfile
import os
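
# NOTE: these LangChain import paths match pre-0.2 releases; on newer versions
# the same classes live in the langchain_community package (e.g.
# langchain_community.vectorstores.FAISS). Run the app with: streamlit run app.py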
# Set API Key (read from the GROQ_API_KEY environment variable)
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Initialize Groq client
client = Groq(api_key=GROQ_API_KEY)
# Streamlit App Title
st.title("🤖 RAG Chatbot Using DeepSeek")
# File Upload
uploaded_files = st.file_uploader("📄 Upload PDFs", type=["pdf"], accept_multiple_files=True)
if uploaded_files:
    with st.spinner("Processing PDFs... ⏳"):
        docs = []
        for file in uploaded_files:
            # Save uploaded file temporarily
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
                temp_pdf.write(file.read())
                temp_pdf_path = temp_pdf.name

            # Load PDF and extract text
            loader = PyPDFLoader(temp_pdf_path)
            docs.extend(loader.load())

            # Clean up temp file
            os.remove(temp_pdf_path)
        # Split text into smaller chunks
        text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        split_docs = text_splitter.split_documents(docs)
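        # chunk_size here is measured in characters (CharacterTextSplitter's
        # length function defaults to len): ~500-character chunks with a
        # 50-character overlap keep each chunk small enough to embed cleanly
        # while preserving context across chunk boundaries.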
        # Create embeddings & FAISS vector store; keep the store in
        # st.session_state so it survives Streamlit reruns
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        st.session_state.vector_db = FAISS.from_documents(split_docs, embeddings)

        st.success("✅ PDFs processed and stored in the vector database!")
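
# Note: the block above re-reads and re-embeds the PDFs on every Streamlit
# rerun. Wrapping the HuggingFaceEmbeddings construction in a function
# decorated with @st.cache_resource would avoid reloading the model each time.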
# User Input (guard against querying before any PDFs have been indexed)
query = st.text_input("🔍 Ask a question:")
if query and "vector_db" in st.session_state:
    with st.spinner("Retrieving answer... ⏳"):
        # Perform retrieval
        retriever = st.session_state.vector_db.as_retriever()
        relevant_docs = retriever.get_relevant_documents(query)

        # Get context from retrieved docs
        context = "\n\n".join([doc.page_content for doc in relevant_docs])
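        # as_retriever() defaults to similarity search returning the top 4
        # chunks; pass search_kwargs={"k": ...} to retrieve more or fewer.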
        # Make API call to Groq (DeepSeek R1 Distill Llama 70B)
        completion = client.chat.completions.create(
            model="deepseek-r1-distill-llama-70b",
            messages=[
                {"role": "system", "content": "You are an AI assistant providing answers based on the given context."},
                {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"}
            ],
            temperature=0.6,
            max_completion_tokens=1024,
            top_p=0.95,
            stream=True,
            reasoning_format="raw"
        )
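        # reasoning_format="raw" asks Groq to include the model's chain of
        # thought (its <think>...</think> tokens) inline in the streamed
        # content; "hidden" would stream only the final answer.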
        # Stream response
        response_text = ""
        response_container = st.empty()
        for chunk in completion:
            response_text += chunk.choices[0].delta.content or ""
            response_container.markdown(response_text)

        st.success("✅ Answer generated!")