Spaces:

mahmoud666
/

RAG_APP

Sleeping

App Files Files Community

mahmoud666 committed on Mar 17

Commit

185bd57

verified ·

1 Parent(s): 311733e

Create app.py

Browse files

Files changed (1) hide show

app.py +197 -0

app.py ADDED Viewed

	@@ -0,0 +1,197 @@

+import streamlit as st
+import os
+from openai import OpenAI
+from langchain.memory import ConversationBufferMemory
+from langchain.vectorstores import FAISS
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.document_loaders import PyPDFLoader, TextLoader
+import tempfile
+# Page configuration
+st.set_page_config(page_title="DeepSeek RAG Chatbot", page_icon="🤖", layout="wide")
+# App title and description
+st.title("🤖 DeepSeek RAG Chatbot")
+st.subheader("A chatbot that uses your documents to give informed answers")
+# Set up API key input
+if 'DEEPSEEK_API_KEY' not in st.session_state:
+    api_key = st.text_input("Enter your DeepSeek API Key:", type="password")
+    if api_key:
+        st.session_state['DEEPSEEK_API_KEY'] = api_key
+        os.environ['DEEPSEEK_API_KEY'] = api_key
+        st.success("API Key saved!")
+        st.rerun()
+# Initialize session state variables
+if 'memory' not in st.session_state:
+    st.session_state.memory = ConversationBufferMemory(return_messages=True)
+if 'chat_history' not in st.session_state:
+    st.session_state.chat_history = []
+if 'vectorstore' not in st.session_state:
+    st.session_state.vectorstore = None
+if 'client' not in st.session_state and 'DEEPSEEK_API_KEY' in st.session_state:
+    try:
+        # Initialize DeepSeek client for chat
+        st.session_state.client = OpenAI(
+            api_key=st.session_state['DEEPSEEK_API_KEY'],
+            base_url="https://api.deepseek.com"
+        )
+        # Initialize small HuggingFace embeddings model
+        # Using paraphrase-MiniLM-L3-v2 - a smaller version with only 22MB size
+        st.session_state.embeddings = HuggingFaceEmbeddings(
+            model_name="sentence-transformers/paraphrase-MiniLM-L3-v2"
+        )
+        st.success("Models loaded successfully!")
+    except Exception as e:
+        st.error(f"Error initializing API: {str(e)}")
+# Function to process uploaded documents
+def process_documents(uploaded_files):
+    temp_dir = tempfile.mkdtemp()
+    for file in uploaded_files:
+        file_path = os.path.join(temp_dir, file.name)
+        with open(file_path, "wb") as f:
+            f.write(file.getbuffer())
+    # Load documents based on file type
+    documents = []
+    for file in uploaded_files:
+        if file.name.endswith('.pdf'):
+            loader = PyPDFLoader(os.path.join(temp_dir, file.name))
+            documents.extend(loader.load())
+        elif file.name.endswith('.txt'):
+            loader = TextLoader(os.path.join(temp_dir, file.name))
+            documents.extend(loader.load())
+    # Split documents into chunks
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1000,
+        chunk_overlap=200
+    )
+    document_chunks = text_splitter.split_documents(documents)
+    # Create or update vector store
+    if st.session_state.vectorstore is None:
+        st.session_state.vectorstore = FAISS.from_documents(
+            document_chunks,
+            st.session_state.embeddings
+        )
+    else:
+        # Add new documents to existing vectorstore
+        st.session_state.vectorstore.add_documents(document_chunks)
+    return len(document_chunks)
+# Function to retrieve relevant context from vector database
+def retrieve_context(query, k=3):
+    if st.session_state.vectorstore is None:
+        return ""
+    docs = st.session_state.vectorstore.similarity_search(query, k=k)
+    context = "\n\n".join([doc.page_content for doc in docs])
+    return context
+# Main application layout
+if 'DEEPSEEK_API_KEY' in st.session_state:
+    # Create a two-column layout
+    col1, col2 = st.columns([3, 1])
+    with col2:
+        st.header("Document Upload")
+        uploaded_files = st.file_uploader(
+            "Upload your documents",
+            accept_multiple_files=True,
+            type=["pdf", "txt"]
+        )
+        if uploaded_files:
+            if st.button("Process Documents"):
+                with st.spinner("Processing documents..."):
+                    num_chunks = process_documents(uploaded_files)
+                    st.success(f"Successfully processed {len(uploaded_files)} documents into {num_chunks} chunks!")
+        st.header("RAG Settings")
+        k_documents = st.slider("Number of documents to retrieve", min_value=1, max_value=10, value=3)
+        # Clear conversation button
+        if st.button("Clear Conversation"):
+            st.session_state.memory = ConversationBufferMemory(return_messages=True)
+            st.session_state.chat_history = []
+            st.success("Conversation cleared!")
+            st.rerun()
+        # Clear knowledge base button
+        if st.button("Clear Knowledge Base"):
+            st.session_state.vectorstore = None
+            st.success("Knowledge base cleared!")
+    with col1:
+        # Display chat history
+        for message in st.session_state.chat_history:
+            with st.chat_message(message["role"]):
+                st.write(message["content"])
+        # User input
+        user_input = st.chat_input("Type your message here...")
+        if user_input:
+            # Add user message to chat history
+            st.session_state.chat_history.append({"role": "user", "content": user_input})
+            # Display user message
+            with st.chat_message("user"):
+                st.write(user_input)
+            # Get model response
+            with st.chat_message("assistant"):
+                with st.spinner("Thinking..."):
+                    try:
+                        # Retrieve relevant context from vector database
+                        context = retrieve_context(user_input, k=k_documents)
+                        # Prepare chat history for DeepSeek API
+                        system_prompt = "You are a helpful assistant with access to a knowledge base."
+                        if context:
+                            system_prompt += f"\n\nRelevant information from knowledge base:\n{context}\n\nUse this information to answer the user's question. If the information doesn't contain the answer, just say that you don't know based on the available information."
+                        messages = [{"role": "system", "content": system_prompt}]
+                        for msg in st.session_state.chat_history:
+                            messages.append({"role": msg["role"], "content": msg["content"]})
+                        # Call DeepSeek API
+                        response = st.session_state.client.chat.completions.create(
+                            model="deepseek-chat",
+                            messages=messages,
+                            stream=False
+                        )
+                        assistant_response = response.choices[0].message.content
+                        st.write(assistant_response)
+                        # Add assistant response to chat history
+                        st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
+                    except Exception as e:
+                        st.error(f"Error: {str(e)}")
+# Sidebar with info
+with st.sidebar:
+    st.header("About")
+    st.markdown("""
+    This RAG chatbot uses:
+    - 🦜 LangChain for memory and document processing
+    - 🔍 FAISS for vector storage and retrieval
+    - 🧠 HuggingFace for lightweight embeddings (paraphrase-MiniLM-L3-v2)
+    - 🤖 DeepSeek API for AI responses
+    - 🖥️ Streamlit for the web interface
+    The chatbot can:
+    - Upload and process PDF and text documents
+    - Retrieve relevant information from documents
+    - Generate informed responses using your documents
+    - Maintain conversation context
+    """)