CSV-ChatBot

Sleeping

App Files Files Community

shahtab

RustX committed on May 5, 2023

Commit

29971f8

0 Parent(s):

Duplicate from RustX/CSV-ChatBot

Browse files

Co-authored-by: Rustam Ismailov <[email protected]>

Files changed (13) hide show

.gitattributes +34 -0
.streamlit/config.toml +7 -0
Dockerfile +20 -0
README.md +11 -0
chatbot_csv.py +109 -0
config/config.toml +13 -0
modules/chatbot.py +49 -0
modules/embedder.py +58 -0
modules/history.py +57 -0
modules/layout.py +42 -0
modules/sidebar.py +55 -0
modules/utils.py +62 -0
requirements.txt +75 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,7 @@

+[theme]
+base = "light"
+primaryColor = "#89CFF0"
+backgroundColor = "#E0F7FE"
+secondaryBackgroundColor = "#FFFCE4"
+textColor = "#000000"
+font = "sans serif"

Dockerfile ADDED Viewed

	@@ -0,0 +1,20 @@

+FROM python:3.9.5
+WORKDIR /code
+COPY requirements.txt ./
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+        PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+COPY --chown=user . $HOME/app
+COPY --chown=user config/config.toml $HOME/app/.streamlit/config.toml
+CMD ["streamlit" , "run", "chatbot_csv.py", "--server.port=7860", "--server.address=0.0.0.0"]

README.md ADDED Viewed

	@@ -0,0 +1,11 @@

+---
+title: CSV ChatBot
+emoji: 💻
+colorFrom: indigo
+colorTo: pink
+sdk: docker
+pinned: false
+duplicated_from: RustX/CSV-ChatBot
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

chatbot_csv.py ADDED Viewed

	@@ -0,0 +1,109 @@

+import os
+import streamlit as st
+from dotenv import load_dotenv
+from io import BytesIO
+from io import StringIO
+import sys
+import re
+from langchain.agents import create_csv_agent
+from langchain.chat_models import ChatOpenAI
+from modules.history import ChatHistory
+from modules.layout import Layout
+from modules.utils import Utilities
+from modules.sidebar import Sidebar
+#To be able to update the changes made to modules in localhost,
+#you can press the "r" key on the localhost page to refresh and reflect the changes made to the module files.
+def reload_module(module_name):
+    import importlib
+    import sys
+    if module_name in sys.modules:
+        importlib.reload(sys.modules[module_name])
+    return sys.modules[module_name]
+# Reload the project modules, then rebind the class names imported at the top
+# of the file to the definitions found in the reloaded modules.
+history_module = reload_module('modules.history')
+layout_module = reload_module('modules.layout')
+utils_module = reload_module('modules.utils')
+sidebar_module = reload_module('modules.sidebar')
+ChatHistory = history_module.ChatHistory
+Layout = layout_module.Layout
+Utilities = utils_module.Utilities
+Sidebar = sidebar_module.Sidebar
+def init():
+    load_dotenv()
+    st.set_page_config(layout="wide", page_icon="💬", page_title="ChatBot-CSV")
+def main():
+    init()
+    layout, sidebar, utils = Layout(), Sidebar(), Utilities()
+    layout.show_header()
+    user_api_key = utils.load_api_key()
+    if not user_api_key:
+        layout.show_api_key_missing()
+    else:
+        os.environ["OPENAI_API_KEY"] = user_api_key
+        uploaded_file = utils.handle_upload()
+        if uploaded_file:
+            history = ChatHistory()
+            sidebar.show_options()
+            uploaded_file_content = BytesIO(uploaded_file.getvalue())
+            try:
+                chatbot = utils.setup_chatbot(
+                    uploaded_file, st.session_state["model"], st.session_state["temperature"]
+                )
+                st.session_state["chatbot"] = chatbot
+                if st.session_state["ready"]:
+                    response_container, prompt_container = st.container(), st.container()
+                    with prompt_container:
+                        is_ready, user_input = layout.prompt_form()
+                        history.initialize(uploaded_file)
+                        if st.session_state["reset_chat"]:
+                            history.reset(uploaded_file)
+                        if is_ready:
+                            history.append("user", user_input)
+                            output = st.session_state["chatbot"].conversational_chat(user_input)
+                            history.append("assistant", output)
+                    history.generate_messages(response_container)
+                    if st.session_state["show_csv_agent"]:
+                        query = st.text_input(label="Use CSV agent for precise information about the structure of your csv file / csv 파일 구조에 대한 정확한 정보를 얻으려면 CSV 에이전트를 사용하십시오", placeholder="ex : how many rows in my file ? / 예: 내 파일에 몇 개의 행이 있습니까?")
+                        if query != "":
+                            old_stdout = sys.stdout
+                            sys.stdout = captured_output = StringIO()
+                            agent = create_csv_agent(ChatOpenAI(temperature=0), uploaded_file_content, verbose=True, max_iterations=8)
+                            result = agent.run(query)
+                            sys.stdout = old_stdout
+                            thoughts = captured_output.getvalue()
+                            cleaned_thoughts = re.sub(r'\x1b\[[0-9;]*[a-zA-Z]', '', thoughts)
+                            cleaned_thoughts = re.sub(r'\[1m>', '', cleaned_thoughts)
+                            with st.expander("Show agent's thoughts"):
+                                st.write(cleaned_thoughts)
+                            st.write(result)
+            except Exception as e:
+                st.error(f"Error: {str(e)}")
+    sidebar.about()
+# Start the app only when this file is executed as a script
+if __name__ == "__main__":
+    main()

config/config.toml ADDED Viewed

	@@ -0,0 +1,13 @@

+[server]
+headless = true
+enableCORS=false
+enableXsrfProtection=false
+port = 7860
+[theme]
+base = "light"
+primaryColor = "#89CFF0"
+backgroundColor = "#E0F7FE"
+secondaryBackgroundColor = "#FFFCE4"
+textColor = "#000000"
+font = "sans serif"

modules/chatbot.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import streamlit as st
+from langchain.chat_models import ChatOpenAI
+from langchain.chains import ConversationalRetrievalChain
+from langchain.prompts.prompt import PromptTemplate
+class Chatbot:
+    _template = """다음 대화와 후속 질문이 주어지면 후속 질문을 독립형 질문으로 바꾸십시오.
+    질문이 CSV 파일의 정보에 관한 것이라고 가정할 수 있습니다.
+    Chat History:
+    {chat_history}
+    Follow-up entry: {question}
+    Standalone question:"""
+    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
+    qa_template = """"csv 파일의 정보를 기반으로 질문에 답하는 AI 대화 비서입니다.
+    csv 파일의 데이터와 질문이 제공되며 사용자가 필요한 정보를 찾도록 도와야 합니다.
+    알고 있는 정보에 대해서만 응답하십시오. 답을 지어내려고 하지 마세요.
+    귀하의 답변은 짧고 친근하며 동일한 언어로 작성되어야 합니다.
+    question: {question}
+    =========
+    {context}
+    =======
+    """
+    QA_PROMPT = PromptTemplate(template=qa_template, input_variables=["question", "context"])
+    def __init__(self, model_name, temperature, vectors):
+        self.model_name = model_name
+        self.temperature = temperature
+        self.vectors = vectors
+    def conversational_chat(self, query):
+        """
+        Starts a conversational chat with a model via Langchain
+        """
+        chain = ConversationalRetrievalChain.from_llm(
+            llm=ChatOpenAI(model_name=self.model_name, temperature=self.temperature),
+            condense_question_prompt=self.CONDENSE_QUESTION_PROMPT,
+            qa_prompt=self.QA_PROMPT,
+            retriever=self.vectors.as_retriever(),
+        )
+        result = chain({"question": query, "chat_history": st.session_state["history"]})
+        st.session_state["history"].append((query, result["answer"]))
+        return result["answer"]

modules/embedder.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import os
+import pickle
+import tempfile
+from langchain.document_loaders.csv_loader import CSVLoader
+from langchain.vectorstores import FAISS
+from langchain.embeddings.openai import OpenAIEmbeddings
+class Embedder:
+    def __init__(self):
+        self.PATH = "embeddings"
+        self.createEmbeddingsDir()
+    def createEmbeddingsDir(self):
+        """
+        Creates a directory to store the embeddings vectors
+        """
+        if not os.path.exists(self.PATH):
+            os.mkdir(self.PATH)
+    def storeDocEmbeds(self, file, filename):
+        """
+        Stores document embeddings using Langchain and FAISS
+        """
+        # Write the uploaded file to a temporary file
+        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as tmp_file:
+            tmp_file.write(file)
+            tmp_file_path = tmp_file.name
+        # Load the data from the file using Langchain
+        loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8")
+        data = loader.load_and_split()
+        # Create an embeddings object using Langchain
+        embeddings = OpenAIEmbeddings()
+        # Store the embeddings vectors using FAISS
+        vectors = FAISS.from_documents(data, embeddings)
+        os.remove(tmp_file_path)
+        # Save the vectors to a pickle file
+        with open(f"{self.PATH}/{filename}.pkl", "wb") as f:
+            pickle.dump(vectors, f)
+    def getDocEmbeds(self, file, filename):
+        """
+        Retrieves document embeddings
+        """
+        # Check if embeddings vectors have already been stored in a pickle file
+        if not os.path.isfile(f"{self.PATH}/{filename}.pkl"):
+            # If not, store the vectors using the storeDocEmbeds function
+            self.storeDocEmbeds(file, filename)
+        # Load the vectors from the pickle file
+        with open(f"{self.PATH}/{filename}.pkl", "rb") as f:
+            vectors = pickle.load(f)
+        return vectors

modules/history.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import os
+import streamlit as st
+from streamlit_chat import message
+class ChatHistory:
+    def __init__(self):
+        self.history = st.session_state.get("history", [])
+        st.session_state["history"] = self.history
+    def default_greeting(self):
+        return "안녕 ! 👋"
+    def default_prompt(self, topic):
+        return f"안녕하세요 ! {topic}에 대해 무엇이든 물어보세요 🤗"
+    def initialize_user_history(self):
+        st.session_state["user"] = [self.default_greeting()]
+    def initialize_assistant_history(self, uploaded_file):
+        st.session_state["assistant"] = [self.default_prompt(uploaded_file.name)]
+    def initialize(self, uploaded_file):
+        if "assistant" not in st.session_state:
+            self.initialize_assistant_history(uploaded_file)
+        if "user" not in st.session_state:
+            self.initialize_user_history()
+    def reset(self, uploaded_file):
+        st.session_state["history"] = []
+        self.initialize_user_history()
+        self.initialize_assistant_history(uploaded_file)
+        st.session_state["reset_chat"] = False
+    def append(self, mode, message):
+        st.session_state[mode].append(message)
+    def generate_messages(self, container):
+        if st.session_state["assistant"]:
+            with container:
+                for i in range(len(st.session_state["assistant"])):
+                    message(
+                        st.session_state["user"][i],
+                        is_user=True,
+                        key=f"{i}_user",
+                        avatar_style="big-smile",
+                    )
+                    message(st.session_state["assistant"][i], key=str(i), avatar_style="thumbs")
+    def load(self):
+        if os.path.exists(self.history_file):
+            with open(self.history_file, "r") as f:
+                self.history = f.read().splitlines()
+    def save(self):
+        with open(self.history_file, "w") as f:
+            f.write("\n".join(self.history))

modules/layout.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import streamlit as st
+class Layout:
+    def show_header(self):
+        """
+        Displays the header of the app
+        """
+        st.markdown(
+            """
+            <h1 style='text-align: center;'>CSV-ChatBot, Talk with your  csv-data ! / CSV-ChatBot, csv 데이터로 대화하세요! 💬</h1>
+            """,
+            unsafe_allow_html=True,
+        )
+    def show_api_key_missing(self):
+        """
+        Displays a message if the user has not entered an API key
+        """
+        st.markdown(
+            """
+            <div style='text-align: center;'>
+                <h4>Enter your <a href="https://platform.openai.com/account/api-keys" target="_blank">OpenAI API key</a> to start chatting / 채팅을 시작하려면 <a href="https://platform.openai.com/account/api-keys" target="_blank">OpenAI API 키</a>를 입력하세요.  😉</h4>
+            </div>
+            """,
+            unsafe_allow_html=True,
+        )
+    def prompt_form(self):
+        """
+        Displays the prompt form
+        """
+        with st.form(key="my_form", clear_on_submit=True):
+            user_input = st.text_area(
+                "Query: / 질문:",
+                placeholder="Ask me anything about the document... / 문서에 대해 무엇이든 물어보세요...",
+                key="input",
+                label_visibility="collapsed",
+            )
+            submit_button = st.form_submit_button(label="Send / 보내주세요")
+            is_ready = submit_button and user_input
+        return is_ready, user_input

modules/sidebar.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import streamlit as st
+class Sidebar:
+    MODEL_OPTIONS = ["gpt-3.5-turbo", "gpt-4"]
+    TEMPERATURE_MIN_VALUE = 0.0
+    TEMPERATURE_MAX_VALUE = 1.0
+    TEMPERATURE_DEFAULT_VALUE = 0.0
+    TEMPERATURE_STEP = 0.01
+    @staticmethod
+    def about():
+        about = st.sidebar.expander("🤖에 대한")
+        sections = [
+            "#### CSV-ChatBot는 사용자가 보다 직관적인 방식으로 CSV 데이터를 논의할 수 있도록 설계된 대화형 메모리 기능을 갖춘 AI 챗봇입니다. 📄",
+            "#### 그는 대규모 언어 모델을 사용하여 CSV 데이터를 더 잘 이해할 수 있도록 원활하고 상황에 맞는 자연어 상호 작용을 사용자에게 제공합니다. 🌐",
+            "#### [Langchain](https://github.com/hwchase17/langchain), [OpenAI](https://platform.openai.com/docs/models/gpt-3-5) 및 [Streamlit](https://github.com/streamlit/streamlit)에 의해 구동됩니다 ⚡",
+            "#### Source code : [RustX/ChatBot-CSV](https://github.com/RustX2802/CSV-ChatBot)",
+        ]
+        for section in sections:
+            about.write(section)
+    @staticmethod
+    def reset_chat_button():
+        if st.button("Reset chat / 채팅 재설정"):
+            st.session_state["reset_chat"] = True
+        st.session_state.setdefault("reset_chat", False)
+    def model_selector(self):
+        model = st.selectbox(label="Model / 모델", options=self.MODEL_OPTIONS)
+        st.session_state["model"] = model
+    def temperature_slider(self):
+        temperature = st.slider(
+            label="Temperature / 온도",
+            min_value=self.TEMPERATURE_MIN_VALUE,
+            max_value=self.TEMPERATURE_MAX_VALUE,
+            value=self.TEMPERATURE_DEFAULT_VALUE,
+            step=self.TEMPERATURE_STEP,
+        )
+        st.session_state["temperature"] = temperature
+    def csv_agent_button(self):
+        st.session_state.setdefault("show_csv_agent", False)
+        if st.sidebar.button("CSV Agent"):
+            st.session_state["show_csv_agent"] = not st.session_state["show_csv_agent"]
+    def show_options(self):
+        with st.sidebar.expander("🛠️ Tools / 도구", expanded=False):
+            self.reset_chat_button()
+            self.csv_agent_button()
+            self.model_selector()
+            self.temperature_slider()
+            st.session_state.setdefault("model", self.MODEL_OPTIONS[0])
+            st.session_state.setdefault("temperature", self.TEMPERATURE_DEFAULT_VALUE)

modules/utils.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import os
+import pandas as pd
+import streamlit as st
+from modules.chatbot import Chatbot
+from modules.embedder import Embedder
+class Utilities:
+    @staticmethod
+    def load_api_key():
+        """
+        Loads the OpenAI API key from the .env file or from the user's input
+        and returns it
+        """
+        if os.path.exists(".env") and os.environ.get("OPENAI_API_KEY") is not None:
+            user_api_key = os.environ["OPENAI_API_KEY"]
+            st.sidebar.success("API key loaded from .env / .env에서 로드된 API 키", icon="🚀")
+        else:
+            user_api_key = st.sidebar.text_input(
+                label="#### Your OpenAI API key / OpenAI API 키 👇", placeholder="Paste your openAI API key, sk-", type="password"
+            )
+            if user_api_key:
+                st.sidebar.success("API key loaded / API 키가 로드되었습니다", icon="🚀")
+        return user_api_key
+    @staticmethod
+    def handle_upload():
+        """
+        Handles the file upload and displays the uploaded file
+        """
+        uploaded_file = st.sidebar.file_uploader("upload", type="csv", label_visibility="collapsed")
+        if uploaded_file is not None:
+            def show_user_file(uploaded_file):
+                file_container = st.expander("Your CSV file : / CSV 파일:")
+                shows = pd.read_csv(uploaded_file)
+                uploaded_file.seek(0)
+                file_container.write(shows)
+            show_user_file(uploaded_file)
+        else:
+            st.sidebar.info(
+                "👆 Upload your CSV file to get started, / 시작하려면 CSV 파일을 업로드하세요 "
+                "sample for try : / 시도해 볼 샘플: [example.csv](https://drive.google.com/file/d/1g7x0Ydg5kr51Ha2XIYBSQBVUw1yYlgmc/view?usp=share_link)"
+            )
+            st.session_state["reset_chat"] = True
+        return uploaded_file
+    @staticmethod
+    def setup_chatbot(uploaded_file, model, temperature):
+        """
+        Sets up the chatbot with the uploaded file, model, and temperature
+        """
+        embeds = Embedder()
+        with st.spinner("Processing... / 처리 중..."):
+            uploaded_file.seek(0)
+            file = uploaded_file.read()
+            vectors = embeds.getDocEmbeds(file, uploaded_file.name)
+            chatbot = Chatbot(model, temperature, vectors)
+        st.session_state["ready"] = True
+        return chatbot

requirements.txt ADDED Viewed

	@@ -0,0 +1,75 @@

+aiohttp==3.8.4
+aiosignal==1.3.1
+altair==4.2.2
+async-timeout==4.0.2
+attrs==23.1.0
+blinker==1.6.2
+cachetools==5.3.0
+certifi==2022.12.7
+charset-normalizer==3.1.0
+click==8.1.3
+colorama==0.4.6
+dataclasses-json==0.5.7
+decorator==5.1.1
+entrypoints==0.4
+faiss-cpu==1.7.3
+frozenlist==1.3.3
+gitdb==4.0.10
+GitPython==3.1.31
+greenlet==2.0.2
+hupper==1.12
+idna==3.4
+importlib-metadata==6.5.0
+Jinja2==3.1.2
+jsonschema==4.17.3
+langchain==0.0.146
+markdown-it-py==2.2.0
+MarkupSafe==2.1.2
+marshmallow==3.19.0
+marshmallow-enum==1.5.1
+mdurl==0.1.2
+multidict==6.0.4
+mypy-extensions==1.0.0
+nest-asyncio==1.5.6
+numexpr==2.8.4
+numpy==1.24.2
+openai==0.27.4
+openapi-schema-pydantic==1.2.4
+packaging==23.1
+pandas==1.5.3
+Pillow==9.5.0
+protobuf==3.20.3
+python-dotenv==1.0.0
+pyarrow==11.0.0
+pydantic==1.10.7
+pydeck==0.8.1b0
+Pygments==2.15.1
+Pympler==1.0.1
+pyrsistent==0.19.3
+python-dateutil==2.8.2
+pytz==2023.3
+pytz-deprecation-shim==0.1.0.post0
+PyYAML==6.0
+regex==2023.3.23
+requests==2.28.2
+rich==13.3.4
+six==1.16.0
+smmap==5.0.0
+SQLAlchemy==1.4.47
+streamlit==1.21.0
+streamlit-chat==0.0.2.2
+tenacity==8.2.2
+tiktoken==0.3.3
+toml==0.10.2
+toolz==0.12.0
+tornado==6.3
+tqdm==4.65.0
+typing-inspect==0.8.0
+typing_extensions==4.5.0
+tzdata==2023.3
+tzlocal==4.3
+urllib3==1.26.15
+validators==0.20.0
+watchdog==3.0.0
+yarl==1.8.2
+zipp==3.15.0