Spaces:
Sleeping
Sleeping
Duplicate from RustX/CSV-ChatBot
Browse files
Co-authored-by: Rustam Ismailov <[email protected]>
- .gitattributes +34 -0
- .streamlit/config.toml +7 -0
- Dockerfile +20 -0
- README.md +11 -0
- chatbot_csv.py +109 -0
- config/config.toml +13 -0
- modules/chatbot.py +49 -0
- modules/embedder.py +58 -0
- modules/history.py +57 -0
- modules/layout.py +42 -0
- modules/sidebar.py +55 -0
- modules/utils.py +62 -0
- requirements.txt +75 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.streamlit/config.toml
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[theme]
|
2 |
+
base = "light"
|
3 |
+
primaryColor = "#89CFF0"
|
4 |
+
backgroundColor = "#E0F7FE"
|
5 |
+
secondaryBackgroundColor = "#FFFCE4"
|
6 |
+
textColor = "#000000"
|
7 |
+
font = "sans serif"
|
Dockerfile
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9.5
|
2 |
+
|
3 |
+
WORKDIR /code
|
4 |
+
|
5 |
+
COPY requirements.txt ./
|
6 |
+
|
7 |
+
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
8 |
+
|
9 |
+
RUN useradd -m -u 1000 user
|
10 |
+
USER user
|
11 |
+
ENV HOME=/home/user \
|
12 |
+
PATH=/home/user/.local/bin:$PATH
|
13 |
+
|
14 |
+
WORKDIR $HOME/app
|
15 |
+
|
16 |
+
COPY --chown=user . $HOME/app
|
17 |
+
|
18 |
+
COPY --chown=user config/config.toml $HOME/app/.streamlit/config.toml
|
19 |
+
|
20 |
+
CMD ["streamlit" , "run", "chatbot_csv.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
README.md
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: CSV ChatBot
|
3 |
+
emoji: ๐ป
|
4 |
+
colorFrom: indigo
|
5 |
+
colorTo: pink
|
6 |
+
sdk: docker
|
7 |
+
pinned: false
|
8 |
+
duplicated_from: RustX/CSV-ChatBot
|
9 |
+
---
|
10 |
+
|
11 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
chatbot_csv.py
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import streamlit as st
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
from io import BytesIO
|
5 |
+
from io import StringIO
|
6 |
+
import sys
|
7 |
+
import re
|
8 |
+
from langchain.agents import create_csv_agent
|
9 |
+
from langchain.chat_models import ChatOpenAI
|
10 |
+
from modules.history import ChatHistory
|
11 |
+
from modules.layout import Layout
|
12 |
+
from modules.utils import Utilities
|
13 |
+
from modules.sidebar import Sidebar
|
14 |
+
|
15 |
+
# To pick up changes made to the module files while running on localhost,
# press the "r" key on the page to refresh; the helper below reloads the
# project modules so the refreshed run uses the edited code.
def reload_module(module_name):
    """Return a freshly loaded module object for ``module_name``.

    If the module is already present in ``sys.modules`` it is reloaded in
    place; otherwise it is imported for the first time. (The previous version
    raised ``KeyError`` in the second case.)

    Args:
        module_name: Dotted module path, e.g. ``"modules.history"``.

    Returns:
        The (re)loaded module object.

    Raises:
        ImportError: If the module cannot be imported.
    """
    import importlib
    import sys

    cached = sys.modules.get(module_name)
    if cached is None:
        # Not imported yet: nothing to reload, so import it normally.
        return importlib.import_module(module_name)
    return importlib.reload(cached)
|
23 |
+
|
24 |
+
# Reload the project modules (same order as before) and rebind the class names
# imported at the top of the file to the reloaded definitions.
(
    history_module,
    layout_module,
    utils_module,
    sidebar_module,
) = (
    reload_module(dotted_name)
    for dotted_name in (
        'modules.history',
        'modules.layout',
        'modules.utils',
        'modules.sidebar',
    )
)

ChatHistory, Layout, Utilities, Sidebar = (
    history_module.ChatHistory,
    layout_module.Layout,
    utils_module.Utilities,
    sidebar_module.Sidebar,
)
|
33 |
+
|
34 |
+
def init():
    """Load environment variables and configure the Streamlit page.

    Calls ``load_dotenv()`` and then sets a wide layout, the page icon and the
    page title.
    """
    load_dotenv()
    # The icon literal was mis-encoded in this copy of the file; restored to
    # the speech-balloon emoji it decodes to.
    st.set_page_config(layout="wide", page_icon="💬", page_title="ChatBot-CSV")
|
37 |
+
|
38 |
+
|
39 |
+
def _strip_terminal_markup(text):
    """Remove ANSI escape sequences and leftover ``[1m>`` markers from ``text``."""
    text = re.sub(r'\x1b\[[0-9;]*[a-zA-Z]', '', text)
    return re.sub(r'\[1m>', '', text)


def _run_csv_agent(query, csv_buffer):
    """Run the LangChain CSV agent on ``query`` and capture its verbose output.

    Args:
        query: The question typed by the user.
        csv_buffer: File-like object holding the uploaded CSV content.

    Returns:
        A ``(result, thoughts)`` tuple: the agent's answer and everything it
        printed to stdout, with terminal markup stripped.
    """
    captured_output = StringIO()
    old_stdout = sys.stdout
    sys.stdout = captured_output
    try:
        agent = create_csv_agent(
            ChatOpenAI(temperature=0), csv_buffer, verbose=True, max_iterations=8
        )
        result = agent.run(query)
    finally:
        # Always restore stdout; previously an exception raised by the agent
        # left stdout redirected for the rest of the process.
        sys.stdout = old_stdout
    return result, _strip_terminal_markup(captured_output.getvalue())


def main():
    """Render the app: API key prompt, CSV upload, chat and optional CSV agent.

    Any exception raised while setting up the chatbot or handling a chat or
    agent request is shown to the user with ``st.error`` instead of
    propagating.
    """
    init()
    layout, sidebar, utils = Layout(), Sidebar(), Utilities()
    layout.show_header()
    user_api_key = utils.load_api_key()

    if not user_api_key:
        layout.show_api_key_missing()
    else:
        os.environ["OPENAI_API_KEY"] = user_api_key
        uploaded_file = utils.handle_upload()

        if uploaded_file:
            history = ChatHistory()
            sidebar.show_options()

            # Independent copy of the upload for the CSV agent, so reading it
            # does not disturb the uploaded file's own read position.
            uploaded_file_content = BytesIO(uploaded_file.getvalue())

            try:
                chatbot = utils.setup_chatbot(
                    uploaded_file, st.session_state["model"], st.session_state["temperature"]
                )
                st.session_state["chatbot"] = chatbot

                if st.session_state["ready"]:
                    response_container, prompt_container = st.container(), st.container()

                    with prompt_container:
                        is_ready, user_input = layout.prompt_form()

                        history.initialize(uploaded_file)
                        if st.session_state["reset_chat"]:
                            history.reset(uploaded_file)

                        if is_ready:
                            history.append("user", user_input)
                            output = st.session_state["chatbot"].conversational_chat(user_input)
                            history.append("assistant", output)

                    history.generate_messages(response_container)

                    if st.session_state["show_csv_agent"]:
                        query = st.text_input(
                            label="Use CSV agent for precise information about the structure of your csv file / csv 파일 구조에 대한 정확한 정보를 얻으려면 CSV 에이전트를 사용하십시오",
                            placeholder="ex : how many rows in my file ? / 예: 내 파일에 몇 개의 행이 있습니까?",
                        )
                        if query:
                            result, cleaned_thoughts = _run_csv_agent(query, uploaded_file_content)

                            with st.expander("Show agent's thoughts"):
                                st.write(cleaned_thoughts)

                            st.write(result)

            except Exception as e:
                # Top-level UI boundary: report the failure instead of crashing.
                st.error(f"Error: {str(e)}")

    sidebar.about()


if __name__ == "__main__":
    main()
|
config/config.toml
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[server]
|
2 |
+
headless = true
|
3 |
+
enableCORS=false
|
4 |
+
enableXsrfProtection=false
|
5 |
+
port = 7860
|
6 |
+
|
7 |
+
[theme]
|
8 |
+
base = "light"
|
9 |
+
primaryColor = "#89CFF0"
|
10 |
+
backgroundColor = "#E0F7FE"
|
11 |
+
secondaryBackgroundColor = "#FFFCE4"
|
12 |
+
textColor = "#000000"
|
13 |
+
font = "sans serif"
|
modules/chatbot.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from langchain.chat_models import ChatOpenAI
|
3 |
+
from langchain.chains import ConversationalRetrievalChain
|
4 |
+
from langchain.prompts.prompt import PromptTemplate
|
5 |
+
|
6 |
+
|
7 |
+
class Chatbot:
    """Conversational question answering over a vector store of CSV rows.

    The Korean prompt texts below were mis-encoded in this copy of the file
    and have been restored to readable UTF-8.
    """

    # Prompt that rewrites a follow-up question into a standalone question.
    _template = (
        "다음 대화와 후속 질문이 주어지면 후속 질문을 독립형 질문으로 바꾸십시오.\n"
        "질문이 CSV 파일의 정보에 관한 것이라고 가정할 수 있습니다.\n"
        "Chat History:\n"
        "{chat_history}\n"
        "Follow-up entry: {question}\n"
        "Standalone question:"
    )

    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

    # Prompt that answers the question from the retrieved CSV context.
    # NOTE(review): the original literal opens with four quote characters, so
    # the prompt text itself starts with a stray '"'. Kept as-is to avoid
    # changing the prompt; confirm whether it is intentional.
    qa_template = (
        "\"csv 파일의 정보를 기반으로 질문에 답하는 AI 대화 비서입니다.\n"
        "csv 파일의 데이터와 질문이 제공되며 사용자가 필요한 정보를 찾도록 도와야 합니다.\n"
        "알고 있는 정보에 대해서만 응답하십시오. 답을 지어내려고 하지 마세요.\n"
        "귀하의 답변은 짧고 친근하며 동일한 언어로 작성되어야 합니다.\n"
        "question: {question}\n"
        "=========\n"
        "{context}\n"
        "=======\n"
    )

    QA_PROMPT = PromptTemplate(template=qa_template, input_variables=["question", "context"])

    def __init__(self, model_name, temperature, vectors):
        """Store the chat model settings and the vector store to retrieve from.

        Args:
            model_name: Model name passed to ``ChatOpenAI``.
            temperature: Sampling temperature passed to ``ChatOpenAI``.
            vectors: Vector store; must provide ``as_retriever()``.
        """
        self.model_name = model_name
        self.temperature = temperature
        self.vectors = vectors

    def conversational_chat(self, query):
        """Answer ``query`` using the chat history kept in the session state.

        Builds a ``ConversationalRetrievalChain``, runs it with
        ``st.session_state["history"]`` as chat history, appends the
        ``(query, answer)`` pair to that history and returns the answer.
        """
        chain = ConversationalRetrievalChain.from_llm(
            llm=ChatOpenAI(model_name=self.model_name, temperature=self.temperature),
            condense_question_prompt=self.CONDENSE_QUESTION_PROMPT,
            qa_prompt=self.QA_PROMPT,
            retriever=self.vectors.as_retriever(),
        )
        result = chain({"question": query, "chat_history": st.session_state["history"]})

        st.session_state["history"].append((query, result["answer"]))

        return result["answer"]
|
modules/embedder.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pickle
|
3 |
+
import tempfile
|
4 |
+
from langchain.document_loaders.csv_loader import CSVLoader
|
5 |
+
from langchain.vectorstores import FAISS
|
6 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
7 |
+
|
8 |
+
|
9 |
+
class Embedder:
    """Create, cache and load FAISS embeddings for uploaded CSV files."""

    def __init__(self):
        """Set the cache directory and make sure it exists."""
        self.PATH = "embeddings"
        self.createEmbeddingsDir()

    def createEmbeddingsDir(self):
        """Create the directory that stores the embedding vectors.

        Uses ``exist_ok=True`` so a concurrent creation between a check and
        the ``mkdir`` call can no longer raise ``FileExistsError``.
        """
        os.makedirs(self.PATH, exist_ok=True)

    def _pickle_path(self, filename):
        """Return the cache file path for ``filename`` inside ``self.PATH``."""
        # Only the final path component is used so an uploaded file name
        # cannot point outside the cache directory.
        return os.path.join(self.PATH, f"{os.path.basename(filename)}.pkl")

    def storeDocEmbeds(self, file, filename):
        """Embed the CSV content and pickle the resulting vector store.

        Args:
            file: Raw bytes of the uploaded CSV file.
            filename: Name used to build the cache file name.
        """
        # CSVLoader reads from a path, so write the upload to a temporary file.
        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as tmp_file:
            tmp_file.write(file)
            tmp_file_path = tmp_file.name

        try:
            # Load the rows as documents and embed them into a FAISS index.
            loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8")
            data = loader.load_and_split()
            embeddings = OpenAIEmbeddings()
            vectors = FAISS.from_documents(data, embeddings)
        finally:
            # Remove the temporary file even when loading or embedding fails.
            os.remove(tmp_file_path)

        # Save the vectors to a pickle file.
        with open(self._pickle_path(filename), "wb") as f:
            pickle.dump(vectors, f)

    def getDocEmbeds(self, file, filename):
        """Return the vector store for ``filename``, creating it if missing.

        Args:
            file: Raw bytes of the uploaded CSV file (used only on a cache miss).
            filename: Name used to build the cache file name.

        Returns:
            The unpickled vector store.
        """
        pickle_path = self._pickle_path(filename)

        # NOTE(review): the cache is keyed by file name only, so a different
        # file uploaded under the same name reuses the old embeddings.
        if not os.path.isfile(pickle_path):
            self.storeDocEmbeds(file, filename)

        # NOTE(review): pickle.load executes arbitrary code from the file; this
        # is only safe while the cache directory is written solely by this app.
        with open(pickle_path, "rb") as f:
            vectors = pickle.load(f)

        return vectors
|
modules/history.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import streamlit as st
|
3 |
+
from streamlit_chat import message
|
4 |
+
|
5 |
+
|
6 |
+
class ChatHistory:
    """Keep the chat transcript in ``st.session_state`` and render it.

    Session keys used: ``"history"`` (list passed to the chain as chat
    history), ``"user"`` and ``"assistant"`` (parallel lists of displayed
    messages) and ``"reset_chat"`` (flag cleared by :meth:`reset`).
    """

    # Path used by load()/save(). Nothing in this class assigns it, so it
    # defaults to None (previously the attribute was missing altogether and
    # both methods raised AttributeError).
    history_file = None

    def __init__(self):
        """Bind ``self.history`` to the session's history list, creating it if needed."""
        self.history = st.session_state.get("history", [])
        st.session_state["history"] = self.history

    def default_greeting(self):
        """Return the first message shown on the user side."""
        return "안녕 ! 👋"

    def default_prompt(self, topic):
        """Return the assistant's opening message mentioning ``topic``."""
        return f"안녕하세요 ! {topic}에 대해 무엇이든 물어보세요 🤗"

    def initialize_user_history(self):
        """Reset the user message list to just the default greeting."""
        st.session_state["user"] = [self.default_greeting()]

    def initialize_assistant_history(self, uploaded_file):
        """Reset the assistant message list to the opening prompt for the file."""
        st.session_state["assistant"] = [self.default_prompt(uploaded_file.name)]

    def initialize(self, uploaded_file):
        """Create the message lists if they are not in the session yet."""
        if "assistant" not in st.session_state:
            self.initialize_assistant_history(uploaded_file)
        if "user" not in st.session_state:
            self.initialize_user_history()

    def reset(self, uploaded_file):
        """Clear the conversation and restore the default opening messages."""
        # Keep self.history pointing at the same list as the session state.
        self.history = []
        st.session_state["history"] = self.history
        self.initialize_user_history()
        self.initialize_assistant_history(uploaded_file)
        st.session_state["reset_chat"] = False

    def append(self, mode, message):
        """Append ``message`` to the session list named by ``mode``."""
        st.session_state[mode].append(message)

    def generate_messages(self, container):
        """Render the user/assistant message pairs inside ``container``."""
        if st.session_state["assistant"]:
            with container:
                # zip() stops at the shorter list, so uneven lists no longer
                # raise IndexError.
                pairs = zip(st.session_state["user"], st.session_state["assistant"])
                for i, (user_msg, assistant_msg) in enumerate(pairs):
                    message(
                        user_msg,
                        is_user=True,
                        key=f"{i}_user",
                        avatar_style="big-smile",
                    )
                    message(assistant_msg, key=str(i), avatar_style="thumbs")

    def load(self):
        """Load ``self.history`` from ``history_file`` if it is set and exists."""
        if self.history_file and os.path.exists(self.history_file):
            with open(self.history_file, "r", encoding="utf-8") as f:
                self.history = f.read().splitlines()

    def save(self):
        """Write ``self.history`` to ``history_file``, one entry per line.

        Raises:
            ValueError: If ``history_file`` has not been set.
        """
        if not self.history_file:
            raise ValueError("history_file must be set before calling save()")
        # NOTE(review): entries appended elsewhere may be (query, answer)
        # tuples, which str.join rejects; confirm the intended on-disk format.
        with open(self.history_file, "w", encoding="utf-8") as f:
            f.write("\n".join(self.history))
|
modules/layout.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
|
4 |
+
class Layout:
    """Static page elements: header, missing-key notice and the prompt form.

    The Korean parts of the user-facing strings were mis-encoded in this copy
    of the file and have been restored to readable UTF-8.
    """

    def show_header(self):
        """Display the centered header of the app."""
        st.markdown(
            """
            <h1 style='text-align: center;'>CSV-ChatBot, Talk with your csv-data ! / CSV-ChatBot, csv 데이터로 대화하세요! 💬</h1>
            """,
            unsafe_allow_html=True,
        )

    def show_api_key_missing(self):
        """Display a message asking the user to enter an OpenAI API key."""
        st.markdown(
            """
            <div style='text-align: center;'>
            <h4>Enter your <a href="https://platform.openai.com/account/api-keys" target="_blank">OpenAI API key</a> to start chatting / 채팅을 시작하려면 <a href="https://platform.openai.com/account/api-keys" target="_blank">OpenAI API 키</a>를 입력하세요. 😉</h4>
            </div>
            """,
            unsafe_allow_html=True,
        )

    def prompt_form(self):
        """Display the prompt form.

        Returns:
            A ``(is_ready, user_input)`` tuple. ``is_ready`` is truthy only
            when the form was submitted with non-empty text.
        """
        with st.form(key="my_form", clear_on_submit=True):
            user_input = st.text_area(
                "Query: / 질문:",
                placeholder="Ask me anything about the document... / 문서에 대해 무엇이든 물어보세요...",
                key="input",
                label_visibility="collapsed",
            )
            submit_button = st.form_submit_button(label="Send / 보내주세요")
            is_ready = submit_button and user_input
        return is_ready, user_input
|
modules/sidebar.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
|
4 |
+
class Sidebar:
|
5 |
+
MODEL_OPTIONS = ["gpt-3.5-turbo", "gpt-4"]
|
6 |
+
TEMPERATURE_MIN_VALUE = 0.0
|
7 |
+
TEMPERATURE_MAX_VALUE = 1.0
|
8 |
+
TEMPERATURE_DEFAULT_VALUE = 0.0
|
9 |
+
TEMPERATURE_STEP = 0.01
|
10 |
+
|
11 |
+
@staticmethod
|
12 |
+
def about():
|
13 |
+
about = st.sidebar.expander("๐ค์ ๋ํ")
|
14 |
+
sections = [
|
15 |
+
"#### CSV-ChatBot๋ ์ฌ์ฉ์๊ฐ ๋ณด๋ค ์ง๊ด์ ์ธ ๋ฐฉ์์ผ๋ก CSV ๋ฐ์ดํฐ๋ฅผ ๋
ผ์ํ ์ ์๋๋ก ์ค๊ณ๋ ๋ํํ ๋ฉ๋ชจ๋ฆฌ ๊ธฐ๋ฅ์ ๊ฐ์ถ AI ์ฑ๋ด์
๋๋ค. ๐",
|
16 |
+
"#### ๊ทธ๋ ๋๊ท๋ชจ ์ธ์ด ๋ชจ๋ธ์ ์ฌ์ฉํ์ฌ CSV ๋ฐ์ดํฐ๋ฅผ ๋ ์ ์ดํดํ ์ ์๋๋ก ์ํํ๊ณ ์ํฉ์ ๋ง๋ ์์ฐ์ด ์ํธ ์์ฉ์ ์ฌ์ฉ์์๊ฒ ์ ๊ณตํฉ๋๋ค. ๐",
|
17 |
+
"#### [Langchain](https://github.com/hwchase17/langchain), [OpenAI](https://platform.openai.com/docs/models/gpt-3-5) ๋ฐ [Streamlit](https://github.com/streamlit/streamlit)์ ์ํด ๊ตฌ๋๋ฉ๋๋ค โก",
|
18 |
+
"#### Source code : [RustX/ChatBot-CSV](https://github.com/RustX2802/CSV-ChatBot)",
|
19 |
+
]
|
20 |
+
for section in sections:
|
21 |
+
about.write(section)
|
22 |
+
|
23 |
+
@staticmethod
|
24 |
+
def reset_chat_button():
|
25 |
+
if st.button("Reset chat / ์ฑํ
์ฌ์ค์ "):
|
26 |
+
st.session_state["reset_chat"] = True
|
27 |
+
st.session_state.setdefault("reset_chat", False)
|
28 |
+
|
29 |
+
def model_selector(self):
|
30 |
+
model = st.selectbox(label="Model / ๋ชจ๋ธ", options=self.MODEL_OPTIONS)
|
31 |
+
st.session_state["model"] = model
|
32 |
+
|
33 |
+
def temperature_slider(self):
|
34 |
+
temperature = st.slider(
|
35 |
+
label="Temperature / ์จ๋",
|
36 |
+
min_value=self.TEMPERATURE_MIN_VALUE,
|
37 |
+
max_value=self.TEMPERATURE_MAX_VALUE,
|
38 |
+
value=self.TEMPERATURE_DEFAULT_VALUE,
|
39 |
+
step=self.TEMPERATURE_STEP,
|
40 |
+
)
|
41 |
+
st.session_state["temperature"] = temperature
|
42 |
+
|
43 |
+
def csv_agent_button(self):
|
44 |
+
st.session_state.setdefault("show_csv_agent", False)
|
45 |
+
if st.sidebar.button("CSV Agent"):
|
46 |
+
st.session_state["show_csv_agent"] = not st.session_state["show_csv_agent"]
|
47 |
+
|
48 |
+
def show_options(self):
|
49 |
+
with st.sidebar.expander("๐ ๏ธ Tools / ๋๊ตฌ", expanded=False):
|
50 |
+
self.reset_chat_button()
|
51 |
+
self.csv_agent_button()
|
52 |
+
self.model_selector()
|
53 |
+
self.temperature_slider()
|
54 |
+
st.session_state.setdefault("model", self.MODEL_OPTIONS[0])
|
55 |
+
st.session_state.setdefault("temperature", self.TEMPERATURE_DEFAULT_VALUE)
|
modules/utils.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pandas as pd
|
3 |
+
import streamlit as st
|
4 |
+
|
5 |
+
from modules.chatbot import Chatbot
|
6 |
+
from modules.embedder import Embedder
|
7 |
+
|
8 |
+
|
9 |
+
class Utilities:
|
10 |
+
@staticmethod
|
11 |
+
def load_api_key():
|
12 |
+
"""
|
13 |
+
Loads the OpenAI API key from the .env file or from the user's input
|
14 |
+
and returns it
|
15 |
+
"""
|
16 |
+
if os.path.exists(".env") and os.environ.get("OPENAI_API_KEY") is not None:
|
17 |
+
user_api_key = os.environ["OPENAI_API_KEY"]
|
18 |
+
st.sidebar.success("API key loaded from .env / .env์์ ๋ก๋๋ API ํค", icon="๐")
|
19 |
+
else:
|
20 |
+
user_api_key = st.sidebar.text_input(
|
21 |
+
label="#### Your OpenAI API key / OpenAI API ํค ๐", placeholder="Paste your openAI API key, sk-", type="password"
|
22 |
+
)
|
23 |
+
if user_api_key:
|
24 |
+
st.sidebar.success("API key loaded / API ํค๊ฐ ๋ก๋๋์์ต๋๋ค", icon="๐")
|
25 |
+
return user_api_key
|
26 |
+
|
27 |
+
@staticmethod
|
28 |
+
def handle_upload():
|
29 |
+
"""
|
30 |
+
Handles the file upload and displays the uploaded file
|
31 |
+
"""
|
32 |
+
uploaded_file = st.sidebar.file_uploader("upload", type="csv", label_visibility="collapsed")
|
33 |
+
if uploaded_file is not None:
|
34 |
+
|
35 |
+
def show_user_file(uploaded_file):
|
36 |
+
file_container = st.expander("Your CSV file : / CSV ํ์ผ:")
|
37 |
+
shows = pd.read_csv(uploaded_file)
|
38 |
+
uploaded_file.seek(0)
|
39 |
+
file_container.write(shows)
|
40 |
+
|
41 |
+
show_user_file(uploaded_file)
|
42 |
+
else:
|
43 |
+
st.sidebar.info(
|
44 |
+
"๐ Upload your CSV file to get started, / ์์ํ๋ ค๋ฉด CSV ํ์ผ์ ์
๋ก๋ํ์ธ์ "
|
45 |
+
"sample for try : / ์๋ํด ๋ณผ ์ํ: [example.csv](https://drive.google.com/file/d/1g7x0Ydg5kr51Ha2XIYBSQBVUw1yYlgmc/view?usp=share_link)"
|
46 |
+
)
|
47 |
+
st.session_state["reset_chat"] = True
|
48 |
+
return uploaded_file
|
49 |
+
|
50 |
+
@staticmethod
|
51 |
+
def setup_chatbot(uploaded_file, model, temperature):
|
52 |
+
"""
|
53 |
+
Sets up the chatbot with the uploaded file, model, and temperature
|
54 |
+
"""
|
55 |
+
embeds = Embedder()
|
56 |
+
with st.spinner("Processing... / ์ฒ๋ฆฌ ์ค..."):
|
57 |
+
uploaded_file.seek(0)
|
58 |
+
file = uploaded_file.read()
|
59 |
+
vectors = embeds.getDocEmbeds(file, uploaded_file.name)
|
60 |
+
chatbot = Chatbot(model, temperature, vectors)
|
61 |
+
st.session_state["ready"] = True
|
62 |
+
return chatbot
|
requirements.txt
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiohttp==3.8.4
|
2 |
+
aiosignal==1.3.1
|
3 |
+
altair==4.2.2
|
4 |
+
async-timeout==4.0.2
|
5 |
+
attrs==23.1.0
|
6 |
+
blinker==1.6.2
|
7 |
+
cachetools==5.3.0
|
8 |
+
certifi==2022.12.7
|
9 |
+
charset-normalizer==3.1.0
|
10 |
+
click==8.1.3
|
11 |
+
colorama==0.4.6
|
12 |
+
dataclasses-json==0.5.7
|
13 |
+
decorator==5.1.1
|
14 |
+
entrypoints==0.4
|
15 |
+
faiss-cpu==1.7.3
|
16 |
+
frozenlist==1.3.3
|
17 |
+
gitdb==4.0.10
|
18 |
+
GitPython==3.1.31
|
19 |
+
greenlet==2.0.2
|
20 |
+
hupper==1.12
|
21 |
+
idna==3.4
|
22 |
+
importlib-metadata==6.5.0
|
23 |
+
Jinja2==3.1.2
|
24 |
+
jsonschema==4.17.3
|
25 |
+
langchain==0.0.146
|
26 |
+
markdown-it-py==2.2.0
|
27 |
+
MarkupSafe==2.1.2
|
28 |
+
marshmallow==3.19.0
|
29 |
+
marshmallow-enum==1.5.1
|
30 |
+
mdurl==0.1.2
|
31 |
+
multidict==6.0.4
|
32 |
+
mypy-extensions==1.0.0
|
33 |
+
nest-asyncio==1.5.6
|
34 |
+
numexpr==2.8.4
|
35 |
+
numpy==1.24.2
|
36 |
+
openai==0.27.4
|
37 |
+
openapi-schema-pydantic==1.2.4
|
38 |
+
packaging==23.1
|
39 |
+
pandas==1.5.3
|
40 |
+
Pillow==9.5.0
|
41 |
+
protobuf==3.20.3
|
42 |
+
python-dotenv==1.0.0
|
43 |
+
pyarrow==11.0.0
|
44 |
+
pydantic==1.10.7
|
45 |
+
pydeck==0.8.1b0
|
46 |
+
Pygments==2.15.1
|
47 |
+
Pympler==1.0.1
|
48 |
+
pyrsistent==0.19.3
|
49 |
+
python-dateutil==2.8.2
|
50 |
+
pytz==2023.3
|
51 |
+
pytz-deprecation-shim==0.1.0.post0
|
52 |
+
PyYAML==6.0
|
53 |
+
regex==2023.3.23
|
54 |
+
requests==2.28.2
|
55 |
+
rich==13.3.4
|
56 |
+
six==1.16.0
|
57 |
+
smmap==5.0.0
|
58 |
+
SQLAlchemy==1.4.47
|
59 |
+
streamlit==1.21.0
|
60 |
+
streamlit-chat==0.0.2.2
|
61 |
+
tenacity==8.2.2
|
62 |
+
tiktoken==0.3.3
|
63 |
+
toml==0.10.2
|
64 |
+
toolz==0.12.0
|
65 |
+
tornado==6.3
|
66 |
+
tqdm==4.65.0
|
67 |
+
typing-inspect==0.8.0
|
68 |
+
typing_extensions==4.5.0
|
69 |
+
tzdata==2023.3
|
70 |
+
tzlocal==4.3
|
71 |
+
urllib3==1.26.15
|
72 |
+
validators==0.20.0
|
73 |
+
watchdog==3.0.0
|
74 |
+
yarl==1.8.2
|
75 |
+
zipp==3.15.0
|