File size: 2,616 Bytes
0658357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
import gradio as gr

from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader


def load_embeddings():
    """Build the instructor embedding model named in the environment.

    The model name is read from the ``HUGGINGFACEHUB_EMBEDDINGS_MODEL_NAME``
    environment variable and handed to ``HuggingFaceInstructEmbeddings``.

    Returns:
        A ``HuggingFaceInstructEmbeddings`` instance for the configured model.

    Raises:
        KeyError: If ``HUGGINGFACEHUB_EMBEDDINGS_MODEL_NAME`` is not set.
    """
    # Never dump ``os.environ`` here: the environment may hold secrets such as
    # API tokens, and printing it would copy them into the application logs.
    model_name = os.environ['HUGGINGFACEHUB_EMBEDDINGS_MODEL_NAME']
    return HuggingFaceInstructEmbeddings(model_name=model_name)


def split_file(file):
    """Load a text file and split it into chunks.

    Args:
        file: Object whose ``name`` attribute is the path of the file to load.

    Returns:
        Whatever ``CharacterTextSplitter.split_documents`` produces for the
        loaded documents, using ``chunk_size=1000`` and ``chunk_overlap=20``.
    """
    source_path = file.name
    print(source_path)

    raw_documents = TextLoader(source_path).load()
    chunker = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    return chunker.split_documents(raw_documents)


def get_persist_directory(file_name):
    """Return the persistence directory for one file's collection.

    Args:
        file_name: Name appended to the base directory.

    Returns:
        ``file_name`` joined onto the directory given by the
        ``CHROMADB_PERSIST_DIRECTORY`` environment variable.

    Raises:
        KeyError: If ``CHROMADB_PERSIST_DIRECTORY`` is not set.
    """
    base_directory = os.environ['CHROMADB_PERSIST_DIRECTORY']
    return os.path.join(base_directory, file_name)


def process_file(file):
    """Embed an uploaded file and persist it as its own Chroma collection.

    The file is split into chunks, embedded, and stored in a collection named
    after the file (base name without its extension) inside the directory
    returned by ``get_persist_directory``.

    Args:
        file: Uploaded file object exposing a ``name`` path attribute, or
            ``None`` when nothing has been uploaded yet.

    Returns:
        None.

    Raises:
        gr.Error: If ``file`` is ``None``.
    """
    # Clicking "Process" without an upload passes None; report that to the
    # user instead of failing later with an AttributeError on ``file.name``.
    if file is None:
        raise gr.Error("Please upload a file before clicking Process.")

    embeddings = load_embeddings()
    docs = split_file(file)

    file_name, _ = os.path.splitext(os.path.basename(file.name))
    persist_directory = get_persist_directory(file_name)
    # One summary line instead of dumping every chunk's contents to the log.
    print(f"Indexing {len(docs)} chunks into {persist_directory}")

    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        collection_name=file_name,
        persist_directory=persist_directory,
    )
    vectordb.persist()
    return None


def load_vectordb(file_name):
    """Open the Chroma collection associated with ``file_name``.

    Args:
        file_name: Used both as the collection name and to derive the
            persistence directory via ``get_persist_directory``.

    Returns:
        A ``Chroma`` instance built with the embeddings from
        ``load_embeddings``.
    """
    # Keyword arguments are evaluated left to right, so the embeddings are
    # loaded before the persistence directory is resolved.
    return Chroma(
        collection_name=file_name,
        embedding_function=load_embeddings(),
        persist_directory=get_persist_directory(file_name),
    )


def add_text(bot_history, text):
    """Append a new user turn to the chat history.

    Args:
        bot_history: Existing list of chat turns; it is not modified.
        text: The user's message.

    Returns:
        A two-item tuple: a new history list ending with ``(text, None)``,
        and an empty string.
    """
    pending_turn = (text, None)
    extended_history = bot_history + [pending_turn]
    return extended_history, ""


def bot(bot_history):
    """Fill in the bot's reply for the most recent chat turn.

    Args:
        bot_history: List of ``[user_message, bot_message]`` pairs (lists or
            tuples). The last pair is the one that receives the reply.

    Returns:
        The same ``bot_history`` object, with its last pair replaced by
        ``[user_message, 'so cool!']``. An empty or ``None`` history is
        returned unchanged.
    """
    # Nothing to answer yet; avoid an IndexError on ``bot_history[-1]``.
    if not bot_history:
        return bot_history

    user_message = bot_history[-1][0]
    # Replace the whole pair rather than assigning into it: pairs may be
    # tuples (add_text appends tuples), and tuples do not support item
    # assignment.
    bot_history[-1] = [user_message, 'so cool!']
    return bot_history


def clear_bot():
    """Produce the value used to reset the chat display.

    Returns:
        None, which the "Clear" button handler sends to the chatbot component.
    """
    cleared_value = None
    return cleared_value


# Page heading; also assigned to ``demo.title`` below.
title = "QnA Chatbot"

# Build the UI. Components are created inside nested context managers, so the
# order of the statements below determines the layout.
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")

    with gr.Row():
        # First column: file upload plus the button that triggers indexing.
        # NOTE(review): both columns pass scale=0.5, presumably to split the
        # row evenly — confirm against the installed Gradio version.
        with gr.Column(scale=0.5):
            upload = gr.File(file_types=["text"], label="Upload file")

            process = gr.Button("Process")

        # Second column: chat display, message input, and a clear button.
        with gr.Column(scale=0.5):
            chatbot = gr.Chatbot([], elem_id="chatbot").style(height=750)

            txt = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press enter",
            ).style(container=False)

            clear = gr.Button("Clear")

    # "Process" runs process_file on the uploaded file; no output component.
    process.click(process_file, upload, None)

    # Submitting the textbox first runs add_text (updates the chatbot and the
    # textbox), then runs bot on the chatbot value to fill in the reply.
    txt.submit(add_text, [chatbot, txt], [chatbot, txt]).then(
        bot, chatbot, chatbot
    )
    # "Clear" sends clear_bot's return value (None) to the chatbot.
    clear.click(clear_bot, None, chatbot)

demo.title = title

# Starts the app at import/run time; there is no ``__main__`` guard.
demo.launch()