File size: 4,022 Bytes
9057f38
 
 
 
 
 
 
32e37e9
9057f38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c6bffd
 
 
 
32e37e9
 
 
 
 
 
 
9057f38
 
 
4c6bffd
9057f38
32e37e9
 
 
9057f38
4c6bffd
 
 
32e37e9
9057f38
 
 
 
 
 
 
4c6bffd
9057f38
32e37e9
 
 
 
4c6bffd
32e37e9
 
9057f38
32e37e9
 
 
 
4c6bffd
9057f38
4c6bffd
9057f38
 
 
 
 
 
4c6bffd
32e37e9
 
 
 
 
 
 
 
 
 
 
4c6bffd
 
 
9057f38
32e37e9
 
 
4c6bffd
 
 
 
 
 
 
32e37e9
9057f38
4c6bffd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9057f38
4c6bffd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import gradio as gr
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from ctransformers import AutoModelForCausalLM

# Load the embedding model
encoder = SentenceTransformer('jinaai/jina-embedding-b-en-v1')
print("Embedding model loaded...")

# Load the LLM
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-Chat-GGUF",
    model_file="llama-2-7b-chat.Q3_K_S.gguf",
    model_type="llama",
    temperature=0.2,
    repetition_penalty=1.5,
    max_new_tokens=300,
)
print("LLM loaded...")

# Initialize QdrantClient
client = QdrantClient(path="./db")
print("DB created...")

def get_chunks(text):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=250,
        chunk_overlap=50,
        length_function=len,
    )
    return text_splitter.split_text(text)

def setup_database(files):
    all_chunks = []

    for file in files:
        reader = PdfReader(file)
        text = "".join(page.extract_text() for page in reader.pages)
        chunks = get_chunks(text)
        all_chunks.extend(chunks)
    
    print(f"Total chunks: {len(all_chunks)}")
    print("Chunks are ready...")

    client.recreate_collection(
        collection_name="my_facts",
        vectors_config=models.VectorParams(
            size=encoder.get_sentence_embedding_dimension(),
            distance=models.Distance.COSINE,
        ),
    )
    print("Collection created...")

    records = [
        models.Record(
            id=idx,
            vector=encoder.encode(chunk).tolist(),
            payload={"text": chunk}
        ) for idx, chunk in enumerate(all_chunks)
    ]
    
    client.upload_records(
        collection_name="my_facts",
        records=records,
    )
    print("Records uploaded...")

def answer(question):
    hits = client.search(
        collection_name="my_facts",
        query_vector=encoder.encode(question).tolist(),
        limit=3
    )
    
    context = " ".join(hit.payload["text"] for hit in hits)

    system_prompt = """You are a helpful co-worker, you will use the provided context to answer user questions.
    Read the given context before answering questions and think step by step. If you cannot answer a user question based on 
    the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""

    B_INST, E_INST = "[INST]", "[/INST]"
    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

    instruction = f"Context: {context}\nUser: {question}"
    prompt_template = f"{B_INST}{B_SYS}{system_prompt}{E_SYS}{instruction}{E_INST}"
    
    print(prompt_template)
    result = llm(prompt_template)
    return result

def chat(messages, files):
    if files:
        setup_database(files)
    
    if not messages:
        return "Please upload PDF documents to initialize the database."

    last_message = messages[-1]["content"]
    response = answer(last_message)
    messages.append({"role": "assistant", "content": response})
    return messages

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    file_input = gr.File(label="Upload PDFs", file_count="multiple")
    with gr.Row():
        with gr.Column(scale=0.85):
            txt = gr.Textbox(show_label=False, placeholder="Enter your question here...").style(container=False)
        with gr.Column(scale=0.15, min_width=0):
            send_btn = gr.Button("Send")
    
    def respond(messages, files, txt):
        messages = chat(messages, files)
        return messages, None, ""

    send_btn.click(respond, [chatbot, file_input, txt], [chatbot, file_input, txt])
    txt.submit(respond, [chatbot, file_input, txt], [chatbot, file_input, txt])

demo.launch()