File size: 10,945 Bytes
8d8cebe
 
 
 
 
 
 
 
 
 
 
d5cf104
8d8cebe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5cf104
8d8cebe
d5cf104
8d8cebe
 
 
 
 
 
 
 
 
 
d5cf104
 
 
 
 
 
 
 
 
 
8d8cebe
 
1873131
8d8cebe
 
 
8083b67
8d8cebe
 
1873131
 
8d8cebe
 
 
 
 
 
1873131
 
 
8083b67
1873131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8083b67
1873131
 
 
 
 
 
8083b67
1873131
8d8cebe
 
 
 
1873131
8d8cebe
d5cf104
8d8cebe
f7bc6a1
8083b67
 
 
 
 
f7bc6a1
 
 
8083b67
 
 
1873131
8083b67
1873131
8083b67
 
 
 
 
 
 
 
 
 
1873131
8083b67
 
 
 
 
f7bc6a1
8083b67
 
1873131
8083b67
 
 
 
 
 
 
 
 
 
 
 
 
f7bc6a1
8083b67
 
 
 
 
834c808
 
 
 
 
d5cf104
 
834c808
 
 
 
1873131
834c808
d5cf104
834c808
d5cf104
 
 
 
 
 
 
834c808
1873131
d5cf104
1873131
 
 
d5cf104
 
 
 
 
 
 
 
 
 
 
 
 
1873131
d5cf104
 
 
834c808
 
8d8cebe
 
7bc7b53
1873131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7bc7b53
8d8cebe
 
 
 
 
 
7bc7b53
8d8cebe
 
 
 
 
 
 
7bc7b53
8d8cebe
 
 
 
 
 
 
7bc7b53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d8cebe
 
 
d5cf104
8d8cebe
d5cf104
 
1873131
d5cf104
 
 
 
 
 
1873131
 
8d8cebe
d5cf104
 
8d8cebe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup UI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading settings from ../../env/ai.json\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import json\n",
    "\n",
    "# Optional JSON settings file; each top-level key/value pair in it is copied into os.environ.\n",
    "# When the file is absent, the placeholder values in the else branch are used instead.\n",
    "filePathToSettingsFile = '../../env/ai.json'\n",
    "\n",
    "if os.path.exists(filePathToSettingsFile):\n",
    "    print(f'Loading settings from {filePathToSettingsFile}')\n",
    "\n",
    "    # A context manager guarantees the handle is closed even when json.load raises;\n",
    "    # the previous open()/del pattern never closed the file explicitly.\n",
    "    with open(filePathToSettingsFile, encoding='utf-8') as settingsFile:\n",
    "        settingsJson = json.load(settingsFile)\n",
    "\n",
    "    # os.environ only accepts str values, so a non-string JSON value raises TypeError here.\n",
    "    for key in settingsJson:\n",
    "        os.environ[key] = settingsJson[key]\n",
    "\n",
    "    # Remove the parsed settings from the notebook namespace once they are applied.\n",
    "    del settingsJson\n",
    "else:\n",
    "    print('Setting variables manually as there is not ai.json settings file')\n",
    "\n",
    "    # Update the variables below with your own settings\n",
    "    os.environ['REQUESTS_CA_BUNDLE'] = '../../env/ZCert.pem'\n",
    "    os.environ['HUGGING_FACE_API_KEY'] = 'Get here: https://huggingface.co/settings/tokens'\n",
    "    os.environ['OPENAI_API_KEY'] = 'Get here: https://platform.openai.com/account/api-keys'\n",
    "    os.environ[\"SERPAPI_API_KEY\"] = 'serpapi KEY, Get here: https://serpapi.com/manage-api-key'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pinecone\n",
    "from langchain.chains import RetrievalQA\n",
    "from langchain.embeddings import OpenAIEmbeddings\n",
    "from langchain.llms import OpenAI\n",
    "from langchain.vectorstores import Pinecone\n",
    "import gradio as gr"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup Vector Store\n",
    "There are two vector stores in Pinecone (hence the two sets of API keys), each holding a separate knowledge base:\n",
    "1. Roman history\n",
    "2. A list of literature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using vector store: Roman History\n",
      "Using index: rag-demo-1-history-rome\n"
     ]
    }
   ],
   "source": [
    "embeddings = OpenAIEmbeddings()\n",
    "\n",
    "# Placeholder so the name exists even if the call at the bottom of this cell fails.\n",
    "vector_store = None\n",
    "\n",
    "def create_vector_store(indexToUse: str):\n",
    "    \"\"\"Initialise Pinecone for the chosen knowledge base and wrap its index as a vector store.\n",
    "\n",
    "    Args:\n",
    "        indexToUse: \"Roman History\" selects the PINECONE_API_KEY_2 / PINECONE_API_ENV_2\n",
    "            environment variables; any other value selects PINECONE_API_KEY /\n",
    "            PINECONE_API_ENV.\n",
    "\n",
    "    Returns:\n",
    "        A langchain Pinecone vector store built from the first index returned by\n",
    "        pinecone.list_indexes(), the module-level `embeddings` object and the\n",
    "        \"text\" key.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: if the selected environment variables are not set.\n",
    "        IndexError: if pinecone.list_indexes() returns no indexes.\n",
    "    \"\"\"\n",
    "    print(f\"Using vector store: {indexToUse}\")\n",
    "\n",
    "    if indexToUse == \"Roman History\":\n",
    "        apiKeyKey = \"PINECONE_API_KEY_2\"\n",
    "        apiEnvKey = \"PINECONE_API_ENV_2\"\n",
    "    else:\n",
    "        apiKeyKey = \"PINECONE_API_KEY\"\n",
    "        apiEnvKey = \"PINECONE_API_ENV\"\n",
    "\n",
    "    pinecone.init(api_key=os.environ[apiKeyKey], environment=os.environ[apiEnvKey])\n",
    "\n",
    "    # Fail with an explicit message instead of a bare 'list index out of range'.\n",
    "    available_indexes = pinecone.list_indexes()\n",
    "    if not available_indexes:\n",
    "        raise IndexError(f\"No Pinecone indexes found for vector store: {indexToUse}\")\n",
    "\n",
    "    index_name = available_indexes[0]\n",
    "    print(f\"Using index: {index_name}\")\n",
    "    index = pinecone.Index(index_name)\n",
    "\n",
    "    return Pinecone(index, embeddings, \"text\")\n",
    "\n",
    "vector_store = create_vector_store(\"Roman History\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = OpenAI(temperature=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Conversational agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.memory import ConversationBufferMemory\n",
    "from langchain.chains import ConversationalRetrievalChain\n",
    "\n",
    "# Conversation buffer exposed to the chain under the \"chat_history\" key.\n",
    "memory = ConversationBufferMemory(\n",
    "    memory_key=\"chat_history\",\n",
    "    return_messages=True,\n",
    ")\n",
    "\n",
    "# Retrieval chain built from the module-level llm and vector_store defined in earlier cells.\n",
    "chain = ConversationalRetrievalChain.from_llm(\n",
    "    llm,\n",
    "    retriever=vector_store.as_retriever(),\n",
    "    memory=memory,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "' \\nCrassus was a Roman general who was killed in battle by the Parthians. He was killed while trying to extend the Roman Empire into the Middle East. Ceasar was avenging Crassus by trying to defeat the Parthians and expand the Roman Empire. He was also trying to avenge the death of his friend and mentor.'"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = \"\"\" \n",
    "    SYSTEM:\n",
    "    You are a helpful teacher who is teaching a class of 10 year olds. \n",
    "    Your answers must only come from the context provided to you in the question.\n",
    "    If you don't know the answer then say so. \n",
    "    The answers should be at least 40 words or longer\n",
    "    \n",
    "    QUESTION:\n",
    "    Why was he avenging Crassus, what happened to him that Ceasar needed to avenge him? \n",
    "    \n",
    "\"\"\"\n",
    "chain.run({'question': query})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# UI\n",
    "1. https://medium.com/@gabriel_renno/how-to-build-a-gpt3-5-powered-chatbot-for-your-landing-page-with-langchain-and-gradio-1236ddfb0cf1\n",
    "2. https://github.com/RajKKapadia/YouTube-Pinecone-Demo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Adapted from RajKKapadia's YouTube-Pinecone-Demo (second link in the UI section above)\n",
    "from langchain.chains import ConversationalRetrievalChain\n",
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.vectorstores import Pinecone\n",
    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
    "from langchain.memory import ConversationBufferMemory\n",
    "import pinecone\n",
    "\n",
    "def create_conversation(query: str, chat_history: list, indexToUse: str) -> tuple:\n",
    "    \"\"\"Gradio callback: answer `query` using the vector store selected in the dropdown.\n",
    "\n",
    "    Args:\n",
    "        query: The user's message from the textbox.\n",
    "        chat_history: List of (question, answer) turns shown in the chatbot; the new\n",
    "            turn is appended in place. None is treated as an empty history.\n",
    "        indexToUse: The dropdown selection, forwarded to create_vector_store.\n",
    "\n",
    "    Returns:\n",
    "        A 3-tuple ('', chat_history, indexToUse), one value per output component the\n",
    "        event handlers are wired to. The empty string clears the textbox.\n",
    "    \"\"\"\n",
    "    if chat_history is None:\n",
    "        chat_history = []\n",
    "\n",
    "    try:\n",
    "        print(indexToUse)\n",
    "        # Honour the dropdown selection; this was previously hard-coded to \"Literature\".\n",
    "        vector_store = create_vector_store(indexToUse)\n",
    "        # NOTE(review): a new ConversationBufferMemory is created on every call, so it\n",
    "        # starts empty each time - confirm whether the chain uses the chat_history\n",
    "        # passed below or this (empty) memory when rephrasing follow-up questions.\n",
    "        memory = ConversationBufferMemory(\n",
    "            memory_key='chat_history',\n",
    "            return_messages=False\n",
    "        )\n",
    "        cqa = ConversationalRetrievalChain.from_llm(\n",
    "            llm=ChatOpenAI(temperature=0.0,\n",
    "                           openai_api_key=os.environ['OPENAI_API_KEY']),\n",
    "            retriever=vector_store.as_retriever(search_kwargs={\"k\": 5}),\n",
    "            memory=memory,\n",
    "            get_chat_history=lambda h: h,\n",
    "        )\n",
    "        result = cqa({'question': query, 'chat_history': chat_history})\n",
    "        chat_history.append((query, result['answer']))\n",
    "    except Exception as e:\n",
    "        # Surface the failure in the chat window as text; appending the raw exception\n",
    "        # object gives the chatbot a non-string message.\n",
    "        chat_history.append((query, f'{type(e).__name__}: {e}'))\n",
    "\n",
    "    # Success and failure paths both return three values; the old error path returned\n",
    "    # only two, which does not match the three wired outputs.\n",
    "    return '', chat_history, indexToUse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "# vector_store_index_to_use = None \n",
    "# def set_vector_store_index(indexName:str):\n",
    "#     vector_store_index_to_use = indexName\n",
    "    \n",
    "# with gr.Blocks() as demo:\n",
    "#     indexToUseDD = gr.Dropdown(choices=[\"Roman History\", \"Literature\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running on local URL:  http://127.0.0.1:7918\n",
      "\n",
      "To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"http://127.0.0.1:7918/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Literature\n",
      "Using vector store: Literature\n",
      "Using index: rag-demo-1-literature\n",
      "Literature\n",
      "Using vector store: Literature\n",
      "Using index: rag-demo-1-literature\n",
      "Literature\n",
      "Using vector store: Literature\n",
      "Using index: rag-demo-1-literature\n",
      "Literature\n",
      "Using vector store: Literature\n",
      "Using index: rag-demo-1-literature\n",
      "Literature\n",
      "Using vector store: Literature\n",
      "Using index: rag-demo-1-literature\n",
      "Literature\n",
      "Using vector store: Literature\n",
      "Using index: rag-demo-1-literature\n",
      "Literature\n",
      "Using vector store: Literature\n",
      "Using index: rag-demo-1-literature\n"
     ]
    }
   ],
   "source": [
    "import gradio as gr\n",
    "\n",
    "with gr.Blocks() as demo:\n",
    "    gr.Markdown(\"Chat to docs\")\n",
    "    # Knowledge-base selector; its value is passed to create_conversation as indexToUse.\n",
    "    indexToUseDD = gr.Dropdown(choices=[\"Roman History\", \"Literature\"])\n",
    "    chatbot = gr.Chatbot(label='Talk to the Document')\n",
    "    msg = gr.Textbox()\n",
    "\n",
    "    submitBtn = gr.Button(value=\"Submit\")\n",
    "    clear = gr.ClearButton([msg, chatbot])\n",
    "\n",
    "    # Submitting the textbox and clicking the button run the same callback with the\n",
    "    # same components, so the wiring is defined once and reused.\n",
    "    conversation_inputs = [msg, chatbot, indexToUseDD]\n",
    "    conversation_outputs = [msg, chatbot, indexToUseDD]\n",
    "    msg.submit(create_conversation, conversation_inputs, conversation_outputs)\n",
    "    submitBtn.click(create_conversation, conversation_inputs, conversation_outputs)\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    demo.launch(show_error=True)  # debug=True can be added here when troubleshooting"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}