Baweja committed on
Commit 0ee50a8 · verified · 1 Parent(s): 5e9d294

Update app.py

Files changed (1)
  app.py +169 -34
app.py CHANGED
@@ -139,6 +139,153 @@
 # if __name__ == "__main__":
 #     demo.launch(share = True )
 
+ # import torch
+ # import transformers
+ # from transformers import RagRetriever, RagSequenceForGeneration, AutoTokenizer, AutoModelForCausalLM
+ # import gradio as gr
+
+ # device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+
+ # dataset_path = "./5k_index_data/my_knowledge_dataset"
+ # index_path = "./5k_index_data/my_knowledge_dataset_hnsw_index.faiss"
+
+ # tokenizer = AutoTokenizer.from_pretrained("facebook/rag-sequence-nq")
+ # retriever = RagRetriever.from_pretrained("facebook/rag-sequence-nq", index_name="custom",
+ #                                          passages_path = dataset_path,
+ #                                          index_path = index_path,
+ #                                          n_docs = 5)
+ # rag_model = RagSequenceForGeneration.from_pretrained('facebook/rag-sequence-nq', retriever=retriever)
+ # rag_model.retriever.init_retrieval()
+ # rag_model.to(device)
+ # model = AutoModelForCausalLM.from_pretrained('HuggingFaceH4/zephyr-7b-beta',
+ #                                              device_map = 'auto',
+ #                                              torch_dtype = torch.bfloat16,
+ #                                              )
+
+
+
+ # def strip_title(title):
+ #     if title.startswith('"'):
+ #         title = title[1:]
+ #     if title.endswith('"'):
+ #         title = title[:-1]
+
+ #     return title
+
+ # # getting the correct format to input in gemma model
+ # def input_format(query, context):
+ #     # sys_instruction = f'Context:\n {context} \n Given the following information, generate answer to the question. Provide links in the answer from the information to increase credebility.'
+ #     # message = f'Question: {query}'
+
+ #     # return f'<bos><start_of_turn>\n{sys_instruction}' + f' {message}<end_of_turn>\n'
+ #     return [
+ #         {
+ #             "role": "system", "content": f'Context:\n {context} \n Given the following information, generate answer to the question. Provide links in the answer from the information to increase credebility.' },
+
+ #         {
+ #             "role": "user", "content": f"{query}"},
+ #     ]
+
+ # # retrieving and generating answer in one call
+ # def retrieved_info(query, rag_model = rag_model, generating_model = model):
+ #     # Tokenize Query
+ #     retriever_input_ids = rag_model.retriever.question_encoder_tokenizer.batch_encode_plus(
+ #         [query],
+ #         return_tensors = 'pt',
+ #         padding = True,
+ #         truncation = True,
+ #     )['input_ids'].to(device)
+
+ #     # Retrieve Documents
+ #     question_encoder_output = rag_model.rag.question_encoder(retriever_input_ids)
+ #     question_encoder_pool_output = question_encoder_output[0]
+
+ #     result = rag_model.retriever(
+ #         retriever_input_ids,
+ #         question_encoder_pool_output.cpu().detach().to(torch.float32).numpy(),
+ #         prefix = rag_model.rag.generator.config.prefix,
+ #         n_docs = rag_model.config.n_docs,
+ #         return_tensors = 'pt',
+ #     )
+
+ #     # Preparing query and retrieved docs for model
+ #     all_docs = rag_model.retriever.index.get_doc_dicts(result.doc_ids)
+ #     retrieved_context = []
+ #     for docs in all_docs:
+ #         titles = [strip_title(title) for title in docs['title']]
+ #         texts = docs['text']
+ #         for title, text in zip(titles, texts):
+ #             retrieved_context.append(f'{title}: {text}')
+ #     print(retrieved_context)
+
+ #     generation_model_input = input_format(query, retrieved_context[0])
+
+ #     # Generating answer using gemma model
+ #     tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
+ #     input_ids = tokenizer(generation_model_input, return_tensors='pt')['input_ids'].to(device)
+ #     output = generating_model.generate(input_ids, max_new_tokens = 256)
+
+ #     return tokenizer.decode(output[0])
+
+
+ # def respond(
+ #     message,
+ #     history: list[tuple[str, str]],
+ #     system_message,
+ #     max_tokens ,
+ #     temperature,
+ #     top_p,
+ # ):
+ #     if message:  # If there's a user query
+ #         response = retrieved_info(message)  # Get the answer from your local FAISS and Q&A model
+ #         return response
+
+ #     # In case no message, return an empty string
+ #     return ""
+
+
+
+ # """
+ # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+ # """
+ # # Custom title and description
+ # title = "🧠 Welcome to Your AI Knowledge Assistant"
+ # description = """
+ # Hi!! I am your loyal assistant. My functionality is based on the RAG model. I retrieve relevant information and provide answers based on that. Ask me any questions, and let me assist you.
+ # My capabilities are limited because I am still in the development phase. I will do my best to assist you. SOOO LET'S BEGGINNNN......
+ # """
+
+ # demo = gr.ChatInterface(
+ #     respond,
+ #     type = 'messages',
+ #     additional_inputs=[
+ #         gr.Textbox(value="You are a helpful and friendly assistant.", label="System message"),
+ #         gr.Slider(minimum=1, maximum=2048, value=256, step=1, label="Max new tokens"),
+ #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+ #         gr.Slider(
+ #             minimum=0.1,
+ #             maximum=1.0,
+ #             value=0.95,
+ #             step=0.05,
+ #             label="Top-p (nucleus sampling)",
+ #         ),
+ #     ],
+ #     title=title,
+ #     description=description,
+ #     textbox=gr.Textbox(placeholder=["'What is the future of AI?' or 'App Development'"]),
+ #     examples=[["✨Future of AI"], ["📱App Development"]],
+ #     #example_icons=["🤖", "📱"],
+ #     theme="compact",
+ #     submit_btn = True,
+ # )
+
+
+ # if __name__ == "__main__":
+ #     demo.launch(share = True,
+ #                 show_error = True)
+
+
 import torch
 import transformers
 from transformers import RagRetriever, RagSequenceForGeneration, AutoTokenizer, AutoModelForCausalLM
@@ -150,7 +297,6 @@ device = 'cuda' if torch.cuda.is_available() else 'cpu'
 dataset_path = "./5k_index_data/my_knowledge_dataset"
 index_path = "./5k_index_data/my_knowledge_dataset_hnsw_index.faiss"
 
- tokenizer = AutoTokenizer.from_pretrained("facebook/rag-sequence-nq")
 retriever = RagRetriever.from_pretrained("facebook/rag-sequence-nq", index_name="custom",
                                          passages_path = dataset_path,
                                          index_path = index_path,
@@ -158,12 +304,12 @@ retriever = RagRetriever.from_pretrained("facebook/rag-sequence-nq", index_name="custom",
 rag_model = RagSequenceForGeneration.from_pretrained('facebook/rag-sequence-nq', retriever=retriever)
 rag_model.retriever.init_retrieval()
 rag_model.to(device)
- model = AutoModelForCausalLM.from_pretrained('HuggingFaceH4/zephyr-7b-beta',
-                                              device_map = 'auto',
-                                              torch_dtype = torch.bfloat16,
-                                              )
-
-
+ pipe = transformers.pipeline(  # `pipeline` is not in the import list above, so go through the transformers module
+     "text-generation",
+     model="google/gemma-2-2b-it",
+     model_kwargs={"torch_dtype": torch.bfloat16},
+     device=device,  # replace with "mps" to run on a Mac device
+ )
 
 def strip_title(title):
     if title.startswith('"'):
@@ -173,21 +319,7 @@ def strip_title(title):
 
     return title
 
- # getting the correct format to input in gemma model
- def input_format(query, context):
-     # sys_instruction = f'Context:\n {context} \n Given the following information, generate answer to the question. Provide links in the answer from the information to increase credebility.'
-     # message = f'Question: {query}'
-
-     # return f'<bos><start_of_turn>\n{sys_instruction}' + f' {message}<end_of_turn>\n'
-     return [
-         {
-             "role": "system", "content": f'Context:\n {context} \n Given the following information, generate answer to the question. Provide links in the answer from the information to increase credebility.' },
-
-         {
-             "role": "user", "content": f"{query}"},
-     ]
 
- # retrieving and generating answer in one call
- def retrieved_info(query, rag_model = rag_model, generating_model = model):
+ def retrieved_info(query, rag_model = rag_model):  # `model` was deleted above; keeping it as a default would raise NameError
     # Tokenize Query
     retriever_input_ids = rag_model.retriever.question_encoder_tokenizer.batch_encode_plus(
@@ -217,16 +349,20 @@ def retrieved_info(query, rag_model = rag_model):
         texts = docs['text']
         for title, text in zip(titles, texts):
             retrieved_context.append(f'{title}: {text}')
-     print(retrieved_context)
-
-     generation_model_input = input_format(query, retrieved_context[0])
 
+
     # Generating answer using gemma model
-     tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
-     input_ids = tokenizer(generation_model_input, return_tensors='pt')['input_ids'].to(device)
-     output = generating_model.generate(input_ids, max_new_tokens = 256)
+
+     messages = [
+         # gemma-2's chat template rejects a "system" role; fold the retrieved context into the user turn
+         {"role": "user", "content": f"Context: {retrieved_context}. Use the information from the Context to answer: {query}"},
+     ]
+
+     outputs = pipe(messages, max_new_tokens=256)
+     assistant_response = outputs[0]["generated_text"][-1]["content"].strip()
 
-     return tokenizer.decode(output[0])
+     return assistant_response
+
 
 
 def respond(
@@ -252,16 +388,17 @@ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 # Custom title and description
 title = "🧠 Welcome to Your AI Knowledge Assistant"
 description = """
- Hi!! I am your loyal assistant. My functionality is based on the RAG model. I retrieve relevant information and provide answers based on that. Ask me any questions, and let me assist you.
- My capabilities are limited because I am still in the development phase. I will do my best to assist you. SOOO LET'S BEGGINNNN......
+ Hi! I am your loyal assistant. I am built on a RAG model: I retrieve relevant information and answer your questions based on it.
+ My capabilities are limited because I am still in the development phase, but I will do my best to assist you. So let's begin!
 """
 
+
 demo = gr.ChatInterface(
     respond,
     type = 'messages',
     additional_inputs=[
         gr.Textbox(value="You are a helpful and friendly assistant.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=256, step=1, label="Max new tokens"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
@@ -277,10 +414,8 @@ demo = gr.ChatInterface(
     examples=[["✨Future of AI"], ["📱App Development"]],
     #example_icons=["🤖", "📱"],
     theme="compact",
-     submit_btn = True,
 )
 
 
 if __name__ == "__main__":
-     demo.launch(share = True,
-                 show_error = True)
+     demo.launch(share = True)
 
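Note: the retriever in this diff loads a prebuilt passages dataset and FAISS index from ./5k_index_data/. A minimal sketch of how such a pair is typically produced for RagRetriever with index_name="custom" (the DPR encoder choice, the toy passage, and the batch size are illustrative assumptions; only the two output paths match app.py):

import faiss
import torch
from datasets import Dataset
from transformers import DPRContextEncoder, DPRContextEncoderTokenizerFast

ctx_encoder = DPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-multiset-base")
ctx_tokenizer = DPRContextEncoderTokenizerFast.from_pretrained("facebook/dpr-ctx_encoder-multiset-base")

def embed(batch):
    # RagRetriever expects 'title', 'text' and 'embeddings' columns
    inputs = ctx_tokenizer(batch["title"], batch["text"], truncation=True,
                           padding="longest", return_tensors="pt")
    with torch.no_grad():
        return {"embeddings": ctx_encoder(**inputs).pooler_output.numpy()}

dataset = Dataset.from_dict({"title": ["Example"], "text": ["An example passage."]})
dataset = dataset.map(embed, batched=True, batch_size=16)
dataset.save_to_disk("./5k_index_data/my_knowledge_dataset")

index = faiss.IndexHNSWFlat(768, 128, faiss.METRIC_INNER_PRODUCT)  # DPR embeddings are 768-d
dataset.add_faiss_index("embeddings", custom_index=index)
dataset.get_index("embeddings").save("./5k_index_data/my_knowledge_dataset_hnsw_index.faiss")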
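The indexing outputs[0]["generated_text"][-1]["content"] in the new retrieved_info relies on the chat behaviour of the text-generation pipeline: given a list of messages, it returns the whole conversation with the newly generated assistant turn appended last. A minimal sketch of that contract (the prompt is an arbitrary example):

outputs = pipe([{"role": "user", "content": "Hello!"}], max_new_tokens=32)
print(outputs[0]["generated_text"][-1])  # {'role': 'assistant', 'content': '...'}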
 
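One gap this commit leaves open: respond still receives the System message, Max new tokens, Temperature and Top-p values from the UI, but retrieved_info never uses them, so the sliders have no effect. A sketch of threading the token budget through (the max_new_tokens parameter on retrieved_info is a hypothetical extension, not something this commit adds):

def respond(message, history, system_message, max_tokens, temperature, top_p):
    if message:
        # retrieved_info would need to accept max_new_tokens and forward it to pipe(...)
        return retrieved_info(message, max_new_tokens=max_tokens)
    return ""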