nishantgaurav23 committed
Commit 37477f9 · verified · Parent: ba2d700

Update app.py

Files changed (1)
  1. app.py (+211 -86)
app.py CHANGED
@@ -164,37 +164,39 @@ class RAGPipeline:
         query = re.sub(r'\s+', ' ', query)
         return query

-    def postprocess_response(self, response: str) -> str:
-        """Clean up the generated response"""
-        response = response.strip()
-        response = re.sub(r'\s+', ' ', response)
-        response = re.sub(r'\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}(?:\+\d{2}:?\d{2})?', '', response)
-        return response

-    def query_model(self, prompt: str) -> str:
-        """Query the local Llama model"""
-        try:
-            if self.llm is None:
-                raise RuntimeError("Model not initialized")

-            response = self.llm(
-                prompt,
-                max_tokens=512,
-                temperature=0.4,
-                top_p=0.95,
-                echo=False,
-                stop=["Question:", "\n\n"]
-            )
-
-            if response and 'choices' in response and len(response['choices']) > 0:
-                text = response['choices'][0].get('text', '').strip()
-                return text
-            else:
-                raise ValueError("No valid response generated")

-        except Exception as e:
-            logging.error(f"Error in query_model: {str(e)}")
-            raise

     # def process_query(self, query: str, placeholder) -> str:
     # try:
@@ -258,90 +260,213 @@ class RAGPipeline:
     # placeholder.warning(message)
     # return message

     def process_query(self, query: str, placeholder) -> str:
         try:
-            # Preprocess query
             query = self.preprocess_query(query)
-            logging.info(f"Processing query: {query}")
-
-            # Show retrieval status
             status = placeholder.empty()
             status.write("🔍 Finding relevant information...")
-
-            # Get embeddings and search
             query_embedding = self.retriever.encode([query])
             similarities = F.cosine_similarity(query_embedding, self.retriever.doc_embeddings)
             scores, indices = torch.topk(similarities, k=min(self.k, len(self.documents)))
-
-            # Log similarity scores
-            for idx, score in zip(indices.tolist(), scores.tolist()):
-                logging.info(f"Score: {score:.4f} | Document: {self.documents[idx][:100]}...")
-
             relevant_docs = [self.documents[idx] for idx in indices.tolist()]

-            # Update status
             status.write("💭 Generating response...")
-
-            # Prepare context and prompt
-            context = "\n".join(relevant_docs[:3])
             prompt = f"""Context information is below:
-            {context}
-
             Given the context above, please answer the following question:
             {query}
-
-            Guidelines:
             - If you cannot answer based on the context, say so politely
-            - Keep the response concise and focused
-            - Only include sports-related information
-            - No dates or timestamps in the response
-            - Use clear, natural language

             Answer:"""
-
-            # Generate response
             response_placeholder = placeholder.empty()
-
-            try:
-                # Add logging for model state
-                logging.info("Model state check - Is None?: " + str(self.llm is None))
-
-                # Directly use Llama model
-                response = self.llm(
-                    prompt,
-                    max_tokens=512,
-                    temperature=0.4,
-                    top_p=0.95,
-                    echo=False,
-                    stop=["Question:", "\n\n"]
-                )
-
-                logging.info(f"Raw model response: {response}")
-
-                if response and isinstance(response, dict) and 'choices' in response:
-                    generated_text = response['choices'][0].get('text', '').strip()
-                    if generated_text:
-                        final_response = self.postprocess_response(generated_text)
-                        response_placeholder.markdown(final_response)
-                        return final_response
-
-                message = "No relevant answer found. Please try rephrasing your question."
-                response_placeholder.warning(message)
-                return message

             except Exception as e:
                 logging.error(f"Generation error: {str(e)}")
-                logging.error(f"Full error details: ", exc_info=True)
-                message = f"Had some trouble generating the response: {str(e)}"
                 response_placeholder.warning(message)
                 return message
-
         except Exception as e:
             logging.error(f"Process error: {str(e)}")
-            logging.error(f"Full error details: ", exc_info=True)
-            message = f"Something went wrong: {str(e)}"
             placeholder.warning(message)
-            return message

 @st.cache_resource(show_spinner=False)
 def initialize_rag_pipeline():
 
         query = re.sub(r'\s+', ' ', query)
         return query

+    ### Added on Nov 2, 2024

+    # def postprocess_response(self, response: str) -> str:
+    # """Clean up the generated response"""
+    # response = response.strip()
+    # response = re.sub(r'\s+', ' ', response)
+    # response = re.sub(r'\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}(?:\+\d{2}:?\d{2})?', '', response)
+    # return response
+
+    # def query_model(self, prompt: str) -> str:
+    # """Query the local Llama model"""
+    # try:
+    # if self.llm is None:
+    # raise RuntimeError("Model not initialized")

+    # response = self.llm(
+    # prompt,
+    # max_tokens=512,
+    # temperature=0.4,
+    # top_p=0.95,
+    # echo=False,
+    # stop=["Question:", "\n\n"]
+    # )
+
+    # if response and 'choices' in response and len(response['choices']) > 0:
+    # text = response['choices'][0].get('text', '').strip()
+    # return text
+    # else:
+    # raise ValueError("No valid response generated")

+    # except Exception as e:
+    # logging.error(f"Error in query_model: {str(e)}")
+    # raise

     # def process_query(self, query: str, placeholder) -> str:
     # try:
 
     # placeholder.warning(message)
     # return message

+    # def process_query(self, query: str, placeholder) -> str:
+    # try:
+    # # Preprocess query
+    # query = self.preprocess_query(query)
+    # logging.info(f"Processing query: {query}")
+
+    # # Show retrieval status
+    # status = placeholder.empty()
+    # status.write("🔍 Finding relevant information...")
+
+    # # Get embeddings and search
+    # query_embedding = self.retriever.encode([query])
+    # similarities = F.cosine_similarity(query_embedding, self.retriever.doc_embeddings)
+    # scores, indices = torch.topk(similarities, k=min(self.k, len(self.documents)))
+
+    # # Log similarity scores
+    # for idx, score in zip(indices.tolist(), scores.tolist()):
+    # logging.info(f"Score: {score:.4f} | Document: {self.documents[idx][:100]}...")
+
+    # relevant_docs = [self.documents[idx] for idx in indices.tolist()]
+
+    # # Update status
+    # status.write("💭 Generating response...")
+
+    # # Prepare context and prompt
+    # context = "\n".join(relevant_docs[:3])
+    # prompt = f"""Context information is below:
+    # {context}
+
+    # Given the context above, please answer the following question:
+    # {query}
+
+    # Guidelines:
+    # - If you cannot answer based on the context, say so politely
+    # - Keep the response concise and focused
+    # - Only include sports-related information
+    # - No dates or timestamps in the response
+    # - Use clear, natural language
+
+    # Answer:"""
+
+    # # Generate response
+    # response_placeholder = placeholder.empty()
+
+    # try:
+    # # Add logging for model state
+    # logging.info("Model state check - Is None?: " + str(self.llm is None))
+
+    # # Directly use Llama model
+    # response = self.llm(
+    # prompt,
+    # max_tokens=512,
+    # temperature=0.4,
+    # top_p=0.95,
+    # echo=False,
+    # stop=["Question:", "\n\n"]
+    # )
+
+    # logging.info(f"Raw model response: {response}")
+
+    # if response and isinstance(response, dict) and 'choices' in response:
+    # generated_text = response['choices'][0].get('text', '').strip()
+    # if generated_text:
+    # final_response = self.postprocess_response(generated_text)
+    # response_placeholder.markdown(final_response)
+    # return final_response
+
+    # message = "No relevant answer found. Please try rephrasing your question."
+    # response_placeholder.warning(message)
+    # return message
+
+    # except Exception as e:
+    # logging.error(f"Generation error: {str(e)}")
+    # logging.error(f"Full error details: ", exc_info=True)
+    # message = f"Had some trouble generating the response: {str(e)}"
+    # response_placeholder.warning(message)
+    # return message
+
+    # except Exception as e:
+    # logging.error(f"Process error: {str(e)}")
+    # logging.error(f"Full error details: ", exc_info=True)
+    # message = f"Something went wrong: {str(e)}"
+    # placeholder.warning(message)
+    # return message
+
+    ### Added on Nov 2, 2024
+    def postprocess_response(self, response: str) -> str:
+        """Clean up the generated response"""
+        try:
+            # Remove datetime patterns and other unwanted content
+            response = re.sub(r'\d{4}-\d{2}-\d{2}(?:T|\s)\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?', '', response)
+            response = re.sub(r'User \d+:.*?(?=User \d+:|$)', '', response)
+            response = re.sub(r'\d{2}:\d{2}(?::\d{2})?(?:\s?(?:AM|PM))?', '', response)
+            response = re.sub(r'\d{1,2}[-/]\d{1,2}[-/]\d{2,4}', '', response)
+            response = re.sub(r'(?m)^User \d+:', '', response)
+
+            # Clean up spacing but preserve intentional paragraph breaks
+            # Replace multiple newlines with two newlines (one paragraph break)
+            response = re.sub(r'\n\s*\n\s*\n+', '\n\n', response)
+            # Replace multiple spaces with single space
+            response = re.sub(r' +', ' ', response)
+            # Clean up beginning/end
+            response = response.strip()
+
+            return response
+        except Exception as e:
+            logging.error(f"Error in postprocess_response: {str(e)}")
+            return response
+
     def process_query(self, query: str, placeholder) -> str:
         try:
             query = self.preprocess_query(query)
             status = placeholder.empty()
             status.write("🔍 Finding relevant information...")
+
             query_embedding = self.retriever.encode([query])
             similarities = F.cosine_similarity(query_embedding, self.retriever.doc_embeddings)
             scores, indices = torch.topk(similarities, k=min(self.k, len(self.documents)))
+
             relevant_docs = [self.documents[idx] for idx in indices.tolist()]
+            cleaned_docs = []
+            for doc in relevant_docs[:3]:
+                cleaned_text = self.postprocess_response(doc)
+                if cleaned_text:
+                    cleaned_docs.append(cleaned_text)

             status.write("💭 Generating response...")
+
             prompt = f"""Context information is below:
+            {' '.join(cleaned_docs)}
+
             Given the context above, please answer the following question:
             {query}
+
+            Guidelines for your response:
+            - Structure your response in clear, logical paragraphs
+            - Start a new paragraph for each new main point or aspect
+            - If listing multiple items, use separate paragraphs
+            - Keep each paragraph focused on a single topic or point
+            - Use natural paragraph breaks where the content shifts focus
+            - Maintain clear transitions between paragraphs
+            - If providing statistics or achievements, group them logically
+            - If describing different aspects (e.g., career, playing style, achievements), use separate paragraphs
+            - Keep paragraphs concise but complete
+            - Exclude any dates, timestamps, or user comments
+            - Focus on factual sports information
             - If you cannot answer based on the context, say so politely

+            Format your response with proper paragraph breaks where appropriate.
+
             Answer:"""
+
             response_placeholder = placeholder.empty()

+            try:
+                response_text = self.query_model(prompt)
+                if response_text:
+                    # Clean up the response while preserving paragraph structure
+                    final_response = self.postprocess_response(response_text)
+
+                    # Convert cleaned response to markdown with proper paragraph spacing
+                    markdown_response = final_response.replace('\n\n', '\n\n \n\n')  # Add visual spacing between paragraphs
+
+                    response_placeholder.markdown(markdown_response)
+                    return final_response
+                else:
+                    message = "No relevant answer found. Please try rephrasing your question."
+                    response_placeholder.warning(message)
+                    return message
+
             except Exception as e:
                 logging.error(f"Generation error: {str(e)}")
+                message = "Had some trouble generating the response. Please try again."
                 response_placeholder.warning(message)
                 return message
+
         except Exception as e:
             logging.error(f"Process error: {str(e)}")
+            message = "Something went wrong. Please try again with a different question."
             placeholder.warning(message)
+            return message
+
+    def query_model(self, prompt: str) -> str:
+        """Query the local Llama model"""
+        try:
+            if self.llm is None:
+                raise RuntimeError("Model not initialized")
+
+            response = self.llm(
+                prompt,
+                max_tokens=512,
+                temperature=0.4,
+                top_p=0.95,
+                echo=False,
+                stop=["Question:", "Context:", "Guidelines:"],  # Removed \n\n from stop tokens to allow paragraphs
+                repeat_penalty=1.1  # Added to encourage more diverse text
+            )
+
+            if response and 'choices' in response and len(response['choices']) > 0:
+                text = response['choices'][0].get('text', '').strip()
+                return text
+            else:
+                raise ValueError("No valid response generated")
+
+        except Exception as e:
+            logging.error(f"Error in query_model: {str(e)}")
+            raise

 @st.cache_resource(show_spinner=False)
 def initialize_rag_pipeline():
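
For reference, a minimal sketch of how the updated methods might be exercised from the Streamlit UI. It assumes initialize_rag_pipeline() returns a ready RAGPipeline instance (as the cached factory above suggests); the widget label and the sample input are illustrative and not part of this commit.

# Illustrative only — not part of this commit.
# Assumes initialize_rag_pipeline() returns a RAGPipeline wired with .llm, .retriever, .documents and .k.
import streamlit as st

rag = initialize_rag_pipeline()              # cached via @st.cache_resource

query = st.text_input("Ask a sports question")
if query:
    placeholder = st.empty()                 # process_query writes status updates and the answer here
    answer = rag.process_query(query, placeholder)
    # process_query already renders the cleaned answer via placeholder.markdown();
    # the returned string could additionally be kept in st.session_state for chat history.

# The new postprocess_response can also be checked directly:
cleaned = rag.postprocess_response("User 3: 2024-11-02 14:05:33+05:30 Kohli scored a century.")
# expected: roughly "Kohli scored a century." — the "User N:" tag and the timestamp are stripped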