rwillats committed
Commit 2ff25ee · verified · 1 Parent(s): 80fa2b5

Upload folder using huggingface_hub

Files changed (1)
  hate_speech_demo.py  +32 -13
hate_speech_demo.py CHANGED
@@ -507,33 +507,52 @@ class ContextualAPIUtils:
         return response_content, response_time, retrieval_text, error_message
 
 # Contextual AI rating - UPDATED PROMPT FORMAT
+# Replace your get_contextual_rating function with this enhanced version
 def get_contextual_rating(contextual_api, user_input):
     rating_prompt = f"Content to rate: {user_input}"
     response_text, response_time, retrieval_text, error = contextual_api.chat(rating_prompt)
     if error:
         return f"Error: {error}", "", "unsafe"
 
-    # Store the original response for safety level detection
-    original_response = response_text
-
     # Determine safety level based on response content
     safety_level = "safe"
-    if "out of policy" in original_response.lower() or "unsafe" in original_response.lower():
+    if "out of policy" in response_text.lower() or "unsafe" in response_text.lower():
         safety_level = "unsafe"
-    elif "caution" in original_response.lower() or "warning" in original_response.lower():
+    elif "caution" in response_text.lower() or "warning" in response_text.lower():
         safety_level = "warning"
 
-    # Try to format the response (but keep the original if anything goes wrong)
+    # Parse specific parts of the response
     try:
-        # Simple formatting approach - just add line breaks between sentences
-        formatted_response = response_text.replace(". ", ".<br><br>")
-        # Ensure we don't have too many breaks
-        formatted_response = formatted_response.replace("<br><br><br>", "<br><br>")
+        # Look for policy rating
+        rating_match = re.search(r'Policy Rating: (.*?)(?:Policy|$)', response_text, re.IGNORECASE)
+        policy_rating = rating_match.group(1).strip() if rating_match else ""
+
+        # Look for policy category
+        category_match = re.search(r'Policy Category: (.*?)(?:Explanation|$)', response_text, re.IGNORECASE)
+        policy_category = category_match.group(1).strip() if category_match else ""
+
+        # Look for explanation
+        explanation_match = re.search(r'Explanation: (.*)', response_text, re.IGNORECASE)
+        explanation = explanation_match.group(1).strip() if explanation_match else response_text
+
+        # Format with HTML
+        formatted_text = ""
+        if policy_rating:
+            formatted_text += f"<strong>Policy Rating:</strong> {policy_rating}<br><br>"
+        if policy_category:
+            formatted_text += f"<strong>Policy Category:</strong> {policy_category}<br><br>"
+        if explanation:
+            formatted_text += f"<strong>Explanation:</strong> {explanation}"
+
+        # If we couldn't extract anything, use the original
+        if not formatted_text:
+            formatted_text = response_text
+
     except Exception as e:
-        print(f"Error formatting response: {e}")
-        formatted_response = response_text
+        print(f"Error formatting rating: {e}")
+        formatted_text = response_text
 
-    return formatted_response, retrieval_text, safety_level
+    return formatted_text, retrieval_text, safety_level
 
 # LlamaGuard rating
 LLAMA_HARM_CATEGORIES = {
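
Reviewer note: below is a minimal standalone sketch of the logic this hunk adds, assuming the Contextual AI model replies on one line in a "Policy Rating: ... Policy Category: ... Explanation: ..." format (the real text comes from contextual_api.chat(), and hate_speech_demo.py is assumed to already import re, since the hunk does not add that import). The helper names classify_safety and parse_rating_response are hypothetical and used only for illustration.

import re

def classify_safety(response_text):
    # Same substring checks as the updated get_contextual_rating
    lowered = response_text.lower()
    if "out of policy" in lowered or "unsafe" in lowered:
        return "unsafe"
    if "caution" in lowered or "warning" in lowered:
        return "warning"
    return "safe"

def parse_rating_response(response_text):
    # Mirrors the regexes in the updated get_contextual_rating
    rating_match = re.search(r'Policy Rating: (.*?)(?:Policy|$)', response_text, re.IGNORECASE)
    category_match = re.search(r'Policy Category: (.*?)(?:Explanation|$)', response_text, re.IGNORECASE)
    explanation_match = re.search(r'Explanation: (.*)', response_text, re.IGNORECASE)
    policy_rating = rating_match.group(1).strip() if rating_match else ""
    policy_category = category_match.group(1).strip() if category_match else ""
    explanation = explanation_match.group(1).strip() if explanation_match else response_text
    return policy_rating, policy_category, explanation

sample = "Policy Rating: Out of Policy Policy Category: Hate Speech Explanation: Attacks a protected group."
print(classify_safety(sample))        # unsafe
print(parse_rating_response(sample))  # ('Out of', 'Hate Speech', 'Attacks a protected group.')

One behavior worth noting: the lazy (?:Policy|$) terminator also stops at the word "Policy" inside the rating itself, so a rating such as "Out of Policy" is captured as "Out of"; anchoring the terminator on "Policy Category" instead would preserve the full rating text.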