Upload folder using huggingface_hub

hate_speech_demo.py  (+32 -13)  CHANGED
@@ -507,33 +507,52 @@ class ContextualAPIUtils:
         return response_content, response_time, retrieval_text, error_message
 
 # Contextual AI rating - UPDATED PROMPT FORMAT
+# Replace your get_contextual_rating function with this enhanced version
 def get_contextual_rating(contextual_api, user_input):
     rating_prompt = f"Content to rate: {user_input}"
     response_text, response_time, retrieval_text, error = contextual_api.chat(rating_prompt)
     if error:
         return f"Error: {error}", "", "unsafe"
 
-    # Store the original response for safety level detection
-    original_response = response_text
-
     # Determine safety level based on response content
     safety_level = "safe"
-    if "out of policy" in response_text.lower():
+    if "out of policy" in response_text.lower() or "unsafe" in response_text.lower():
         safety_level = "unsafe"
-    elif "caution" in response_text.lower():
+    elif "caution" in response_text.lower() or "warning" in response_text.lower():
         safety_level = "warning"
 
-    #
+    # Parse specific parts of the response
     try:
-        #
-
-
-
+        # Look for policy rating
+        rating_match = re.search(r'Policy Rating: (.*?)(?:Policy|$)', response_text, re.IGNORECASE)
+        policy_rating = rating_match.group(1).strip() if rating_match else ""
+
+        # Look for policy category
+        category_match = re.search(r'Policy Category: (.*?)(?:Explanation|$)', response_text, re.IGNORECASE)
+        policy_category = category_match.group(1).strip() if category_match else ""
+
+        # Look for explanation
+        explanation_match = re.search(r'Explanation: (.*)', response_text, re.IGNORECASE)
+        explanation = explanation_match.group(1).strip() if explanation_match else response_text
+
+        # Format with HTML
+        formatted_text = ""
+        if policy_rating:
+            formatted_text += f"<strong>Policy Rating:</strong> {policy_rating}<br><br>"
+        if policy_category:
+            formatted_text += f"<strong>Policy Category:</strong> {policy_category}<br><br>"
+        if explanation:
+            formatted_text += f"<strong>Explanation:</strong> {explanation}"
+
+        # If we couldn't extract anything, use the original
+        if not formatted_text:
+            formatted_text = response_text
+
     except Exception as e:
-        print(f"Error formatting
-
+        print(f"Error formatting rating: {e}")
+        formatted_text = response_text
 
-    return
+    return formatted_text, retrieval_text, safety_level
 
 # LlamaGuard rating
 LLAMA_HARM_CATEGORIES = {
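
The parsing added in this commit can be sanity-checked on its own, without calling the Contextual AI API. The sketch below is a minimal, hypothetical example: it runs the same three re.search patterns against a made-up single-line response in the "Policy Rating / Policy Category / Explanation" layout. The wording and layout of a real model response may differ, and the new code assumes hate_speech_demo.py already imports re.

import re

# Hypothetical model output; a live Contextual AI response may be worded differently.
sample_response = (
    "Policy Rating: Unsafe "
    "Policy Category: Hate Speech "
    "Explanation: The message attacks a protected group."
)

# Same patterns as in the updated get_contextual_rating
rating_match = re.search(r'Policy Rating: (.*?)(?:Policy|$)', sample_response, re.IGNORECASE)
category_match = re.search(r'Policy Category: (.*?)(?:Explanation|$)', sample_response, re.IGNORECASE)
explanation_match = re.search(r'Explanation: (.*)', sample_response, re.IGNORECASE)

print(rating_match.group(1).strip())       # Unsafe
print(category_match.group(1).strip())     # Hate Speech
print(explanation_match.group(1).strip())  # The message attacks a protected group.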