rwillats committed
Commit 2ff25ee · verified · 1 Parent(s): 80fa2b5

Upload folder using huggingface_hub

Files changed (1)
  hate_speech_demo.py  +32 -13
hate_speech_demo.py CHANGED
@@ -507,33 +507,52 @@ class ContextualAPIUtils:
         return response_content, response_time, retrieval_text, error_message
 
 # Contextual AI rating - UPDATED PROMPT FORMAT
+# Replace your get_contextual_rating function with this enhanced version
 def get_contextual_rating(contextual_api, user_input):
     rating_prompt = f"Content to rate: {user_input}"
     response_text, response_time, retrieval_text, error = contextual_api.chat(rating_prompt)
     if error:
         return f"Error: {error}", "", "unsafe"
 
-    # Store the original response for safety level detection
-    original_response = response_text
-
     # Determine safety level based on response content
     safety_level = "safe"
-    if "out of policy" in original_response.lower() or "unsafe" in original_response.lower():
+    if "out of policy" in response_text.lower() or "unsafe" in response_text.lower():
         safety_level = "unsafe"
-    elif "caution" in original_response.lower() or "warning" in original_response.lower():
+    elif "caution" in response_text.lower() or "warning" in response_text.lower():
         safety_level = "warning"
 
-    # Try to format the response (but keep the original if anything goes wrong)
+    # Parse specific parts of the response
     try:
-        # Simple formatting approach - just add line breaks between sentences
-        formatted_response = response_text.replace(". ", ".<br><br>")
-        # Ensure we don't have too many breaks
-        formatted_response = formatted_response.replace("<br><br><br>", "<br><br>")
+        # Look for policy rating
+        rating_match = re.search(r'Policy Rating: (.*?)(?:Policy|$)', response_text, re.IGNORECASE)
+        policy_rating = rating_match.group(1).strip() if rating_match else ""
+
+        # Look for policy category
+        category_match = re.search(r'Policy Category: (.*?)(?:Explanation|$)', response_text, re.IGNORECASE)
+        policy_category = category_match.group(1).strip() if category_match else ""
+
+        # Look for explanation
+        explanation_match = re.search(r'Explanation: (.*)', response_text, re.IGNORECASE)
+        explanation = explanation_match.group(1).strip() if explanation_match else response_text
+
+        # Format with HTML
+        formatted_text = ""
+        if policy_rating:
+            formatted_text += f"<strong>Policy Rating:</strong> {policy_rating}<br><br>"
+        if policy_category:
+            formatted_text += f"<strong>Policy Category:</strong> {policy_category}<br><br>"
+        if explanation:
+            formatted_text += f"<strong>Explanation:</strong> {explanation}"
+
+        # If we couldn't extract anything, use the original
+        if not formatted_text:
+            formatted_text = response_text
+
     except Exception as e:
-        print(f"Error formatting response: {e}")
-        formatted_response = response_text
+        print(f"Error formatting rating: {e}")
+        formatted_text = response_text
 
-    return formatted_response, retrieval_text, safety_level
+    return formatted_text, retrieval_text, safety_level
 
 # LlamaGuard rating
 LLAMA_HARM_CATEGORIES = {
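
Reviewer note: below is a minimal standalone sketch of the logic this hunk adds, assuming the Contextual AI model replies on one line in a "Policy Rating: ... Policy Category: ... Explanation: ..." format (the real text comes from contextual_api.chat(), and hate_speech_demo.py is assumed to already import re, since the hunk does not add that import). The helper names classify_safety and parse_rating_response are hypothetical and used only for illustration.

import re

def classify_safety(response_text):
    # Same substring checks as the updated get_contextual_rating
    lowered = response_text.lower()
    if "out of policy" in lowered or "unsafe" in lowered:
        return "unsafe"
    if "caution" in lowered or "warning" in lowered:
        return "warning"
    return "safe"

def parse_rating_response(response_text):
    # Mirrors the regexes in the updated get_contextual_rating
    rating_match = re.search(r'Policy Rating: (.*?)(?:Policy|$)', response_text, re.IGNORECASE)
    category_match = re.search(r'Policy Category: (.*?)(?:Explanation|$)', response_text, re.IGNORECASE)
    explanation_match = re.search(r'Explanation: (.*)', response_text, re.IGNORECASE)
    policy_rating = rating_match.group(1).strip() if rating_match else ""
    policy_category = category_match.group(1).strip() if category_match else ""
    explanation = explanation_match.group(1).strip() if explanation_match else response_text
    return policy_rating, policy_category, explanation

sample = "Policy Rating: Out of Policy Policy Category: Hate Speech Explanation: Attacks a protected group."
print(classify_safety(sample))        # unsafe
print(parse_rating_response(sample))  # ('Out of', 'Hate Speech', 'Attacks a protected group.')

One behavior worth noting: the lazy (?:Policy|$) terminator also stops at the word "Policy" inside the rating itself, so a rating such as "Out of Policy" is captured as "Out of"; anchoring the terminator on "Policy Category" instead would preserve the full rating text.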