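"""Streamlit app that flags harmful content in text with a Hugging Face
classifier and drafts a moderation response with a text-generation model."""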
import streamlit as st
import requests

# -----------------------------------------------------------
# SETUP: Hugging Face API and Models
# -----------------------------------------------------------
HUGGINGFACE_API_KEY = "your_huggingface_api_key"  # Replace with your API key from https://huggingface.co/settings/tokens
HEADERS = {"Authorization": f"Bearer {HUGGINGFACE_API_KEY}"}
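# Hardcoded tokens are easy to leak. A minimal alternative sketch, assuming
# the token is exported as the environment variable HF_API_KEY (a name chosen
# here purely for illustration):
#   import os
#   HUGGINGFACE_API_KEY = os.environ["HF_API_KEY"]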

# Hugging Face Model Endpoints (Using Pretrained Models)
CLASSIFIER_API_URL = "https://api-inference.huggingface.co/models/Hate-speech-CNERG/bert-base-uncased-hatexplain"
GENERATOR_API_URL = "https://api-inference.huggingface.co/models/KAXY/GPT2-against-hate"
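# Note: the hatexplain checkpoint is a multi-class classifier whose labels are
# roughly "hate speech", "offensive", and "normal" (exact label names may vary
# by model version), while GPT2-against-hate is a counter-speech generator.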

# -----------------------------------------------------------
# FUNCTION DEFINITIONS
# -----------------------------------------------------------
def detect_harmful_content(text):
    """Detects harmful content using a Hugging Face model."""
    payload = {"inputs": text}
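    # On a cold start the hosted Inference API may answer 503 while the model
    # loads; adding {"options": {"wait_for_model": True}} to the payload (a
    # documented Inference API option) asks the server to wait until it is ready.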
    response = requests.post(CLASSIFIER_API_URL, headers=HEADERS, json=payload)

    if response.status_code != 200:
        return [{"category": "Error", "score": 0, "message": "Failed to fetch response"}]
    
    results = response.json()

    # For a single input the text-classification endpoint returns a nested
    # list ([[{"label": ..., "score": ...}, ...]]), so unwrap the outer list
    # before iterating over the per-label dicts.
    if results and isinstance(results[0], list):
        results = results[0]

    detected = []
    threshold = 0.5  # Adjust threshold for sensitivity
    for result in results:
        if result.get('score', 0) >= threshold:
            detected.append({"category": result.get('label', 'Unknown'), "score": result.get('score', 0)})
    
    return detected

def generate_mitigation_response(text, detected_categories):
    """Generates a mitigation response based on detected harmful speech."""
    if not detected_categories:
        return "βœ… Content appears safe. No harmful content detected."

    categories_str = ", ".join([cat["category"] for cat in detected_categories])
    prompt = (f"The following content has been flagged for {categories_str}:\n\n"
              f"\"{text}\"\n\n"
              "Please generate a respectful and informative moderation response.")

    payload = {"inputs": prompt, "parameters": {"max_length": 150}}
    response = requests.post(GENERATOR_API_URL, headers=HEADERS, json=payload)

    if response.status_code != 200:
        return "⚠️ Error: Could not generate a response."

    generated = response.json()
    generated_text = generated[0].get('generated_text', "")
    # Text-generation endpoints echo the prompt by default; strip it so only
    # the newly generated moderation text is returned.
    if generated_text.startswith(prompt):
        generated_text = generated_text[len(prompt):].strip()
    return generated_text or "No response generated."

# -----------------------------------------------------------
# STREAMLIT USER INTERFACE
# -----------------------------------------------------------
st.set_page_config(page_title="Hate Speech Detector", layout="centered")

st.title("🔍 AI-Powered Hate Speech Detection & Mitigation")
st.markdown("Detects **hate speech, misinformation, and cyberbullying** in social media posts.")

# User Input
user_input = st.text_area("✏️ Enter the text to analyze:", height=150)

if st.button("Analyze"):
    if user_input.strip() == "":
        st.error("⚠️ Please enter some text to analyze.")
    else:
        st.markdown("### πŸ“Š Analysis Results")
        detected = detect_harmful_content(user_input)

        if detected and detected[0].get("category") == "Error":
            # Surface API failures instead of silently reporting "safe".
            st.error(f"⚠️ {detected[0].get('message', 'Analysis failed.')}")
        else:
            if detected:
                for d in detected:
                    st.write(f"**Category:** {d['category']} | **Confidence:** {d['score']:.2f}")
            else:
                st.write("✅ No harmful content detected.")

            st.markdown("### 💡 Mitigation Response")
            mitigation_response = generate_mitigation_response(user_input, detected)
            st.write(mitigation_response)
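# To try the app locally (assuming this file is saved as app.py):
#   streamlit run app.py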