# Hugging Face Spaces page header captured with this file: "Runtime error".
import os
import re
import subprocess

import nltk
import streamlit as st
from keras.models import load_model
from nltk.corpus import stopwords
from nltk.tokenize import TweetTokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
# Repository that is cloned at start-up.
_REPO_URL = "https://huggingface.co/lydiadida/lstmhatespeachdetection"
# Target directory; this is the name git would pick by default for the URL.
_REPO_DIR = "lstmhatespeachdetection"

# Command to execute, as an argument list so no shell is involved.
command = ["git", "clone", _REPO_URL, _REPO_DIR]

# Streamlit re-executes this script on every user interaction. Cloning into
# an existing directory fails, so only clone when the directory is missing.
if os.path.isdir(_REPO_DIR):
    print("Model repository already present; skipping git clone.")
else:
    try:
        subprocess.run(command, check=True)
        print("Git clone command executed successfully.")
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # CalledProcessError: git exited with a non-zero status.
        # FileNotFoundError: the git executable is not installed.
        print(f"Error executing git clone command: {e}")

# Load the LSTM model
# NOTE(review): the clone lands in "lstmhatespeachdetection/", while this
# path is relative to the working directory - confirm where model.h5 lives.
model_path = "model.h5"  # Set your model path here
def load_lstm_model(model_path):
    """Load the saved model stored at ``model_path``.

    Args:
        model_path: Path passed unchanged to ``load_model``.

    Returns:
        The object returned by ``load_model`` for that path.
    """
    lstm_model = load_model(model_path)
    return lstm_model
def _call_with_nltk_data(func, resources):
    """Call ``func()``; on LookupError download ``resources`` and retry once."""
    try:
        return func()
    except LookupError:
        # NLTK raises LookupError when a corpus or tokenizer model is missing.
        for resource in resources:
            nltk.download(resource, quiet=True)
        return func()


def clean_text(text):
    """Normalise a tweet-like string into lowercase, letters-only words.

    Steps, in order: strip ``@handles`` and ``http...`` URLs, drop English
    stopwords, re-tokenize with ``TweetTokenizer``, remove ``#`` symbols,
    drop tokens of two characters or fewer, lowercase, then remove digits
    and every remaining character that is not an ASCII letter or whitespace.

    Args:
        text: Raw input string.

    Returns:
        The cleaned text as a single space-separated string (may be empty).

    Raises:
        LookupError: If required NLTK data is missing and cannot be
            downloaded.
    """
    # Remove Twitter usernames and URLs from the raw text. This has to happen
    # before nltk.word_tokenize, which splits "@user" and "http://..." into
    # separate tokens so the patterns would no longer match afterwards.
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'http\S+', '', text)

    # Remove stopwords. The stopword list is lowercase, so compare on the
    # lowercased word; otherwise "The" survives while "the" is dropped.
    stop_words = set(
        _call_with_nltk_data(lambda: stopwords.words('english'), ('stopwords',))
    )
    words = _call_with_nltk_data(
        lambda: nltk.word_tokenize(text), ('punkt', 'punkt_tab')
    )
    filtered_words = [word for word in words if word.lower() not in stop_words]

    # Tokenize using TweetTokenizer
    tokenizer = TweetTokenizer(preserve_case=True)
    tokens = tokenizer.tokenize(' '.join(filtered_words))

    # Remove hashtag symbols
    tokens = [token.replace('#', '') for token in tokens]

    # Remove short words and lowercase the rest
    text = ' '.join(token.lower() for token in tokens if len(token) > 2)

    # Remove digits
    text = re.sub(r'\d+', '', text)

    # Remove non-alphanumeric characters
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    return text
def preprocess_text(text, tokenizer=None, maxlen=100):
    """Clean ``text`` and convert it to a padded integer sequence.

    Args:
        text: Raw input string.
        tokenizer: Optional already-fitted ``Tokenizer``. When omitted, a new
            one is fitted on the cleaned input only (the original behaviour).
        maxlen: Length the sequence is padded or truncated to by
            ``pad_sequences``.

    Returns:
        The result of ``pad_sequences`` for the single cleaned text.
    """
    # Clean the text
    cleaned_text = clean_text(text)

    if tokenizer is None:
        # NOTE(review): a Tokenizer fitted on this one input assigns word
        # indices from the input itself, so they are unrelated to whatever
        # vocabulary the model was trained with. Pass the training tokenizer
        # via ``tokenizer`` once it is available - TODO confirm with owner.
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts([cleaned_text])

    # Tokenize and pad sequences
    text_sequences = tokenizer.texts_to_sequences([cleaned_text])
    return pad_sequences(text_sequences, maxlen=maxlen)
# Function to predict hate speech
def predict_hate_speech(text, lstm_model):
    """Run ``lstm_model`` on the preprocessed form of ``text``.

    Args:
        text: Raw input string.
        lstm_model: Object exposing ``predict``; it receives the output of
            ``preprocess_text(text)``.

    Returns:
        Whatever ``lstm_model.predict`` returns for that input.
    """
    model_input = preprocess_text(text)
    return lstm_model.predict(model_input)
# Main function to run the Streamlit app
def main():
    """Render the UI and classify the entered text when the button is pressed.

    Shows a warning for empty or whitespace-only input; otherwise loads the
    model from ``model_path``, runs the prediction and reports the result.
    """
    # Set up Streamlit UI
    st.title("Hate Speech Detection")
    st.write("Enter text below to detect hate speech:")
    input_text = st.text_area("Input Text", "")

    if not st.button("Detect Hate Speech"):
        return

    # Whitespace-only input contains nothing to classify; treat it as empty.
    if not input_text.strip():
        st.warning("Please enter some text")
        return

    # Load the model
    lstm_model = load_lstm_model(model_path)

    # Predict hate speech
    prediction = predict_hate_speech(input_text, lstm_model)

    # NOTE(review): this comparison assumes the model emits a single score
    # for the one input - TODO confirm against the model's output layer.
    if prediction > 0.5:
        st.error("Hate Speech Detected")
    else:
        st.success("No Hate Speech Detected")


# Run the app
if __name__ == "__main__":
    main()