import gradio as gr from transformers import pipeline import numpy as np import pandas as pd import re from pydub import AudioSegment from pydub.generators import Sine import io MODEL_NAME = "openai/whisper-tiny" BATCH_SIZE = 8 # device = 0 if torch.cuda.is_available() else "cpu" pipe = pipeline( task="automatic-speech-recognition", model=MODEL_NAME, chunk_length_s=30, # device=device, ) arabic_bad_Words = pd.read_csv("arabic_bad_words_dataset.csv") english_bad_Words = pd.read_csv("english_bad_words_dataset.csv") def clean_english_word(word): # Use regex to remove special characters, punctuation, and spaces around words cleaned_text = re.sub(r'^[\s\W_]+|[\s\W_]+$', '', word) return cleaned_text def clean_arabic_word(word): # Define a regex pattern to match any non-Arabic letter character pattern = r'[^\u0600-\u06FF]' # Replace any character matching the pattern with an empty string cleaned_word = re.sub(pattern, '', word) return cleaned_word def classifier(word_list_with_timestamp, language): foul_words = [] negative_timestamps = [] if language == "English": list_to_search = set(english_bad_Words["words"]) for item in word_list_with_timestamp: word = clean_english_word(item['text']) if word in list_to_search: foul_words.append(word) negative_timestamps.append(item['timestamp']) else: list_to_search = list(arabic_bad_Words["words"]) for item in word_list_with_timestamp: word = clean_arabic_word(item['text']) for word_in_list in list_to_search: if word_in_list == word: foul_words.append(word) negative_timestamps.append(item['timestamp']) break return [foul_words, negative_timestamps] def generate_bleep(duration_ms, frequency=1000): sine_wave = Sine(frequency) bleep = sine_wave.to_audio_segment(duration=duration_ms) return bleep def mute_audio_range(audio_filepath, ranges, bleep_frequency=800): audio = AudioSegment.from_file(audio_filepath) for range in ranges: start_time = range[0] - 0.1 end_time = range[-1] + 0.1 start_ms = start_time * 1000 # pydub works with milliseconds end_ms = end_time * 1000 duration_ms = end_ms - start_ms # Generate the bleep sound bleep_sound = generate_bleep(duration_ms, bleep_frequency) # Combine the original audio with the bleep sound audio = audio[:start_ms] + bleep_sound + audio[end_ms:] return audio def format_output_to_list(data): formatted_list = "\n".join([f"{item['timestamp'][0]}s - {item['timestamp'][1]}s \t : {item['text']}" for item in data]) return formatted_list def transcribe(input_audio, audio_language, task, timestamp_type): if input_audio is None: raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.") if timestamp_type == "sentence": timestamp_type = True else: timestamp_type = "word" output = pipe(input_audio, batch_size=BATCH_SIZE, return_timestamps=timestamp_type, generate_kwargs={"task": task}) text = output['text'] timestamps = format_output_to_list(output['chunks']) foul_words, negative_timestamps = classifier(output['chunks'], audio_language) foul_words = ", ".join(foul_words) audio_output = mute_audio_range(input_audio, negative_timestamps) # Save the output audio to a BytesIO object output_buffer = io.BytesIO() audio_output.export(output_buffer, format="wav") output_buffer.seek(0) # Read the audio data from the BytesIO buffer sample_rate = audio_output.frame_rate audio_data = np.frombuffer(output_buffer.read(), dtype=np.int16) return [text, timestamps, foul_words, (sample_rate, audio_data)] examples = [ ["arabic_english_audios/audios/arabic_audio_1.wav", 'Arabic', 'transcribe', 'word'], ["arabic_english_audios/audios/arabic_audio_2.wav", 'Arabic', 'transcribe', 'word'], ["arabic_english_audios/audios/arabic_audio_3.wav", 'Arabic', 'transcribe', 'word'], ["arabic_english_audios/audios/arabic_hate_audio_1.mp3", 'Arabic', 'transcribe', 'word'], ["arabic_english_audios/audios/arabic_hate_audio_2.flac", 'Arabic', 'transcribe', 'word'], ["arabic_english_audios/audios/arabic_hate_audio_3.mp3", 'Arabic', 'transcribe', 'word'], ["arabic_english_audios/audios/english_audio_1.wav", 'English', 'transcribe', 'word'], ["arabic_english_audios/audios/english_audio_2.mp3", 'English', 'transcribe', 'word'], ["arabic_english_audios/audios/english_audio_3.mp3", 'English', 'transcribe', 'word'], ["arabic_english_audios/audios/english_audio_4.mp3", 'English', 'transcribe', 'word'], ["arabic_english_audios/audios/english_audio_5.mp3", 'English', 'transcribe', 'word'], ["arabic_english_audios/audios/english_audio_6.wav", 'English', 'transcribe', 'word'] ] with gr.Blocks(theme=gr.themes.Default()) as demo: gr.HTML("