Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -75,8 +75,11 @@ NLLB_LANGUAGE_CODES = {
|
|
75 |
|
76 |
# Define a list of inappropriate words for content filtering
|
77 |
INAPPROPRIATE_WORDS = [
|
78 |
-
"
|
79 |
-
|
|
|
|
|
|
|
80 |
]
|
81 |
|
82 |
# Function to check if text contains inappropriate content
|
@@ -604,7 +607,15 @@ async def translate_audio(audio: UploadFile = File(...), source_lang: str = Form
|
|
604 |
inputs = processor(waveform.numpy()[0], sampling_rate=16000, return_tensors="pt").to(device)
|
605 |
with torch.no_grad():
|
606 |
language = "en" if source_code == "eng" else "tl" if source_code == "tgl" else None
|
607 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
608 |
transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
609 |
else:
|
610 |
processor = stt_mms_processor
|
|
|
75 |
|
76 |
# Define a list of inappropriate words for content filtering
|
77 |
INAPPROPRIATE_WORDS = [
|
78 |
+
"fuck", "shit", "bitch", "asshole", "damn", "cunt", "whore", "bastard",
|
79 |
+
"son of a bitch", "dick", "pussy", "motherfucker", "agka baboy",
|
80 |
+
"puta", "putang ina", "gago", "tanga", "hayop", "ulol", "lintik", "animal ka",
|
81 |
+
"paki", "pakyu", "yawa", "bungol", "gingan", "yawa ka", "peste", "irig",
|
82 |
+
"pakit", "ayat", "pua", "kayat mo ti agsardeng", "hinampak", "iring ka"
|
83 |
]
|
84 |
|
85 |
# Function to check if text contains inappropriate content
|
|
|
607 |
inputs = processor(waveform.numpy()[0], sampling_rate=16000, return_tensors="pt").to(device)
|
608 |
with torch.no_grad():
|
609 |
language = "en" if source_code == "eng" else "tl" if source_code == "tgl" else None
|
610 |
+
# Set task and language on generation_config and pass it to generate() to avoid a forced_decoder_ids conflict
|
611 |
+
generation_config = model.generation_config
|
612 |
+
generation_config.task = "transcribe"
|
613 |
+
generation_config.language = f"<|{language}|>" if language else None
|
614 |
+
generated_ids = model.generate(
|
615 |
+
**inputs,
|
616 |
+
generation_config=generation_config,
|
617 |
+
max_length=448
|
618 |
+
)
|
619 |
transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
620 |
else:
|
621 |
processor = stt_mms_processor
|