Jerich committed
Commit 0cce76f · verified · 1 Parent(s): eb35d5b

Update app.py

Files changed (1)
  1. app.py +14 -3
app.py CHANGED
@@ -75,8 +75,11 @@ NLLB_LANGUAGE_CODES = {
 
 # Define a list of inappropriate words for content filtering
 INAPPROPRIATE_WORDS = [
-    "profanity", "obscenity", "obscene", "offensive", "vulgar", "explicit",
-    # Add more words as needed or load from a separate file
+    "fuck", "shit", "bitch", "asshole", "damn", "cunt", "whore", "bastard",
+    "son of a bitch", "dick", "pussy", "motherfucker", "agka baboy",
+    "puta", "putang ina", "gago", "tanga", "hayop", "ulol", "lintik", "animal ka",
+    "paki", "pakyu", "yawa", "bungol", "gingan", "yawa ka", "peste", "irig",
+    "pakit", "ayat", "pua", "kayat mo ti agsardeng", "hinampak", "iring ka"
 ]
 
 # Function to check if text contains inappropriate content
@@ -604,7 +607,15 @@ async def translate_audio(audio: UploadFile = File(...), source_lang: str = Form
         inputs = processor(waveform.numpy()[0], sampling_rate=16000, return_tensors="pt").to(device)
         with torch.no_grad():
             language = "en" if source_code == "eng" else "tl" if source_code == "tgl" else None
-            generated_ids = model.generate(**inputs, language=language, task="transcribe")
+            # Explicitly avoid forced_decoder_ids conflict
+            generation_config = model.generation_config
+            generation_config.task = "transcribe"
+            generation_config.language = f"<|{language}|>" if language else None
+            generated_ids = model.generate(
+                **inputs,
+                generation_config=generation_config,
+                max_length=448
+            )
             transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
     else:
         processor = stt_mms_processor
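For context, the new block in the second hunk sets the task and language on the model's generation_config instead of passing them as keywords to generate(), which sidesteps the conflict transformers reports when forced_decoder_ids are already preset on the checkpoint's config. Below is a minimal standalone sketch of the same pattern; the openai/whisper-small checkpoint and the silent dummy waveform are placeholders for illustration, not values taken from app.py.

```python
import numpy as np
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = WhisperProcessor.from_pretrained("openai/whisper-small")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)

# One second of silence at 16 kHz stands in for the decoded audio upload.
waveform = np.zeros(16000, dtype=np.float32)
inputs = processor(waveform, sampling_rate=16000, return_tensors="pt").to(device)

language = "en"  # app.py picks "en" or "tl" from the source code, or None otherwise
with torch.no_grad():
    # Configure task/language on the generation config rather than as generate()
    # kwargs, so they do not clash with any preset forced_decoder_ids.
    generation_config = model.generation_config
    generation_config.task = "transcribe"
    generation_config.language = f"<|{language}|>" if language else None
    generated_ids = model.generate(
        **inputs,
        generation_config=generation_config,
        max_length=448,
    )

transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(transcription)
```

Depending on the installed transformers version, another common way to silence the same warning is to clear the preset ids explicitly with model.generation_config.forced_decoder_ids = None before calling generate().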
 
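The check that consumes INAPPROPRIATE_WORDS sits behind the "# Function to check if text contains inappropriate content" comment and is not part of this diff. A plausible sketch of such a check is shown below; the name contains_inappropriate_content and the whole-word matching strategy are illustrative assumptions, not code from app.py.

```python
import re

# Abbreviated sample of the list added in this commit; app.py holds the full list.
INAPPROPRIATE_WORDS = ["fuck", "putang ina", "gago", "yawa ka"]

def contains_inappropriate_content(text: str) -> bool:
    """Hypothetical helper: flag text containing any listed word or phrase.

    Matching is case-insensitive and anchored on word boundaries, so short
    entries do not fire inside longer, innocent words.
    """
    lowered = text.lower()
    return any(
        re.search(rf"\b{re.escape(phrase)}\b", lowered)
        for phrase in INAPPROPRIATE_WORDS
    )

print(contains_inappropriate_content("putang ina, the bus is late again"))  # True
print(contains_inappropriate_content("good morning, everyone"))             # False
```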