import os
# Use a writable cache directory for Hugging Face downloads (set before transformers is imported)
cache_dir = "/tmp/huggingface_cache"
if not os.path.exists(cache_dir):
    os.makedirs(cache_dir, exist_ok=True)
os.environ["TRANSFORMERS_CACHE"] = cache_dir
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from datasets import load_dataset
from googletrans import Translator
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
app = FastAPI()
# Use the GPU with float16 when available, otherwise fall back to CPU with float32
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Whisper large-v3 handles both transcription and speech-to-English translation
model_id = "openai/whisper-large-v3"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)
processor = AutoProcessor.from_pretrained(model_id)
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=256,
    chunk_length_s=30,
    batch_size=16,
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device,
)
# Sample long-form dataset (loaded here but not referenced by the endpoint below)
dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
@app.post("/voice_recognition")
async def process_audio(file: UploadFile = File(...)):
    try:
        # Save the uploaded audio to a local file
        file_path = f"{file.filename}"
        with open(file_path, "wb") as f:
            f.write(file.file.read())
        # Japanese transcription of the original audio
        original = pipe(file_path)
        original_version = original["text"]
        # English translation via Whisper's built-in translate task
        result = pipe(file_path, generate_kwargs={"task": "translate"})
        hasil = result["text"]
        # Indonesian translation via Google Translate
        detect = detect_google(hasil)
        id_ver = translate_google(hasil, f"{detect}", "ID")
        # Additional modifications
        id_ver = modify_text(id_ver)
        return JSONResponse(
            content={"response": {"jp_text": original_version, "en_text": hasil, "id_text": id_ver}},
            status_code=200,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {e}")
def detect_google(text):
    # Detect the language of the text with googletrans; returns an upper-case code such as "EN"
    try:
        translator = Translator()
        detected_lang = translator.detect(text)
        return detected_lang.lang.upper()
    except Exception as e:
        print(f"Error detect: {e}")
        return None

def translate_google(text, source, target):
    # Translate text between language codes with googletrans
    try:
        translator = Translator()
        translated_text = translator.translate(text, src=source, dest=target)
        return translated_text.text
    except Exception as e:
        print(f"Error translate: {e}")
        return None
def modify_text(text):
    # Additional modifications, case-sensitive
    replacements = {
        "Tuan": "Master",
        "tuan": "Master",
        "Guru": "Master",
        "guru": "Master",
        "Monica": "Monika",
        "monica": "Monika",
    }
    for original, replacement in replacements.items():
        text = text.replace(original, replacement)
    return text
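For reference, a minimal client sketch for the /voice_recognition endpoint above, assuming the code is saved as app.py and served locally with uvicorn (the module name, host, and port are assumptions; the route and the "file" form field come from the code):

    # Start the API first, e.g.:  uvicorn app:app --host 0.0.0.0 --port 8000
    import requests

    # "sample_audio.wav" is a hypothetical input file
    with open("sample_audio.wav", "rb") as audio:
        resp = requests.post(
            "http://localhost:8000/voice_recognition",
            files={"file": ("sample_audio.wav", audio, "audio/wav")},
        )
    print(resp.json())  # {"response": {"jp_text": ..., "en_text": ..., "id_text": ...}}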