import os
from functools import lru_cache
from typing import Union

import numpy as np
import requests
import soundfile as sf
import librosa
import torch
import torchaudio
from pydub import AudioSegment
from transformers import pipeline, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
from huggingface_hub import login, hf_hub_download
# Authenticate against the Hugging Face Hub with the Space's access_token secret
key = os.environ['access_token']
login(token=key)
def load_model():
    "Loads the Fon translation model, the flan-t5-xl model and the Fon ASR pipeline once at startup."
    tokenizer = AutoTokenizer.from_pretrained("Zelyanoth/my_fon_translation_model", token=True, src_lang="fon_Latn")
    model = AutoModelForSeq2SeqLM.from_pretrained("Zelyanoth/my_fon_translation_model")
    tokenizer_tr = AutoTokenizer.from_pretrained("google/flan-t5-xl")
    model_tr = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-xl", torch_dtype=torch.float16)
    transcription_pipeline = pipeline("automatic-speech-recognition", model="Zelyanoth/wav2vec2-bert-fon-colab")
    return tokenizer_tr, model_tr, transcription_pipeline, tokenizer, model


tokenizer_tr, model_tr, transcription_pipeline, tokenizer, model = load_model()
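
# Illustrative note (assumption, not part of the app flow): the ASR pipeline accepts
# either a path to an audio file or a 16 kHz numpy array, so a quick sanity check
# could be `transcription_pipeline("example.wav")["text"]`, where "example.wav" is a
# hypothetical local recording.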
class MONDJEMIN_AI:
    "The AI component used in the project."

    def __init__(self, audio):
        self.audio = audio
    def formater(self, nom_fichier_wav):
        "Converts the input audio to a 16 kHz WAV file and splits it on silences into segments the transcriber can handle."
        # Infer the input format from the file extension, then re-export as WAV
        formats = os.path.splitext(self.audio)[1][1:]
        audio = AudioSegment.from_file(self.audio, format=formats)
        audio.export(nom_fichier_wav, format="wav")
        y, sr = librosa.load(nom_fichier_wav, sr=16000)
        # Short-time energy: 25 ms frames with a 10 ms hop at 16 kHz
        frame_length = 400
        hop_length = 160
        energy = np.array([
            np.sum(np.abs(y[i:i + frame_length]) ** 2)
            for i in range(0, len(y), hop_length)
        ])
        # Frames whose energy falls below the 10th percentile are treated as silence
        fenetre = np.percentile(energy, 10)
        silence_indices = np.where(energy < fenetre)[0]
        silence_times = librosa.frames_to_time(silence_indices, sr=sr, hop_length=hop_length)
        if len(silence_times) == 0:
            # No silence detected: keep the whole recording as a single segment
            sf.write('segment_1.wav', y, sr)
            return [y]
        # Group silence frames into regions; a gap longer than min_silence_duration starts a new region
        min_silence_duration = 0.5
        merged_silences = []
        current_silence_start = silence_times[0]
        for i in range(1, len(silence_times)):
            if silence_times[i] - silence_times[i - 1] > min_silence_duration:
                current_silence_end = silence_times[i - 1]
                merged_silences.append((current_silence_start, current_silence_end))
                current_silence_start = silence_times[i]
        merged_silences.append((current_silence_start, silence_times[-1]))
        # Cut the signal at the end of each silence region
        segments_boundaries = [0] + [end for start, end in merged_silences] + [len(y) / sr]
        segments = []
        min_segment_duration = 1.0
        for i in range(len(segments_boundaries) - 1):
            start_sample = int(segments_boundaries[i] * sr)
            end_sample = int(segments_boundaries[i + 1] * sr)
            segment = y[start_sample:end_sample]
            segment_duration = (end_sample - start_sample) / sr
            # Keep only segments of at least one second and write them to disk
            if segment_duration >= min_segment_duration:
                segments.append(segment)
                output_filename = f'segment_{i+1}.wav'
                sf.write(output_filename, segment, sr)
        # Fall back to the whole recording if no segment was long enough
        if len(segments) == 0:
            output_filename = 'segment_1.wav'
            sf.write(output_filename, y, sr)
            segments.append(y)
        return segments
        # Legacy fixed-duration splitting kept for reference:
        # # Allow listening to each segment
        # for i, segment in enumerate(segments):
        #     print(f'Listen to segment {i+1}')
        #     display(Audio(segment, rate=sr))
        # chemin_dossier = "./audio/"
        # if duree_audio_ms > duree_segment_ms:
        #     segments_audio = []
        #     debut_segment = 0
        #     fin_segment = duree_segment_ms
        #     while debut_segment < duree_audio_ms:
        #         segment = audio[debut_segment:fin_segment]
        #         nombre_dossiers = 0
        #         if not os.path.exists(chemin_dossier):
        #             os.makedirs(chemin_dossier)
        #         for element in os.listdir(chemin_dossier):
        #             # Check whether the entry is a directory
        #             if os.path.isdir(os.path.join(chemin_dossier, element)):
        #                 # Increment the counter
        #                 nombre_dossiers += 1
        #         folder = "./audio/split" + str(nombre_dossiers)
        #         os.makedirs(folder)
        #         outputfile = folder + "/" + nom_fichier_wav + str(fin_segment)
        #         segment.export(outputfile, format="wav")
        #         segments_audio.append(outputfile)
        #         # Update the indices for the next segment
        #         debut_segment = fin_segment
        #         fin_segment += duree_segment_ms
        #     return segments_audio
    def transcriptor(self):
        "Transcribes the Fon audio segment by segment."
        global transcription_pipeline
        audio = self.formater("test.wav")
        if isinstance(audio, list):
            trans = []
            for a in audio:
                # Each segment is a 16 kHz numpy array, so pass the sampling rate explicitly
                transcription = transcription_pipeline({"raw": a, "sampling_rate": 16000})
                trans.append(transcription['text'])
            transcr = ", ".join(trans)
            return transcr
        else:
            transcription = transcription_pipeline(audio)
            return transcription['text']
    def translate(self, inputt):
        "Translates Fon text (a string or a list of strings) into French with the fine-tuned Fon translation model."
        # Wrap a bare string so the loop iterates over sentences, not characters
        if isinstance(inputt, str):
            inputt = [inputt]
        lop = []
        for a in inputt:
            inputs = tokenizer(a, return_tensors="pt")
            translated_tokens = model.generate(
                **inputs, forced_bos_token_id=tokenizer.lang_code_to_id["fra_Latn"],
            )
            lop.append(tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0])
        return ' \n'.join(lop)
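

# Minimal usage sketch (assumption, not part of the original file): "recording.mp3"
# is a hypothetical input path; the real Space presumably wires MONDJEMIN_AI into a
# UI layer rather than running it from __main__.
if __name__ == "__main__":
    assistant = MONDJEMIN_AI("recording.mp3")
    fon_text = assistant.transcriptor()          # Fon transcription of the audio
    french_text = assistant.translate(fon_text)  # French translation, one line per input
    print(fon_text)
    print(french_text)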