SimpleFrog committed on
Commit
6a2a16b
·
verified ·
1 Parent(s): 868a787

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -2,6 +2,8 @@ import streamlit as st
2
  import torch
3
  import tempfile
4
  import os
 
 
5
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
6
  from peft import PeftModel
7
 
@@ -40,19 +42,22 @@ if uploaded_file is not None:
40
 
41
  # Charger et traiter l'audio
42
  st.write("📄 **Transcription en cours...**")
43
-
44
- # Charger l'audio
45
- audio_input = processor(temp_audio_path, return_tensors="pt", sampling_rate=16000)
46
- input_features = audio_input.input_features
47
 
48
- # Générer la transcription
 
 
 
 
 
 
 
49
  with torch.no_grad():
50
- predicted_ids = model.generate(input_features)
51
 
52
- # Décoder la sortie
53
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
54
-
55
- # Afficher la transcription
56
  st.subheader("📝 Transcription :")
57
  st.text_area("", transcription, height=200)
58
 
@@ -60,3 +65,4 @@ if uploaded_file is not None:
60
  os.remove(temp_audio_path)
61
 
62
  st.write("🔹 Modèle utilisé :", "Whisper Large + Adaptateur LoRA (SimpleFrog/whisper_finetuned)")
 
 
2
  import torch
3
  import tempfile
4
  import os
5
+ import librosa
6
+ import numpy as np
7
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
8
  from peft import PeftModel
9
 
 
42
 
43
  # Charger et traiter l'audio
44
  st.write("📄 **Transcription en cours...**")
 
 
 
 
45
 
46
+ # 🔹 Charger l'audio et convertir en waveform
47
+ audio, sr = librosa.load(temp_audio_path, sr=16000) # Whisper attend du 16kHz
48
+ audio = np.expand_dims(audio, axis=0) # Ajouter une dimension batch
49
+
50
+ # 🔹 Préparer les entrées pour Whisper
51
+ inputs = processor(audio, sampling_rate=16000, return_tensors="pt")
52
+
53
+ # 🔹 Générer la transcription
54
  with torch.no_grad():
55
+ predicted_ids = model.generate(inputs.input_features)
56
 
57
+ # 🔹 Décoder la sortie
58
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
59
+
60
+ # 🔹 Afficher la transcription
61
  st.subheader("📝 Transcription :")
62
  st.text_area("", transcription, height=200)
63
 
 
65
  os.remove(temp_audio_path)
66
 
67
  st.write("🔹 Modèle utilisé :", "Whisper Large + Adaptateur LoRA (SimpleFrog/whisper_finetuned)")
68
+