# File: trans / app.py
# Last commit: "Update app.py" (8efba48, verified) by karthi311
# Size: 2.34 kB
import os
import tempfile
from subprocess import Popen, PIPE
import torch
import gradio as gr
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
from pydub import AudioSegment
# Constants
MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8

# Run on CUDA device 0 when a GPU is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Whisper pipeline: built once at import time and reused by transcribe_audio().
whisper_pipeline = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
    chunk_length_s=30,
)
# Convert MP4 to MP3
def convert_mp4_to_mp3(mp4_path, mp3_path):
    """Load ``mp4_path`` with pydub and export its audio to ``mp3_path`` as MP3.

    Args:
        mp4_path: Path of the MP4 file to read.
        mp3_path: Path the MP3 output is written to.

    Raises:
        RuntimeError: If loading or exporting fails for any reason. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        audio = AudioSegment.from_file(mp4_path, format="mp4")
        audio.export(mp3_path, format="mp3")
    except Exception as e:
        # Chain explicitly so the original error and traceback stay attached
        # to the wrapper instead of only appearing in the message text.
        raise RuntimeError(f"Error converting MP4 to MP3: {e}") from e
# Transcribe audio
def transcribe_audio(audio_path):
    """Transcribe the audio file at ``audio_path`` with the Whisper pipeline.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The transcribed text. If any step fails, a string beginning with
        ``"Error during transcription: "`` is returned instead of raising,
        so the message can be shown directly in the UI.
    """
    try:
        # ffmpeg_read() pipes the *encoded bytes* of the file into ffmpeg; it
        # does not accept a filesystem path, so load the file content first.
        with open(audio_path, "rb") as audio_file:
            payload = audio_file.read()
        sampling_rate = whisper_pipeline.feature_extractor.sampling_rate
        samples = ffmpeg_read(payload, sampling_rate)
        inputs = {"array": samples, "sampling_rate": sampling_rate}
        result = whisper_pipeline(inputs, batch_size=BATCH_SIZE, return_timestamps=False)
        return result["text"]
    except Exception as e:
        return f"Error during transcription: {e}"
# Gradio Interface Function
def transcribe_file(file):
    """Transcribe an uploaded audio or MP4 file (Gradio callback).

    MP4 uploads are first converted to a temporary MP3, which is always
    deleted afterwards; any other file is passed to the transcriber as-is.

    Args:
        file: The uploaded file, either an object exposing the path as
            ``.name`` or a plain path string. ``None`` means nothing was
            uploaded.

    Returns:
        The transcription text, or a human-readable error message.
    """
    if file is None:
        return "Error: no file was uploaded."

    # Accept both file-like objects (path in ``.name``) and bare path strings.
    source_path = getattr(file, "name", file)

    # Compare case-insensitively so ".MP4" uploads are converted as well.
    if not str(source_path).lower().endswith(".mp4"):
        return transcribe_audio(source_path)

    # mkstemp returns an open descriptor; close it right away so the converter
    # can write to the path and no handle is leaked.
    fd, temp_mp3_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    try:
        try:
            convert_mp4_to_mp3(source_path, temp_mp3_path)
        except Exception as e:
            return f"Error during MP4 to MP3 conversion: {e}"
        return transcribe_audio(temp_mp3_path)
    finally:
        # Runs on success and on every failure path, so the temporary MP3
        # never outlives the request.
        if os.path.exists(temp_mp3_path):
            os.remove(temp_mp3_path)
# Gradio interface setup
def launch_gradio():
    """Assemble the transcription UI and launch it with sharing enabled."""
    with gr.Blocks() as demo:
        gr.Markdown("# Audio Transcription with Whisper Model")
        # Components are created in the same order as before: input, then output.
        upload_input = gr.File(label="Upload Audio/Video File (MP4 or MP3)")
        text_output = gr.Textbox(label="Transcribed Text")
        gr.Interface(fn=transcribe_file, inputs=upload_input, outputs=text_output)
    demo.launch(share=True)
# Run the Gradio app
if __name__ == "__main__":
    # Start the UI only when this file is executed as a script, not on import.
    launch_gradio()