Spaces:

karthi311
/

trans

Sleeping

App Files Files Community

trans / app.py

karthi311

Update app.py

8efba48 verified 5 months ago

raw

history blame

2.34 kB

	import os
	import tempfile
	from subprocess import Popen, PIPE
	import torch
	import gradio as gr
	from transformers import pipeline
	from transformers.pipelines.audio_utils import ffmpeg_read
	from pydub import AudioSegment

	# Model configuration
	MODEL_NAME = "openai/whisper-large-v3-turbo"
	BATCH_SIZE = 8  # forwarded as batch_size on every pipeline call

	# Run on CUDA device index 0 when a GPU is visible, otherwise on the CPU.
	if torch.cuda.is_available():
	    device = 0
	else:
	    device = "cpu"

	# Shared speech-recognition pipeline, built once when the module is loaded.
	whisper_pipeline = pipeline(
	    task="automatic-speech-recognition",
	    device=device,
	    chunk_length_s=30,
	    model=MODEL_NAME,
	)

	# Convert MP4 to MP3
	def convert_mp4_to_mp3(mp4_path, mp3_path):
	    """Load an MP4 file with pydub and export its audio as MP3.

	    Args:
	        mp4_path: Path of the MP4 file to read.
	        mp3_path: Path the MP3 output is written to.

	    Raises:
	        RuntimeError: If loading or exporting fails. The underlying
	            exception is attached as ``__cause__``.
	    """
	    try:
	        audio = AudioSegment.from_file(mp4_path, format="mp4")
	        audio.export(mp3_path, format="mp3")
	    except Exception as e:
	        # Chain explicitly so the original failure is reported as the direct
	        # cause instead of an incidental "during handling" context.
	        raise RuntimeError(f"Error converting MP4 to MP3: {e}") from e

	# Transcribe audio
	def transcribe_audio(audio_path):
	    """Transcribe an audio file with the module-level Whisper pipeline.

	    Args:
	        audio_path: Path of the audio file to transcribe.

	    Returns:
	        The transcribed text, or a string starting with
	        ``"Error during transcription: "`` if any step fails. Errors are
	        reported through the return value rather than raised.
	    """
	    try:
	        # ffmpeg_read decodes an in-memory byte payload (it feeds the bytes to
	        # ffmpeg), so the file contents must be read first; handing it the
	        # path string itself fails.
	        with open(audio_path, "rb") as audio_file:
	            payload = audio_file.read()
	        sampling_rate = whisper_pipeline.feature_extractor.sampling_rate
	        samples = ffmpeg_read(payload, sampling_rate)
	        inputs = {"array": samples, "sampling_rate": sampling_rate}
	        result = whisper_pipeline(inputs, batch_size=BATCH_SIZE, return_timestamps=False)
	        return result["text"]
	    except Exception as e:
	        return f"Error during transcription: {e}"

	# Gradio Interface Function
	def transcribe_file(file):
	    """Transcribe an uploaded audio or MP4 file and return the text.

	    MP4 uploads are first converted to a temporary MP3, which is always
	    deleted before returning, whether conversion or transcription succeeds
	    or fails. Any other file is passed to the transcriber as-is.

	    Args:
	        file: The uploaded file, either an object exposing the path as
	            ``.name`` or a plain path string. ``None`` means nothing was
	            uploaded.

	    Returns:
	        The transcription, or a human-readable error message.
	    """
	    if file is None:
	        return "Error: no file was uploaded."

	    # Accept both file-like uploads (path in .name) and bare path strings.
	    source_path = getattr(file, "name", file)

	    # Compare the extension case-insensitively so ".MP4" is converted too.
	    if not str(source_path).lower().endswith(".mp4"):
	        return transcribe_audio(source_path)

	    # Reserve a temp path and close the handle right away; only the name is
	    # needed, since the converter writes to the path itself.
	    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
	        temp_mp3_path = temp_file.name

	    try:
	        try:
	            convert_mp4_to_mp3(source_path, temp_mp3_path)
	        except Exception as e:
	            return f"Error during MP4 to MP3 conversion: {e}"
	        return transcribe_audio(temp_mp3_path)
	    finally:
	        # Clean up on every exit path, including a failed conversion.
	        try:
	            os.remove(temp_mp3_path)
	        except FileNotFoundError:
	            pass

	# Gradio interface setup
	def launch_gradio():
	    """Assemble the transcription UI and start serving it with a share link."""
	    with gr.Blocks() as app:
	        gr.Markdown("# Audio Transcription with Whisper Model")

	        # Components are created inside the Blocks context, input first.
	        upload_box = gr.File(label="Upload Audio/Video File (MP4 or MP3)")
	        transcript_box = gr.Textbox(label="Transcribed Text")
	        gr.Interface(
	            fn=transcribe_file,
	            inputs=upload_box,
	            outputs=transcript_box,
	        )

	    app.launch(share=True)

	# Run the Gradio app only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    launch_gradio()