Spaces:

jpjp9292
/

Speech-to-Text

Runtime error

App Files Files Community

Speech-to-Text / app.py

jpjp9292

Update app.py

83ebf54 verified 12 months ago

raw

history blame

3.44 kB

	import os
	import gradio as gr
	import time
	from moviepy.editor import VideoFileClip
	from faster_whisper import WhisperModel
	from pytube import YouTube
	from pytube.exceptions import VideoUnavailable, PytubeError

	# 비디오를 MP3로 변환하는 함수
	def convert_mp4_to_mp3(video_file_path, output_dir):
	    """Extract the audio track of a video file and write it out as an MP3.

	    Args:
	        video_file_path: Path of the video file to read.
	        output_dir: Directory the MP3 is written to. The MP3 keeps the
	            video's base name with its extension replaced by ``.mp3``.

	    Returns:
	        The path of the MP3 file that was written.

	    Raises:
	        ValueError: If the video has no audio track to extract.
	    """
	    base_name = os.path.splitext(os.path.basename(video_file_path))[0]
	    output_path = os.path.join(output_dir, base_name + ".mp3")

	    video = VideoFileClip(video_file_path)
	    try:
	        audio = video.audio
	        # A clip without sound exposes ``audio`` as None; fail with a clear
	        # message instead of an AttributeError on None.
	        if audio is None:
	            raise ValueError(f"No audio track found in: {video_file_path}")
	        try:
	            audio.write_audiofile(output_path)
	        finally:
	            audio.close()
	    finally:
	        # Always release the clip, even when encoding fails part-way.
	        video.close()
	    return output_path

	# Whisper 모델을 사용하여 MP3 파일을 텍스트로 변환하는 함수
	def transcribe_audio(model_size, audio_file):
	    """Transcribe an audio file with a Whisper model and return a text report.

	    Args:
	        model_size: Model size identifier passed to ``WhisperModel``.
	        audio_file: Path of the audio file to transcribe.

	    Returns:
	        A string holding the detected-language line, one timestamped line
	        per segment, and the elapsed time. If a ``PermissionError`` or
	        ``ValueError`` is raised while transcribing, a string describing
	        that error is returned instead.
	    """
	    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
	    # The timer starts after the model is constructed, so the reported
	    # time excludes model construction.
	    started_at = time.time()

	    try:
	        segments, info = whisper.transcribe(audio_file, beam_size=5)
	        detected_language = "Detected language '%s' with probability %f" % (
	            info.language,
	            info.language_probability,
	        )
	        lines = [
	            "[%.2fs -> %.2fs] %s" % (seg.start, seg.end, seg.text)
	            for seg in segments
	        ]
	        result_text = "\n".join(lines)
	    except PermissionError as e:
	        return f"PermissionError: {e}"
	    except ValueError as e:
	        return f"ValueError: {e}"

	    elapsed_time = time.time() - started_at
	    return f"{detected_language}\n\nTranscription:\n{result_text}\n\nElapsed time: {elapsed_time:.2f} seconds"

	# YouTube URL에서 비디오를 다운로드하는 함수
	def download_youtube_video(url, output_dir):
	    """Download a YouTube video as an MP4 file.

	    Args:
	        url: URL of the YouTube video.
	        output_dir: Directory the downloaded file is saved in.

	    Returns:
	        A ``(path, error)`` tuple: ``(downloaded_path, None)`` on success,
	        or ``(None, message)`` when the download could not be performed.
	    """
	    try:
	        mp4_streams = YouTube(url).streams.filter(file_extension='mp4')
	        # Prefer a progressive stream (audio and video in one file) so the
	        # download has an audio track; fall back to the first MP4 stream of
	        # any kind when no progressive one is offered.
	        stream = mp4_streams.filter(progressive=True).first() or mp4_streams.first()
	        if stream is None:
	            # first() yields None for an empty query; report it through the
	            # error slot instead of failing on None.download().
	            return None, "No downloadable MP4 stream found for this video."
	        return stream.download(output_dir), None
	    except VideoUnavailable:
	        return None, "Video unavailable. Please check the URL."
	    except PytubeError as e:
	        return None, f"An error occurred: {e}"

	# Gradio 인터페이스에서 사용할 메인 함수
	def process_video(model_size, video_file=None, video_url=None):
	    """Transcribe a video given either as an uploaded file or as a URL.

	    Args:
	        model_size: Whisper model size to transcribe with.
	        video_file: Uploaded video, either an object exposing the file path
	            as ``.name`` or the path itself as a string.
	        video_url: URL of a video to download.

	    Returns:
	        The transcription report, or a message string when the inputs are
	        invalid or the download fails.
	    """
	    # Exactly one of the two sources must be supplied.
	    if bool(video_file) == bool(video_url):
	        return "Please upload a video file or provide a video URL, but not both."
	    # Reject a missing model size before any download or conversion work.
	    if not model_size:
	        return "Please select a model size."

	    save_path = "/tmp"

	    if video_url:
	        print(f"Downloading video from URL: {video_url}")
	        video_file_path, error = download_youtube_video(video_url, save_path)
	        if error:
	            print(f"Error downloading video: {error}")
	            return error
	        print(f"Downloaded video to: {video_file_path}")
	    else:
	        # Depending on the Gradio version/configuration the upload arrives
	        # as a wrapper with a ``.name`` path or as a plain path string.
	        video_file_path = getattr(video_file, "name", video_file)
	        print(f"Using uploaded video file: {video_file_path}")

	    mp3_file_path = convert_mp4_to_mp3(video_file_path, save_path)
	    print(f"Converted video to MP3: {mp3_file_path}")
	    transcription = transcribe_audio(model_size, mp3_file_path)
	    print("Transcription complete")
	    return transcription

	# Gradio 인터페이스 정의
	# Input widgets, listed in the positional order of process_video's
	# parameters: model size, uploaded file, video URL.
	_input_widgets = [
	    gr.Dropdown(["tiny", "base", "small", "medium", "large"], label="Model Size"),
	    gr.File(label="Upload Video File"),
	    gr.Textbox(label="Video URL"),
	]

	# Gradio interface wiring the widgets to process_video with a text output.
	# NOTE(review): live=True presumably re-runs process_video whenever an
	# input changes, which is heavy for download + transcription - confirm
	# this is intended.
	iface = gr.Interface(
	    fn=process_video,
	    inputs=_input_widgets,
	    outputs="text",
	    title="Video to Text Converter using Whisper",
	    description="Upload a video file or provide a video URL, select the Whisper model size, and get the transcribed text.",
	    live=True,
	)

	# Start the web UI only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()