# Spaces: Runtime error
import os
import tempfile
import time

import gradio as gr
from faster_whisper import WhisperModel
from moviepy.editor import VideoFileClip
from pytube import YouTube
from pytube.exceptions import VideoUnavailable, PytubeError
# Function that converts a video to MP3
def convert_mp4_to_mp3(video_file_path, output_dir):
    """Extract the audio track of a video file and save it as an MP3.

    Args:
        video_file_path: Path of the input video file.
        output_dir: Directory the MP3 is written to. The output file keeps
            the video's base name and gets a ``.mp3`` extension.

    Returns:
        The path of the MP3 file that was written.

    Raises:
        ValueError: If the video has no audio track.
    """
    base_name = os.path.splitext(os.path.basename(video_file_path))[0]
    output_path = os.path.join(output_dir, base_name + ".mp3")

    video = VideoFileClip(video_file_path)
    try:
        audio = video.audio
        if audio is None:
            # Without this guard the failure surfaces as an opaque
            # AttributeError on None.
            raise ValueError(f"No audio track found in video: {video_file_path}")
        try:
            audio.write_audiofile(output_path)
        finally:
            audio.close()
    finally:
        # Always release the clip, even when extraction or encoding fails.
        video.close()
    return output_path
# Function that transcribes an MP3 file to text using a Whisper model
def transcribe_audio(model_size, audio_file):
    """Transcribe an audio file with a Whisper model running on the CPU.

    Args:
        model_size: Model identifier handed to ``WhisperModel``
            (for example ``"tiny"`` or ``"base"``).
        audio_file: Path of the audio file to transcribe.

    Returns:
        A report string containing the detected language, one timestamped
        line per transcribed segment, and the elapsed transcription time.
        If a ``PermissionError`` or ``ValueError`` is raised while loading
        the model or transcribing, a string of the form
        ``"<ErrorName>: <message>"`` is returned instead.
    """
    try:
        # Model construction lives inside the try so that its errors are
        # reported the same way as transcription errors.
        model = WhisperModel(model_size, device="cpu", compute_type="int8")
        start_time = time.time()
        segments, info = model.transcribe(audio_file, beam_size=5)
        detected_language = (
            f"Detected language '{info.language}' "
            f"with probability {info.language_probability:f}"
        )
        # Build the list inside the try so that errors raised while
        # iterating the segments are handled too.
        lines = [
            f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}"
            for segment in segments
        ]
    except PermissionError as e:
        return f"PermissionError: {e}"
    except ValueError as e:
        return f"ValueError: {e}"

    elapsed_time = time.time() - start_time
    result_text = "\n".join(lines)
    return (
        f"{detected_language}\n\nTranscription:\n{result_text}"
        f"\n\nElapsed time: {elapsed_time:.2f} seconds"
    )
# Function that downloads a video from a YouTube URL
def download_youtube_video(url, output_dir):
    """Download a YouTube video as an MP4 file.

    Args:
        url: URL of the YouTube video.
        output_dir: Directory the downloaded file is saved in.

    Returns:
        A ``(output_path, error)`` tuple. On success ``output_path`` is the
        path of the downloaded file and ``error`` is ``None``; on failure
        ``output_path`` is ``None`` and ``error`` is a human-readable message.
    """
    try:
        yt = YouTube(url)
        # Request a progressive stream (audio and video in one file): the
        # download is only useful downstream if it contains an audio track.
        stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
        if stream is None:
            return None, "No downloadable MP4 stream with audio was found for this video."
        output_path = stream.download(output_dir)
    except VideoUnavailable:
        return None, "Video unavailable. Please check the URL."
    except (PytubeError, OSError) as e:
        # OSError covers network failures (e.g. urllib's URLError) and disk
        # errors, so they are reported through the same error channel.
        return None, f"An error occurred: {e}"
    return output_path, None
# Main function used by the Gradio interface
def process_video(model_size, video_file=None, video_url=None):
    """Transcribe a video given either as an uploaded file or as a URL.

    Exactly one of ``video_file`` and ``video_url`` must be provided.

    Args:
        model_size: Whisper model size passed on to ``transcribe_audio``.
        video_file: Uploaded file, either an object exposing its path as
            ``.name`` or a plain path string.
        video_url: URL of a video to download; surrounding whitespace is
            ignored.

    Returns:
        The transcription report, or a message describing why the request
        could not be processed.
    """
    video_url = (video_url or "").strip()

    # Exactly one source is required: reject both "neither" and "both".
    if bool(video_file) == bool(video_url):
        return "Please upload a video file or provide a video URL, but not both."
    if not model_size:
        return "Please select a model size."

    # A per-request scratch directory keeps concurrent requests with the
    # same file name from overwriting each other, and guarantees that the
    # downloaded video and the intermediate MP3 are removed afterwards.
    with tempfile.TemporaryDirectory() as work_dir:
        if video_url:
            print(f"Downloading video from URL: {video_url}")
            video_file_path, error = download_youtube_video(video_url, work_dir)
            if error:
                print(f"Error downloading video: {error}")
                return error
            print(f"Downloaded video to: {video_file_path}")
        else:
            # Accept both a file-like object with `.name` and a path string.
            video_file_path = getattr(video_file, "name", video_file)
            print(f"Using uploaded video file: {video_file_path}")

        mp3_file_path = convert_mp4_to_mp3(video_file_path, work_dir)
        print(f"Converted video to MP3: {mp3_file_path}")
        transcription = transcribe_audio(model_size, mp3_file_path)

    print("Transcription complete")
    return transcription
# Gradio interface definition
# Build the web UI: a model-size dropdown, a file upload and a URL box feed
# `process_video`, whose returned string is shown as plain text.
# The interface is deliberately not `live`: a live interface re-runs the
# function on every input change, which would start a download and a full
# transcription for each keystroke typed into the URL box.
iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Dropdown(
            ["tiny", "base", "small", "medium", "large"],
            # Explicit default so a request never arrives without a model size.
            value="tiny",
            label="Model Size",
        ),
        gr.File(label="Upload Video File"),
        gr.Textbox(label="Video URL"),
    ],
    outputs="text",
    title="Video to Text Converter using Whisper",
    description="Upload a video file or provide a video URL, select the Whisper model size, and get the transcribed text.",
)

if __name__ == "__main__":
    iface.launch()