Spaces:
Runtime error
Runtime error
File size: 3,439 Bytes
50c4728 d03632e 9e780c8 50c4728 83ebf54 50c4728 83ebf54 50c4728 83ebf54 d03632e 9e780c8 83ebf54 9e780c8 d03632e 83ebf54 50c4728 38e25e4 83ebf54 9e780c8 83ebf54 9e780c8 83ebf54 38e25e4 50c4728 83ebf54 50c4728 38e25e4 50c4728 83ebf54 50c4728 83ebf54 50c4728 83ebf54 50c4728 38e25e4 50c4728 38e25e4 50c4728 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import os
import gradio as gr
import time
from moviepy.editor import VideoFileClip
from faster_whisper import WhisperModel
from pytube import YouTube
from pytube.exceptions import VideoUnavailable, PytubeError
# Convert a video file to MP3
def convert_mp4_to_mp3(video_file_path, output_dir):
    """Extract the audio track of a video file and save it as an MP3.

    Args:
        video_file_path: Path of the video file to read.
        output_dir: Directory in which the MP3 is written. The MP3 is named
            after the video's base name with a ``.mp3`` extension.

    Returns:
        The path of the MP3 file that was written.

    Raises:
        ValueError: If the video has no audio track to extract.
    """
    stem = os.path.splitext(os.path.basename(video_file_path))[0]
    output_path = os.path.join(output_dir, stem + ".mp3")

    video = VideoFileClip(video_file_path)
    try:
        audio = video.audio
        # A clip without an audio track exposes ``audio`` as None; fail with a
        # clear message instead of an AttributeError on the next line.
        if audio is None:
            raise ValueError(f"No audio track found in {video_file_path!r}")
        try:
            audio.write_audiofile(output_path)
        finally:
            audio.close()
    finally:
        # Always release the clip's underlying readers, even when the
        # extraction above fails.
        video.close()
    return output_path
# Transcribe an MP3 file to text using the Whisper model
def transcribe_audio(model_size, audio_file):
    """Transcribe an audio file with a faster-whisper model running on CPU.

    Args:
        model_size: Model size name passed to ``WhisperModel``.
        audio_file: Path of the audio file to transcribe.

    Returns:
        A report string containing the detected language, one timestamped
        line per transcribed segment, and the elapsed wall-clock time. If a
        ``PermissionError`` or ``ValueError`` is raised while transcribing,
        a short ``"<ErrorName>: <message>"`` string is returned instead.
    """
    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
    started_at = time.time()
    try:
        segments, info = whisper.transcribe(audio_file, beam_size=5)
        detected_language = "Detected language '%s' with probability %f" % (
            info.language,
            info.language_probability,
        )
        # Segments are consumed here, inside the try, so errors raised while
        # iterating them are handled by the clauses below.
        lines = [
            "[%.2fs -> %.2fs] %s" % (seg.start, seg.end, seg.text)
            for seg in segments
        ]
        result_text = "\n".join(lines)
    except PermissionError as e:
        return f"PermissionError: {e}"
    except ValueError as e:
        return f"ValueError: {e}"
    elapsed_time = time.time() - started_at
    return f"{detected_language}\n\nTranscription:\n{result_text}\n\nElapsed time: {elapsed_time:.2f} seconds"
# Download a video from a YouTube URL
def download_youtube_video(url, output_dir):
    """Download an MP4 stream of a YouTube video.

    Args:
        url: URL of the YouTube video.
        output_dir: Directory the downloaded file is saved in.

    Returns:
        A ``(path, error)`` tuple. On success ``path`` is the downloaded
        file's path and ``error`` is ``None``; on failure ``path`` is ``None``
        and ``error`` is a human-readable message.
    """
    try:
        mp4_streams = YouTube(url).streams.filter(file_extension='mp4')
        # Prefer a progressive stream (audio and video in one file) so the
        # download is guaranteed to carry an audio track; fall back to the
        # first MP4 stream of any kind when no progressive one is offered.
        stream = mp4_streams.filter(progressive=True).first()
        if stream is None:
            stream = mp4_streams.first()
        # ``first()`` yields None when the query matched nothing; report it
        # rather than crashing on ``None.download``.
        if stream is None:
            return None, "No downloadable MP4 stream found for this video."
        output_path = stream.download(output_dir)
        return output_path, None
    except VideoUnavailable:
        return None, "Video unavailable. Please check the URL."
    except PytubeError as e:
        return None, f"An error occurred: {e}"
# Main function used by the Gradio interface
def process_video(model_size, video_file=None, video_url=None):
    """Transcribe an uploaded video or a video fetched from a URL.

    Exactly one of ``video_file`` and ``video_url`` must be provided. The
    video's audio is extracted to an MP3 in ``/tmp`` and then transcribed.

    Args:
        model_size: Whisper model size name forwarded to ``transcribe_audio``.
        video_file: The uploaded video, either as a path string or as an
            object whose ``name`` attribute is the file path.
        video_url: URL of a video to download. Blank or whitespace-only
            values count as "not provided".

    Returns:
        The transcription report on success, otherwise a message describing
        what went wrong (bad input, download failure, conversion failure).
    """
    save_path = "/tmp"

    # A whitespace-only textbox value is not a URL; normalize it to empty so
    # it does not trigger a download attempt.
    if isinstance(video_url, str):
        video_url = video_url.strip()

    if video_url and not video_file:
        print(f"Downloading video from URL: {video_url}")
        video_file_path, error = download_youtube_video(video_url, save_path)
        if error:
            print(f"Error downloading video: {error}")
            return error
        print(f"Downloaded video to: {video_file_path}")
    elif video_file and not video_url:
        # Depending on the Gradio version/configuration the upload arrives
        # either as a plain path string or as a file wrapper exposing
        # ``.name``; accept both.
        video_file_path = getattr(video_file, "name", video_file)
        print(f"Using uploaded video file: {video_file_path}")
    else:
        return "Please upload a video file or provide a video URL, but not both."

    # Report conversion problems (unreadable file, clip without an audio
    # track) as a message, matching how the download and transcription steps
    # surface their errors.
    try:
        mp3_file_path = convert_mp4_to_mp3(video_file_path, save_path)
    except (OSError, ValueError, AttributeError) as e:
        print(f"Error converting video to MP3: {e}")
        return f"Error converting video to MP3: {e}"
    print(f"Converted video to MP3: {mp3_file_path}")

    transcription = transcribe_audio(model_size, mp3_file_path)
    print("Transcription complete")
    return transcription
# Gradio interface definition
# Web UI: a model-size dropdown, a video upload and a URL textbox feed
# process_video, whose returned string is shown as plain text.
# NOTE(review): live=True asks Gradio to rerun the function whenever an input
# changes, without a submit button -- confirm that is intended for a pipeline
# this heavy.
iface = gr.Interface(
    title="Video to Text Converter using Whisper",
    description="Upload a video file or provide a video URL, select the Whisper model size, and get the transcribed text.",
    fn=process_video,
    inputs=[
        gr.Dropdown(
            choices=["tiny", "base", "small", "medium", "large"],
            label="Model Size",
        ),
        gr.File(label="Upload Video File"),
        gr.Textbox(label="Video URL"),
    ],
    outputs="text",
    live=True,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|