jpjp9292 committed on
Commit
50c4728
·
verified ·
1 Parent(s): 40eb44b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import time
4
+ from moviepy.editor import VideoFileClip
5
+ from faster_whisper import WhisperModel
6
+
7
+ # Function that converts a video to MP3
8
def convert_mp4_to_mp3(video_file_path, output_dir):
    """Extract the audio track of a video file and write it as an MP3.

    Args:
        video_file_path: Path of the video file to read.
        output_dir: Directory the MP3 is written into. The output file reuses
            the video's base name with a ``.mp3`` extension.

    Returns:
        The path of the MP3 file that was written.

    Raises:
        ValueError: If the video exposes no audio track (``video.audio`` is
            ``None``), instead of failing later with an ``AttributeError``.
    """
    base_name = os.path.splitext(os.path.basename(video_file_path))[0]
    output_path = os.path.join(output_dir, base_name + ".mp3")

    video = VideoFileClip(video_file_path)
    try:
        audio = video.audio
        if audio is None:
            raise ValueError(f"No audio track found in {video_file_path!r}")
        try:
            audio.write_audiofile(output_path)
        finally:
            # Release the audio reader even when encoding fails.
            audio.close()
    finally:
        # Always release the video reader so failed conversions do not leak
        # open file handles / subprocesses.
        video.close()
    return output_path
16
+
17
+ # Function that converts an MP3 file to text using the Whisper model
18
def transcribe_audio(model_size, audio_file):
    """Transcribe an audio file with a faster-whisper model and format a report.

    Args:
        model_size: Model identifier passed straight to ``WhisperModel``.
        audio_file: Audio input passed straight to ``model.transcribe``.

    Returns:
        A text report containing the detected language, one timestamped line
        per segment, and the elapsed wall-clock time. If a ``PermissionError``
        or ``ValueError`` is raised while transcribing, a one-line error
        message is returned instead.
    """
    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
    started_at = time.time()

    try:
        segments, info = whisper.transcribe(audio_file, beam_size=5)

        detected_language = "Detected language '%s' with probability %f" % (info.language, info.language_probability)
        # The segments are consumed inside the ``try`` so errors raised while
        # iterating them are handled by the same ``except`` clauses.
        lines = [
            "[%.2fs -> %.2fs] %s" % (seg.start, seg.end, seg.text)
            for seg in segments
        ]
        result_text = "\n".join(lines)
    except PermissionError as e:
        return f"PermissionError: {e}"
    except ValueError as e:
        return f"ValueError: {e}"

    elapsed_time = time.time() - started_at

    return f"{detected_language}\n\nTranscription:\n{result_text}\n\nElapsed time: {elapsed_time:.2f} seconds"
40
+
41
+ # Main function used by the Gradio interface
42
def process_video(model_size, video_file=None, video_url=None):
    """Turn an uploaded or remote video into a transcription report.

    A non-blank ``video_url`` takes precedence over ``video_file``.

    Args:
        model_size: Whisper model size forwarded to ``transcribe_audio``.
        video_file: Uploaded file; either an object exposing the on-disk path
            as ``.name`` or the path itself as a string.
        video_url: URL of a video to download. Blank or whitespace-only values
            are treated as "not provided".

    Returns:
        The text produced by ``transcribe_audio``, or a prompt message when
        neither a file nor a URL was supplied.
    """
    # A textbox can submit whitespace only; do not treat that as a real URL,
    # otherwise it would shadow an uploaded file and fail on download.
    url = (video_url or "").strip()

    if url:
        # NOTE(review): confirm `download_url` exists in the installed Gradio
        # version's `processing_utils`.
        video_file_path = gr.processing_utils.download_url(url, dir='/tmp')
    elif video_file:
        # Accept both a file-like object with `.name` and a plain path string
        # (which form arrives presumably depends on the Gradio version / the
        # File component's `type` setting).
        video_file_path = getattr(video_file, "name", video_file)
    else:
        return "Please upload a video file or provide a video URL."

    save_path = "/tmp"
    mp3_file_path = convert_mp4_to_mp3(video_file_path, save_path)
    return transcribe_audio(model_size, mp3_file_path)
54
+
55
+ # Gradio interface definition
56
# Gradio UI: a model-size selector plus two alternative video sources (an
# upload or a URL), wired to `process_video`, with plain-text output.
iface = gr.Interface(
    fn=process_video,
    inputs=[
        # A default value is set so `model_size` is never None when the user
        # submits without touching the dropdown.
        gr.Dropdown(["tiny", "base", "small", "medium", "large"], value="base", label="Model Size"),
        # The deprecated `optional=True` keyword is dropped: inputs may already
        # be left empty, and `process_video` handles a missing file/URL itself.
        gr.File(label="Upload Video File"),
        gr.Textbox(label="Video URL"),
    ],
    outputs="text",
    title="Video to Text Converter using Whisper",
    description="Upload a video file or provide a video URL, select the Whisper model size, and get the transcribed text.",
)

# Start the web app only when the file is run as a script.
if __name__ == "__main__":
    iface.launch()