Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
d9d7e9a
1
Parent(s):
9d701db
frame rate issue
Browse files
- app.py +5 -7
- requirements.txt +1 -0
app.py
CHANGED
@@ -33,7 +33,7 @@ def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=10):
|
|
33 |
|
34 |
def generate_speech(text):
|
35 |
print("Generating speech...")
|
36 |
-
tts = TTS("tts_models/en/ljspeech/tacotron2-DDC"
|
37 |
print("TTS model loaded")
|
38 |
tts.tts_to_file(text="Hello world!", file_path="test.wav")
|
39 |
wav_path = "speech.wav"
|
@@ -41,7 +41,7 @@ def generate_speech(text):
|
|
41 |
print("Speech generated")
|
42 |
return wav_path
|
43 |
|
44 |
-
def generate_images(chunks, image_size=(640, 480), use_diffusion=True, num_steps=
|
45 |
print("Generating images...")
|
46 |
image_paths = []
|
47 |
|
@@ -72,10 +72,8 @@ def create_video(images, durations, speech_path, image_size=(640, 480)):
|
|
72 |
for img, dur in zip(images, durations):
|
73 |
pil_image = Image.open(img)
|
74 |
pil_image = pil_image.resize(image_size, Image.Resampling.LANCZOS)
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
clip = mp.ImageClip(img_resized_path).set_duration(dur)
|
79 |
clips.append(clip)
|
80 |
|
81 |
black_start = mp.ColorClip((512, 512), color=(0,0,0), duration=1)
|
@@ -83,7 +81,7 @@ def create_video(images, durations, speech_path, image_size=(640, 480)):
|
|
83 |
video = mp.concatenate_videoclips([black_start] + clips + [black_end])
|
84 |
audio = mp.AudioFileClip(speech_path)
|
85 |
final_video = video.set_audio(audio)
|
86 |
-
final_video.write_videofile("output.mp4", fps=
|
87 |
return "output.mp4"
|
88 |
|
89 |
def process_text(text, image_size, use_diffusion, num_steps):
|
|
|
33 |
|
34 |
def generate_speech(text):
|
35 |
print("Generating speech...")
|
36 |
+
tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
|
37 |
print("TTS model loaded")
|
38 |
tts.tts_to_file(text="Hello world!", file_path="test.wav")
|
39 |
wav_path = "speech.wav"
|
|
|
41 |
print("Speech generated")
|
42 |
return wav_path
|
43 |
|
44 |
+
def generate_images(chunks, image_size=(640, 480), use_diffusion=True, num_steps=40):
|
45 |
print("Generating images...")
|
46 |
image_paths = []
|
47 |
|
|
|
72 |
for img, dur in zip(images, durations):
|
73 |
pil_image = Image.open(img)
|
74 |
pil_image = pil_image.resize(image_size, Image.Resampling.LANCZOS)
|
75 |
+
frame = np.array(pil_image) # Convert to NumPy array
|
76 |
+
clip = mp.ImageClip(frame).set_duration(dur)
|
|
|
|
|
77 |
clips.append(clip)
|
78 |
|
79 |
black_start = mp.ColorClip((512, 512), color=(0,0,0), duration=1)
|
|
|
81 |
video = mp.concatenate_videoclips([black_start] + clips + [black_end])
|
82 |
audio = mp.AudioFileClip(speech_path)
|
83 |
final_video = video.set_audio(audio)
|
84 |
+
final_video.write_videofile("output.mp4", fps=30)
|
85 |
return "output.mp4"
|
86 |
|
87 |
def process_text(text, image_size, use_diffusion, num_steps):
|
requirements.txt
CHANGED
@@ -3,6 +3,7 @@ torch
|
|
3 |
torchaudio
|
4 |
diffusers
|
5 |
transformers
|
|
|
6 |
TTS
|
7 |
moviepy
|
8 |
numpy
|
|
|
3 |
torchaudio
|
4 |
diffusers
|
5 |
transformers
|
6 |
+
accelerate
|
7 |
TTS
|
8 |
moviepy
|
9 |
numpy
|