nezihtopaloglu committed
Commit d9d7e9a · 1 Parent(s): 9d701db

frame rate issue

Files changed (2)
  1. app.py +5 -7
  2. requirements.txt +1 -0
app.py CHANGED
@@ -33,7 +33,7 @@ def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=10):
 
 def generate_speech(text):
     print("Generating speech...")
-    tts = TTS("tts_models/en/ljspeech/tacotron2-DDC", gpu=False)
+    tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
     print("TTS model loaded")
     tts.tts_to_file(text="Hello world!", file_path="test.wav")
     wav_path = "speech.wav"
@@ -41,7 +41,7 @@ def generate_speech(text):
     print("Speech generated")
     return wav_path
 
-def generate_images(chunks, image_size=(640, 480), use_diffusion=True, num_steps=5):
+def generate_images(chunks, image_size=(640, 480), use_diffusion=True, num_steps=40):
     print("Generating images...")
     image_paths = []
 
@@ -72,10 +72,8 @@ def create_video(images, durations, speech_path, image_size=(640, 480)):
     for img, dur in zip(images, durations):
         pil_image = Image.open(img)
         pil_image = pil_image.resize(image_size, Image.Resampling.LANCZOS)
-        img_resized_path = f"resized_{os.path.basename(img)}"
-        pil_image.save(img_resized_path)
-
-        clip = mp.ImageClip(img_resized_path).set_duration(dur)
+        frame = np.array(pil_image)  # Convert to NumPy array
+        clip = mp.ImageClip(frame).set_duration(dur)
         clips.append(clip)
 
     black_start = mp.ColorClip((512, 512), color=(0,0,0), duration=1)
@@ -83,7 +81,7 @@ def create_video(images, durations, speech_path, image_size=(640, 480)):
     video = mp.concatenate_videoclips([black_start] + clips + [black_end])
     audio = mp.AudioFileClip(speech_path)
     final_video = video.set_audio(audio)
-    final_video.write_videofile("output.mp4", fps=24)
+    final_video.write_videofile("output.mp4", fps=30)
     return "output.mp4"
 
 def process_text(text, image_size, use_diffusion, num_steps):
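
Note: a minimal sketch of the clip-assembly loop after this change, shown in isolation. The imports, the moviepy alias (mp), and the helper name build_clips are assumptions for illustration; the rest of create_video is unchanged and not repeated here.

import numpy as np
import moviepy.editor as mp
from PIL import Image

def build_clips(images, durations, image_size=(640, 480)):
    clips = []
    for img, dur in zip(images, durations):
        # Resize with PIL, then hand the frame to MoviePy as a NumPy array
        # instead of writing a resized copy to disk and reloading it.
        pil_image = Image.open(img).resize(image_size, Image.Resampling.LANCZOS)
        clips.append(mp.ImageClip(np.array(pil_image)).set_duration(dur))
    return clips
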
requirements.txt CHANGED
@@ -3,6 +3,7 @@ torch
 torchaudio
 diffusers
 transformers
+accelerate
 TTS
 moviepy
 numpy