nezihtopaloglu committed
Commit 96714cf · 1 Parent(s): 3920bf8

added app file and requirements

Files changed (2)
  1. app.py +73 -3
  2. requirements.txt +9 -0
app.py CHANGED
@@ -1,7 +1,77 @@
 import gradio as gr
+import torch
+import torchaudio
+from diffusers import StableDiffusionPipeline
+from TTS.api import TTS
+import moviepy.editor as mp
+import numpy as np
+import os
+from PIL import Image
 
-def greet(name):
-    return "Hello " + name + "!!"
+def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=10):
+    words = text.split()
+    chunks = []
+    current_chunk = []
+    current_duration = 0
+    for word in words:
+        current_chunk.append(word)
+        current_duration += 1 / words_per_second
+        if current_duration >= min_sec:
+            if current_duration >= max_sec or len(current_chunk) > 20:
+                chunks.append(" ".join(current_chunk))
+                current_chunk = []
+                current_duration = 0
+    if current_chunk:
+        chunks.append(" ".join(current_chunk))
+    return chunks
+
+def generate_speech(text):
+    tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
+    wav_path = "speech.wav"
+    tts.tts_to_file(text=text, file_path=wav_path)
+    return wav_path
+
+def generate_images(chunks):
+    pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
+    pipe.to("cuda" if torch.cuda.is_available() else "cpu")
+    image_paths = []
+    for i, chunk in enumerate(chunks):
+        image = pipe(chunk).images[0]
+        img_path = f"image_{i}.png"
+        image.save(img_path)
+        image_paths.append(img_path)
+    return image_paths
+
+def create_video(images, durations, speech_path):
+    clips = [mp.ImageClip(img).set_duration(dur) for img, dur in zip(images, durations)]
+    black_start = mp.ColorClip((512, 512), color=(0,0,0), duration=1)
+    black_end = mp.ColorClip((512, 512), color=(0,0,0), duration=2)
+    video = mp.concatenate_videoclips([black_start] + clips + [black_end])
+    audio = mp.AudioFileClip(speech_path)
+    final_video = video.set_audio(audio)
+    final_video.write_videofile("output.mp4", fps=24)
+    return "output.mp4"
+
+def process_text(text):
+    chunks = estimate_chunk_durations(text)
+    speech_path = generate_speech(text)
+    image_paths = generate_images(chunks)
+    durations = [min(10, max(5, len(chunk.split()) / 2.5)) for chunk in chunks]
+    video_path = create_video(image_paths, durations, speech_path)
+    return video_path
+
+with gr.Blocks() as demo:
+    gr.Markdown("# Text-to-Video Generator using AI 🎥")
+    text_input = gr.Textbox(label="Enter your text")
+    file_input = gr.File(label="Or upload a .txt file")
+    process_btn = gr.Button("Generate Video")
+    output_video = gr.Video()
+
+    def handle_request(text, file):
+        if file is not None:
+            text = open(file.name, "r").read()
+        return process_text(text)
+
+    process_btn.click(handle_request, inputs=[text_input, file_input], outputs=output_video)
 
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
 demo.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
+gradio
+torch
+torchaudio
+diffusers
+transformers
+TTS
+moviepy
+numpy
+Pillow
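
For reference, a standalone sanity check of the chunk-splitting behaviour introduced in app.py above. The function is copied verbatim from the diff; the sample text and the expected chunk sizes are illustrative only, not part of the commit.

# estimate_chunk_durations, copied from app.py above: splits text into chunks of
# roughly min_sec-max_sec of narration at the assumed speaking rate.
def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=10):
    words = text.split()
    chunks = []
    current_chunk = []
    current_duration = 0
    for word in words:
        current_chunk.append(word)
        current_duration += 1 / words_per_second  # ~0.4 s per word at 2.5 words/s
        if current_duration >= min_sec:
            if current_duration >= max_sec or len(current_chunk) > 20:
                chunks.append(" ".join(current_chunk))
                current_chunk = []
                current_duration = 0
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks

# Made-up sample: 60 placeholder words, i.e. roughly 24 s of narration at 2.5 words/s.
text = " ".join(f"word{i}" for i in range(60))
for i, chunk in enumerate(estimate_chunk_durations(text)):
    print(i, len(chunk.split()), "words")  # chunks of at most 21 words each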