nezihtopaloglu committed
Commit 96714cf · 1 Parent(s): 3920bf8

added app file and requirements

Files changed (2)
  1. app.py +73 -3
  2. requirements.txt +9 -0
app.py CHANGED
@@ -1,7 +1,77 @@
 import gradio as gr
+import torch
+import torchaudio
+from diffusers import StableDiffusionPipeline
+from TTS.api import TTS
+import moviepy.editor as mp
+import numpy as np
+import os
+from PIL import Image
 
-def greet(name):
-    return "Hello " + name + "!!"
+def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=10):
+    words = text.split()
+    chunks = []
+    current_chunk = []
+    current_duration = 0
+    for word in words:
+        current_chunk.append(word)
+        current_duration += 1 / words_per_second
+        if current_duration >= min_sec:
+            if current_duration >= max_sec or len(current_chunk) > 20:
+                chunks.append(" ".join(current_chunk))
+                current_chunk = []
+                current_duration = 0
+    if current_chunk:
+        chunks.append(" ".join(current_chunk))
+    return chunks
+
+def generate_speech(text):
+    tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
+    wav_path = "speech.wav"
+    tts.tts_to_file(text=text, file_path=wav_path)
+    return wav_path
+
+def generate_images(chunks):
+    pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
+    pipe.to("cuda" if torch.cuda.is_available() else "cpu")
+    image_paths = []
+    for i, chunk in enumerate(chunks):
+        image = pipe(chunk).images[0]
+        img_path = f"image_{i}.png"
+        image.save(img_path)
+        image_paths.append(img_path)
+    return image_paths
+
+def create_video(images, durations, speech_path):
+    clips = [mp.ImageClip(img).set_duration(dur) for img, dur in zip(images, durations)]
+    black_start = mp.ColorClip((512, 512), color=(0,0,0), duration=1)
+    black_end = mp.ColorClip((512, 512), color=(0,0,0), duration=2)
+    video = mp.concatenate_videoclips([black_start] + clips + [black_end])
+    audio = mp.AudioFileClip(speech_path)
+    final_video = video.set_audio(audio)
+    final_video.write_videofile("output.mp4", fps=24)
+    return "output.mp4"
+
+def process_text(text):
+    chunks = estimate_chunk_durations(text)
+    speech_path = generate_speech(text)
+    image_paths = generate_images(chunks)
+    durations = [min(10, max(5, len(chunk.split()) / 2.5)) for chunk in chunks]
+    video_path = create_video(image_paths, durations, speech_path)
+    return video_path
+
+with gr.Blocks() as demo:
+    gr.Markdown("# Text-to-Video Generator using AI 🎥")
+    text_input = gr.Textbox(label="Enter your text")
+    file_input = gr.File(label="Or upload a .txt file")
+    process_btn = gr.Button("Generate Video")
+    output_video = gr.Video()
+
+    def handle_request(text, file):
+        if file is not None:
+            text = open(file.name, "r").read()
+        return process_text(text)
+
+    process_btn.click(handle_request, inputs=[text_input, file_input], outputs=output_video)
 
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
 demo.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
+gradio
+torch
+torchaudio
+diffusers
+transformers
+TTS
+moviepy
+numpy
+Pillow
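
For reference, a standalone sanity check of the chunk-splitting behaviour introduced in app.py above. The function is copied verbatim from the diff; the sample text and the expected chunk sizes are illustrative only, not part of the commit.

# estimate_chunk_durations, copied from app.py above: splits text into chunks of
# roughly min_sec-max_sec of narration at the assumed speaking rate.
def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=10):
    words = text.split()
    chunks = []
    current_chunk = []
    current_duration = 0
    for word in words:
        current_chunk.append(word)
        current_duration += 1 / words_per_second  # ~0.4 s per word at 2.5 words/s
        if current_duration >= min_sec:
            if current_duration >= max_sec or len(current_chunk) > 20:
                chunks.append(" ".join(current_chunk))
                current_chunk = []
                current_duration = 0
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks

# Made-up sample: 60 placeholder words, i.e. roughly 24 s of narration at 2.5 words/s.
text = " ".join(f"word{i}" for i in range(60))
for i, chunk in enumerate(estimate_chunk_durations(text)):
    print(i, len(chunk.split()), "words")  # chunks of at most 21 words each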