Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
import spacy
from PIL import Image
import os
import cv2

# Load SpaCy model (small English pipeline; must be installed separately
# via `python -m spacy download en_core_web_sm`).
nlp = spacy.load("en_core_web_sm")

# Maps a lower-cased keyword to the image asset used to render it.
# Keys under assets/characters/ are pasted as sprites; keys under
# assets/backgrounds/ are used as the frame background.
ASSET_MAP = {
    "man": "assets/characters/man.png",
    "woman": "assets/characters/woman.png",
    "dog": "assets/characters/dog.png",
    "park": "assets/backgrounds/park.jpg",
    "office": "assets/backgrounds/office.jpg"
}

# Working directory for the intermediate per-frame PNGs.
FRAME_FOLDER = "frames"
# Path of the final rendered video file.
VIDEO_OUTPUT = "generated_video.mp4"
20 |
+
|
21 |
+
def extract_entities(prompt):
    """Extract character and scene keywords from a free-text prompt.

    Runs spaCy NER first, then falls back to matching individual token
    lemmas against ASSET_MAP. The fallback matters because everyday
    nouns such as "man", "dog", or "park" are rarely tagged as named
    entities by spaCy, so NER alone almost never yields the keys this
    app can actually render.

    Args:
        prompt: The user's scene description.

    Returns:
        Tuple ``(characters, scenes)`` of lower-cased keyword lists.
    """
    doc = nlp(prompt)
    characters = []
    scenes = []
    for ent in doc.ents:
        if ent.label_ in ["PERSON", "ORG"]:
            characters.append(ent.text.lower())
        elif ent.label_ in ["LOC", "GPE", "FAC"]:
            scenes.append(ent.text.lower())
    # Fallback: match token lemmas against known asset keys. Whether a
    # key is a scene or a character is decided by its asset path
    # ("backgrounds" vs "characters" in ASSET_MAP).
    for token in doc:
        key = token.lemma_.lower()
        if key in ASSET_MAP:
            if "backgrounds" in ASSET_MAP[key]:
                if key not in scenes:
                    scenes.append(key)
            elif key not in characters:
                characters.append(key)
    return characters, scenes
31 |
+
|
32 |
+
def compose_frame(background_path, character_paths, output_path, char_positions=None):
    """Paste character sprites onto a background and save one frame.

    Args:
        background_path: Path of the background image.
        character_paths: Iterable of sprite image paths, pasted in order.
        output_path: Where to save the composed frame (PNG expected,
            since the result keeps an alpha channel).
        char_positions: Optional list of (x, y) positions, one per
            character; when omitted, characters are spread out
            horizontally at a fixed default.
    """
    # Context managers close the underlying files promptly; the original
    # left every Image.open handle dangling (leaked one fd per image per
    # frame). convert() returns a new in-memory image, so closing the
    # source is safe.
    with Image.open(background_path) as bg_src:
        bg = bg_src.convert('RGBA')
    for idx, char_path in enumerate(character_paths):
        with Image.open(char_path) as char_src:
            char_img = char_src.convert('RGBA')
        pos = (100 + idx*100, 200) if not char_positions else char_positions[idx]
        # Third argument is the paste mask: the sprite's own alpha keeps
        # transparent pixels from overwriting the background.
        bg.paste(char_img, pos, char_img)
    bg.save(output_path)
39 |
+
|
40 |
+
def create_video_from_frames(frame_folder, output_path, fps=24):
    """Assemble every .png in *frame_folder* (sorted by name) into an mp4.

    Args:
        frame_folder: Directory containing frame_###.png files.
        output_path: Destination video path.
        fps: Output frame rate (default 24).
    """
    images = sorted(img for img in os.listdir(frame_folder) if img.endswith(".png"))
    if not images:
        print("No frames found!")
        return
    # cv2.imread signals failure by returning None instead of raising;
    # guard it, otherwise `.shape` below crashes on an unreadable file.
    first = cv2.imread(os.path.join(frame_folder, images[0]))
    if first is None:
        print("No frames found!")
        return
    height, width, _ = first.shape

    video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    try:
        for img in images:
            frame = cv2.imread(os.path.join(frame_folder, img))
            # Skip unreadable frames rather than passing None to write().
            if frame is not None:
                video.write(frame)
    finally:
        # Always finalize the container, even if a write raises —
        # otherwise the mp4 is left truncated/unplayable.
        video.release()
52 |
+
|
53 |
+
def generate_video(prompt):
    """Render a short video from a text prompt (Gradio handler).

    Pipeline: extract characters/scenes from the prompt, pick assets,
    compose per-frame PNGs with the characters drifting rightward, then
    encode them into VIDEO_OUTPUT.

    Args:
        prompt: Free-text scene description.

    Returns:
        ``(video_path, status_message)`` on success, or
        ``(None, error_message)`` when no scene was detected.
    """
    characters, scenes = extract_entities(prompt)

    if not scenes:
        return None, "No scene detected! Please mention a place/location in your prompt."

    os.makedirs(FRAME_FOLDER, exist_ok=True)
    # Remove frames left over from a previous run: the encoder consumes
    # every .png in the folder, so a prior, longer run's stale frames
    # would otherwise leak into this video.
    for leftover in os.listdir(FRAME_FOLDER):
        if leftover.endswith(".png"):
            os.remove(os.path.join(FRAME_FOLDER, leftover))

    bg_path = ASSET_MAP.get(scenes[0], ASSET_MAP["park"])
    char_paths = [ASSET_MAP.get(char, ASSET_MAP["man"]) for char in characters]

    total_frames = 48  # Change to higher for longer video (e.g., 2880 for 2 mins)
    for i in range(total_frames):
        # Offset each character by idx*100 (matching compose_frame's own
        # default spacing) so sprites don't all stack on one point; the
        # i*2 term drifts everyone rightward over time.
        positions = [(100 + i*2 + idx*100, 200) for idx in range(len(char_paths))]
        frame_path = os.path.join(FRAME_FOLDER, f"frame_{i:03d}.png")
        compose_frame(bg_path, char_paths, frame_path, char_positions=positions)

    create_video_from_frames(FRAME_FOLDER, VIDEO_OUTPUT)
    return VIDEO_OUTPUT, f"Characters: {characters}, Scenes: {scenes}"
71 |
+
|
72 |
+
# --- Gradio front-end: one textbox in, a video plus a status line out ---
prompt_input = gr.Textbox(lines=3, placeholder="Describe your scene here...")
result_outputs = [gr.Video(), gr.Textbox()]

iface = gr.Interface(
    fn=generate_video,
    inputs=prompt_input,
    outputs=result_outputs,
    title="Text to Video AI App (Gradio)"
)

iface.launch()