Update app.py
Browse files
app.py
CHANGED
@@ -1,73 +1,26 @@
|
|
1 |
import gradio as gr
|
2 |
-
import
|
3 |
-
from PIL import Image
|
4 |
import os
|
5 |
import cv2
|
6 |
-
import subprocess
|
7 |
|
8 |
-
# ---
|
9 |
-
|
10 |
-
nlp = spacy.load("en_core_web_sm")
|
11 |
-
except OSError:
|
12 |
-
subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
|
13 |
-
nlp = spacy.load("en_core_web_sm")
|
14 |
-
|
15 |
-
# --- Define asset mapping (character and background files) ---
|
16 |
-
ASSET_MAP = {
|
17 |
-
"man": "assets/characters/man.png",
|
18 |
-
"woman": "assets/characters/woman.png",
|
19 |
-
"dog": "assets/characters/dog.png",
|
20 |
-
"park": "assets/backgrounds/park.jpg",
|
21 |
-
"office": "assets/backgrounds/office.jpg",
|
22 |
-
"home": "assets/backgrounds/home.jpg",
|
23 |
-
"school": "assets/backgrounds/school.jpg",
|
24 |
-
"street": "assets/backgrounds/street.jpg"
|
25 |
-
}
|
26 |
|
27 |
FRAME_FOLDER = "frames"
|
28 |
VIDEO_OUTPUT = "generated_video.mp4"
|
29 |
|
30 |
-
# ---
|
31 |
-
def
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
# Named Entity Recognition
|
37 |
-
for ent in doc.ents:
|
38 |
-
if ent.label_ in ["PERSON", "ORG"]:
|
39 |
-
characters.append(ent.text.lower())
|
40 |
-
elif ent.label_ in ["LOC", "GPE", "FAC"]:
|
41 |
-
scenes.append(ent.text.lower())
|
42 |
-
|
43 |
-
# If no scenes found → keyword matching from ASSET_MAP keys
|
44 |
-
if not scenes:
|
45 |
-
for keyword in ASSET_MAP.keys():
|
46 |
-
if keyword in prompt.lower() and keyword in ["park", "office", "home", "school", "street"]:
|
47 |
-
scenes.append(keyword)
|
48 |
-
break
|
49 |
-
|
50 |
-
# If still no scene → fallback default
|
51 |
-
if not scenes:
|
52 |
-
scenes.append("park")
|
53 |
-
|
54 |
-
return characters, scenes
|
55 |
-
|
56 |
-
# --- Compose a single image frame ---
|
57 |
-
def compose_frame(background_path, character_paths, output_path, char_positions=None):
|
58 |
-
bg = Image.open(background_path).convert('RGBA')
|
59 |
-
for idx, char_path in enumerate(character_paths):
|
60 |
-
char_img = Image.open(char_path).convert('RGBA')
|
61 |
-
pos = (100 + idx*100, 200) if not char_positions else char_positions[idx]
|
62 |
-
bg.paste(char_img, pos, char_img)
|
63 |
-
bg.save(output_path)
|
64 |
|
65 |
-
# ---
|
66 |
-
def create_video_from_frames(frame_folder, output_path, fps=
|
67 |
images = sorted([img for img in os.listdir(frame_folder) if img.endswith(".png")])
|
68 |
if not images:
|
69 |
-
|
70 |
-
return
|
71 |
frame = cv2.imread(os.path.join(frame_folder, images[0]))
|
72 |
height, width, _ = frame.shape
|
73 |
|
@@ -76,32 +29,18 @@ def create_video_from_frames(frame_folder, output_path, fps=24):
|
|
76 |
video.write(cv2.imread(os.path.join(frame_folder, img)))
|
77 |
video.release()
|
78 |
|
79 |
-
# --- Main function
|
80 |
def generate_video(prompt):
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
bg_path = ASSET_MAP.get(scenes[0], ASSET_MAP["park"])
|
86 |
-
char_paths = [ASSET_MAP.get(char, ASSET_MAP["man"]) for char in characters]
|
87 |
-
|
88 |
-
total_frames = 48 # 2 sec @ 24fps; increase to 2880 for 2 min
|
89 |
-
for i in range(total_frames):
|
90 |
-
positions = [(100 + i*2, 200) for _ in char_paths]
|
91 |
-
frame_path = os.path.join(FRAME_FOLDER, f"frame_{i:03d}.png")
|
92 |
-
compose_frame(bg_path, char_paths, frame_path, char_positions=positions)
|
93 |
-
|
94 |
-
create_video_from_frames(FRAME_FOLDER, VIDEO_OUTPUT)
|
95 |
-
|
96 |
-
details = f"Characters detected: {characters if characters else 'default'}, Scene: {scenes[0]}"
|
97 |
-
return VIDEO_OUTPUT, details
|
98 |
|
99 |
-
# --- Gradio
|
100 |
iface = gr.Interface(
|
101 |
fn=generate_video,
|
102 |
inputs=gr.Textbox(lines=3, placeholder="Describe your scene here..."),
|
103 |
-
outputs=
|
104 |
-
title="Text
|
105 |
)
|
106 |
|
107 |
if __name__ == "__main__":
|
|
|
1 |
import gradio as gr
|
2 |
+
from diffusers import StableDiffusionPipeline
|
|
|
3 |
import os
|
4 |
import cv2
|
|
|
5 |
|
6 |
+
# --- Initialize Stable Diffusion pipeline ---
# Loaded once at module import; every request served by the Gradio app reuses
# this single pipeline instance.
# NOTE(review): no device placement (.to("cuda")) or dtype is specified, so this
# presumably runs on CPU with the default fp32 weights — confirm, as per-frame
# generation will be very slow on CPU.
pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
FRAME_FOLDER = "frames"
|
10 |
VIDEO_OUTPUT = "generated_video.mp4"
|
11 |
|
12 |
+
# --- Function to generate N frames using AI ---
def generate_frames(prompt, num_frames=10):
    """Generate ``num_frames`` still images for ``prompt`` with the global pipeline.

    Frames are written into ``FRAME_FOLDER`` as ``frame_000.png``,
    ``frame_001.png``, ... (zero-padded so lexicographic sort equals frame order).

    Args:
        prompt: Text description passed to the Stable Diffusion pipeline.
        num_frames: Number of frames to render.
    """
    os.makedirs(FRAME_FOLDER, exist_ok=True)
    for i in range(num_frames):
        # NOTE(review): each pipe() call samples a fresh random latent, so the
        # frames are independent stills rather than a coherent animation —
        # confirm this is the intended "video" behavior.
        image = pipe(prompt).images[0]
        # os.path.join instead of manual f-string path concatenation.
        image.save(os.path.join(FRAME_FOLDER, f"frame_{i:03d}.png"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
+
# --- Function to create video from frames ---
def create_video_from_frames(frame_folder, output_path, fps=2):
    """Assemble every ``*.png`` in ``frame_folder`` into an MP4 at ``output_path``.

    Frames are taken in sorted filename order; the output frame size is taken
    from the first frame, so all frames are expected to share one resolution.

    Args:
        frame_folder: Directory containing the rendered PNG frames.
        output_path: Destination path of the MP4 file.
        fps: Playback frame rate of the resulting video.

    Raises:
        ValueError: If ``frame_folder`` contains no ``.png`` files.
    """
    # sorted() accepts a generator directly — no intermediate list needed.
    images = sorted(img for img in os.listdir(frame_folder) if img.endswith(".png"))
    if not images:
        raise ValueError("No frames generated.")
    first = cv2.imread(os.path.join(frame_folder, images[0]))
    height, width, _ = first.shape
    # NOTE(review): the VideoWriter setup below is reconstructed — the original
    # lines were cut out of the visible diff hunk. Verify codec/container match
    # the previous behavior (mp4v is the usual pairing with an .mp4 path).
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    for img in images:
        video.write(cv2.imread(os.path.join(frame_folder, img)))
    video.release()
|
31 |
|
32 |
+
# --- Main function called by Gradio ---
def generate_video(prompt, num_frames=10, fps=2):
    """Render frames for ``prompt`` and assemble them into ``VIDEO_OUTPUT``.

    Defaults (10 frames at 2 fps → a ~5 s clip) match the previous hard-coded
    behavior; both are now keyword parameters so callers can tune length and
    rate without editing this function.

    Args:
        prompt: Scene description typed into the Gradio textbox.
        num_frames: Number of frames to generate.
        fps: Frame rate of the assembled video.

    Returns:
        Path to the generated MP4 so Gradio can display it.
    """
    generate_frames(prompt, num_frames=num_frames)
    create_video_from_frames(FRAME_FOLDER, VIDEO_OUTPUT, fps=fps)
    return VIDEO_OUTPUT
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
+
# --- Gradio UI ---
# Build the input widget first, then wire everything into the Interface.
prompt_input = gr.Textbox(lines=3, placeholder="Describe your scene here...")

iface = gr.Interface(
    fn=generate_video,
    inputs=prompt_input,
    outputs=gr.Video(),
    title="AI Text-to-Video Generator (No manual assets needed)",
)
|
45 |
|
46 |
if __name__ == "__main__":
|