# t2a/app.py
import os
import subprocess
import sys

import cv2
import gradio as gr
import spacy
from PIL import Image
# --- Dynamic SpaCy model loading (avoids download at build time) ---
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Model not installed: download it with the current interpreter, then retry.
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
    nlp = spacy.load("en_core_web_sm")
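# Note: the runtime download above needs network access at startup. On Hugging
# Face Spaces, a common alternative is pinning the model wheel directly in
# requirements.txt (adjust the version to match your spaCy install), e.g.:
#   en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl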
# --- Asset mapping (character/background library) ---
ASSET_MAP = {
    "man": "assets/characters/man.png",
    "woman": "assets/characters/woman.png",
    "dog": "assets/characters/dog.png",
    "park": "assets/backgrounds/park.jpg",
    "office": "assets/backgrounds/office.jpg",
}
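# Keys must match the lowercased entity text returned by extract_entities();
# anything else falls back to the defaults chosen in generate_video().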
FRAME_FOLDER = "frames"
VIDEO_OUTPUT = "generated_video.mp4"
# --- Extract characters/scenes from prompt ---
def extract_entities(prompt):
    doc = nlp(prompt)
    characters = []
    scenes = []
    for ent in doc.ents:
        # People and organisations become characters; locations become scenes.
        if ent.label_ in ["PERSON", "ORG"]:
            characters.append(ent.text.lower())
        elif ent.label_ in ["LOC", "GPE", "FAC"]:
            scenes.append(ent.text.lower())
    return characters, scenes
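# Illustrative example (actual spans depend on the spaCy model version):
#   extract_entities("Alice met Bob at Central Park")
#   -> (["alice", "bob"], ["central park"])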
# --- Compose a single frame ---
def compose_frame(background_path, character_paths, output_path, char_positions=None):
    bg = Image.open(background_path).convert("RGBA")
    for idx, char_path in enumerate(character_paths):
        char_img = Image.open(char_path).convert("RGBA")
        pos = char_positions[idx] if char_positions else (100 + idx * 100, 200)
        # Passing the image itself as the mask preserves its alpha transparency.
        bg.paste(char_img, pos, char_img)
    bg.save(output_path)
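# Example call (hypothetical paths; the asset files must exist on disk):
#   compose_frame("assets/backgrounds/park.jpg",
#                 ["assets/characters/man.png"],
#                 "frames/frame_00000.png")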
# --- Create video from frames ---
def create_video_from_frames(frame_folder, output_path, fps=24):
    images = sorted(img for img in os.listdir(frame_folder) if img.endswith(".png"))
    if not images:
        print("No frames found!")
        return
    # Use the first frame to fix the video dimensions.
    frame = cv2.imread(os.path.join(frame_folder, images[0]))
    height, width, _ = frame.shape
    video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
    for img in images:
        video.write(cv2.imread(os.path.join(frame_folder, img)))
    video.release()
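# Note: "mp4v" encodes MPEG-4 Part 2, which some browsers cannot play inline.
# H.264 ("avc1") is more widely supported, but pip builds of opencv-python often
# ship without an H.264 encoder; re-encoding the output with ffmpeg is a common
# workaround if playback fails in the Gradio video component.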
# --- Main function triggered by Gradio ---
def generate_video(prompt):
    characters, scenes = extract_entities(prompt)
    if not scenes:
        return None, "No scene detected! Please include a place in your prompt."
    os.makedirs(FRAME_FOLDER, exist_ok=True)
    # Clear out frames from earlier runs so they don't leak into this video.
    for old in os.listdir(FRAME_FOLDER):
        if old.endswith(".png"):
            os.remove(os.path.join(FRAME_FOLDER, old))
    # Fall back to default assets when an entity has no matching artwork.
    bg_path = ASSET_MAP.get(scenes[0], ASSET_MAP["park"])
    char_paths = [ASSET_MAP.get(char, ASSET_MAP["man"]) for char in characters]
    total_frames = 48  # ~2 seconds at 24 fps; increase to 2880 for 2 minutes
    for i in range(total_frames):
        positions = [(100 + i * 2, 200) for _ in char_paths]  # simple horizontal drift
        # Five-digit zero-padding keeps lexicographic sort equal to frame order
        # even for long renders (e.g. 2880 frames).
        frame_path = os.path.join(FRAME_FOLDER, f"frame_{i:05d}.png")
        compose_frame(bg_path, char_paths, frame_path, char_positions=positions)
    create_video_from_frames(FRAME_FOLDER, VIDEO_OUTPUT)
    return VIDEO_OUTPUT, f"Characters: {characters}, Scenes: {scenes}"
# --- Gradio interface ---
iface = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(lines=3, placeholder="Describe your scene here..."),
    outputs=[gr.Video(), gr.Textbox()],
    title="Text to Video AI App",
)

if __name__ == "__main__":
    iface.launch()