|
import gradio as gr |
|
import spacy |
|
from PIL import Image |
|
import os |
|
import cv2 |
|
import subprocess |
|
|
|
|
|
# Load the small English spaCy pipeline, downloading the model package on
# first run if it is not installed yet.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Model missing. Download it with the exact interpreter running this
    # script (sys.executable) instead of whatever "python" happens to be on
    # PATH, and use check=True so a failed download raises immediately
    # rather than surfacing as a second confusing OSError below.
    import sys
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
|
|
|
|
|
# Maps lowercased entity keywords to bundled artwork: character sprites
# (PNGs with alpha, pasted over the scene) and scene backgrounds (JPGs).
ASSET_MAP = {

    "man": "assets/characters/man.png",

    "woman": "assets/characters/woman.png",

    "dog": "assets/characters/dog.png",

    "park": "assets/backgrounds/park.jpg",

    "office": "assets/backgrounds/office.jpg"

}



# Working directory where intermediate rendered frames are written.
FRAME_FOLDER = "frames"

# Path of the final encoded mp4 returned to the UI.
VIDEO_OUTPUT = "generated_video.mp4"
|
|
|
|
|
def extract_entities(prompt):
    """Run NER over *prompt* and bucket recognized entities.

    Returns a ``(characters, scenes)`` pair of lowercased entity strings:
    PERSON/ORG entities count as characters, LOC/GPE/FAC as scenes.
    """
    doc = nlp(prompt)
    character_labels = {"PERSON", "ORG"}
    scene_labels = {"LOC", "GPE", "FAC"}
    characters = [ent.text.lower() for ent in doc.ents if ent.label_ in character_labels]
    scenes = [ent.text.lower() for ent in doc.ents if ent.label_ in scene_labels]
    return characters, scenes
|
|
|
|
|
def compose_frame(background_path, character_paths, output_path, char_positions=None):
    """Composite character sprites onto a background and save one frame.

    Args:
        background_path: path to the background image.
        character_paths: iterable of paths to character images.
        output_path: destination file for the composed frame (PNG keeps alpha).
        char_positions: optional list of (x, y) tuples, one per character;
            when omitted, characters are laid out left-to-right at y=200.
    """
    # Context managers close the underlying image files promptly instead of
    # leaking open file handles until garbage collection.
    with Image.open(background_path) as bg_src:
        bg = bg_src.convert('RGBA')
    for idx, char_path in enumerate(character_paths):
        with Image.open(char_path) as char_src:
            char_img = char_src.convert('RGBA')
        # Explicit positions win; otherwise space characters 100px apart.
        pos = char_positions[idx] if char_positions else (100 + idx * 100, 200)
        # Third argument reuses the sprite's alpha channel as the paste mask.
        bg.paste(char_img, pos, char_img)
    bg.save(output_path)
|
|
|
|
|
def create_video_from_frames(frame_folder, output_path, fps=24):
    """Encode every PNG in *frame_folder* (sorted by filename) into an mp4.

    Prints a message and returns early when no usable frames are found.
    """
    images = sorted(f for f in os.listdir(frame_folder) if f.endswith(".png"))
    if not images:
        print("No frames found!")
        return
    first = cv2.imread(os.path.join(frame_folder, images[0]))
    if first is None:
        # cv2.imread returns None for unreadable files instead of raising;
        # the original code would crash on first.shape.
        print("No frames found!")
        return
    height, width, _ = first.shape

    video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    try:
        for name in images:
            frame = cv2.imread(os.path.join(frame_folder, name))
            # Skip unreadable frames: video.write(None) would raise and
            # leave the container unfinalized.
            if frame is not None:
                video.write(frame)
    finally:
        # Always finalize the mp4 container, even if a read/write fails.
        video.release()
|
|
|
|
|
def generate_video(prompt):
    """Build a short animation from a natural-language *prompt*.

    Extracts characters and a scene via NER, renders 48 frames of the
    characters drifting rightward across the scene background, encodes them
    into VIDEO_OUTPUT, and returns ``(video_path, status_message)``.
    Returns ``(None, error_message)`` when no scene is detected.
    """
    characters, scenes = extract_entities(prompt)

    if not scenes:
        return None, "No scene detected! Please include a place in your prompt."

    os.makedirs(FRAME_FOLDER, exist_ok=True)

    # Remove stale frames from earlier runs: create_video_from_frames encodes
    # every PNG in the folder, so leftovers from a previous (longer) render
    # would otherwise be spliced into this video.
    for leftover in os.listdir(FRAME_FOLDER):
        if leftover.endswith(".png"):
            os.remove(os.path.join(FRAME_FOLDER, leftover))

    # Fall back to default artwork for entities we have no asset for.
    bg_path = ASSET_MAP.get(scenes[0], ASSET_MAP["park"])
    char_paths = [ASSET_MAP.get(char, ASSET_MAP["man"]) for char in characters]

    total_frames = 48
    for i in range(total_frames):
        # Slide each character 2px right per frame for a simple animation.
        positions = [(100 + i * 2, 200) for _ in char_paths]
        frame_path = os.path.join(FRAME_FOLDER, f"frame_{i:03d}.png")
        compose_frame(bg_path, char_paths, frame_path, char_positions=positions)

    create_video_from_frames(FRAME_FOLDER, VIDEO_OUTPUT)
    return VIDEO_OUTPUT, f"Characters: {characters}, Scenes: {scenes}"
|
|
|
|
|
# Gradio UI: one free-text prompt in; the rendered video and a status
# message (detected characters/scenes or an error) out.
iface = gr.Interface(

    fn=generate_video,

    inputs=gr.Textbox(lines=3, placeholder="Describe your scene here..."),

    outputs=[gr.Video(), gr.Textbox()],

    title="Text to Video AI App"

)



# Start the local Gradio server only when run as a script (not on import).
if __name__ == "__main__":

    iface.launch()