File size: 3,772 Bytes
bad6e6b 05bed8e bad6e6b 3c1cfdc 05bed8e bad6e6b 3c1cfdc bad6e6b 3c1cfdc bad6e6b 3c1cfdc bad6e6b 3c1cfdc bad6e6b 3c1cfdc bad6e6b 3c1cfdc bad6e6b 3c1cfdc bad6e6b 05bed8e bad6e6b 05bed8e bad6e6b 3c1cfdc bad6e6b 3c1cfdc bad6e6b 3c1cfdc bad6e6b 3c1cfdc bad6e6b 3c1cfdc bad6e6b 05bed8e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import gradio as gr
import spacy
from PIL import Image
import os
import cv2
import subprocess
# --- Load SpaCy model dynamically (avoids build-time download issues) ---
# spacy.load raises OSError when the model package is not installed.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Download with the interpreter that is actually running this script:
    # a bare "python" on PATH may belong to a different environment, in
    # which case the retry below would fail again. check=True makes a
    # failed download raise here instead of masking it with a second
    # OSError from spacy.load.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
# --- Define asset mapping (character and background files) ---
# Keys are lowercase keywords matched against the prompt text; values are
# relative paths to image assets. Character sprites are PNGs (the alpha
# channel is required for compositing); backgrounds are JPGs.
# NOTE: insertion order matters — extract_entities iterates these keys and
# takes the FIRST matching background keyword.
ASSET_MAP = {
    "man": "assets/characters/man.png",
    "woman": "assets/characters/woman.png",
    "dog": "assets/characters/dog.png",
    "park": "assets/backgrounds/park.jpg",
    "office": "assets/backgrounds/office.jpg",
    "home": "assets/backgrounds/home.jpg",
    "school": "assets/backgrounds/school.jpg",
    "street": "assets/backgrounds/street.jpg"
}
# Directory where intermediate per-frame PNGs are written.
FRAME_FOLDER = "frames"
# Path of the final rendered video file returned to Gradio.
VIDEO_OUTPUT = "generated_video.mp4"
# --- Extract characters and scenes from prompt ---
def extract_entities(prompt):
    """Extract character and scene keywords from a free-text prompt.

    Runs spaCy NER first, then falls back to literal keyword matching
    against the known asset keywords, and finally to a default "park"
    scene so a background always exists.

    Args:
        prompt: The user's scene description.

    Returns:
        (characters, scenes): two lists of lowercase strings. `scenes`
        is never empty; `characters` may be.
    """
    doc = nlp(prompt)
    characters = []
    scenes = []
    lowered = prompt.lower()
    # These mirror ASSET_MAP's character vs. background keys (same order).
    scene_keywords = ("park", "office", "home", "school", "street")
    character_keywords = ("man", "woman", "dog")

    # Named Entity Recognition pass.
    for ent in doc.ents:
        if ent.label_ in ("PERSON", "ORG"):
            characters.append(ent.text.lower())
        elif ent.label_ in ("LOC", "GPE", "FAC"):
            scenes.append(ent.text.lower())

    # Bug fix: NER rarely tags generic nouns like "man" or "dog" as
    # PERSON, so the original almost always returned no characters.
    # Apply the same keyword fallback that scenes already had.
    if not characters:
        characters = [kw for kw in character_keywords if kw in lowered]

    # If no scenes found -> keyword matching; first match wins.
    if not scenes:
        for keyword in scene_keywords:
            if keyword in lowered:
                scenes.append(keyword)
                break

    # If still no scene -> fallback default.
    if not scenes:
        scenes.append("park")
    return characters, scenes
# --- Compose a single image frame ---
def compose_frame(background_path, character_paths, output_path, char_positions=None):
    """Paste character sprites onto a background image and save the result.

    Args:
        background_path: Path to the background image.
        character_paths: Paths of character sprites to paste, in order.
        output_path: Where the composed frame is saved.
        char_positions: Optional list of (x, y) positions, one per sprite.
            When omitted/empty, sprites are laid out 100px apart at y=200.
    """
    canvas = Image.open(background_path).convert('RGBA')
    for index, sprite_path in enumerate(character_paths):
        sprite = Image.open(sprite_path).convert('RGBA')
        if char_positions:
            location = char_positions[index]
        else:
            location = (100 + index * 100, 200)
        # Third argument is the alpha mask so transparency is respected.
        canvas.paste(sprite, location, sprite)
    canvas.save(output_path)
# --- Create a video from image frames ---
def create_video_from_frames(frame_folder, output_path, fps=24):
    """Stitch the PNG frames in `frame_folder` into an mp4 at `output_path`.

    Frames are consumed in sorted filename order (frame_000.png, ...).
    Prints a notice and returns early when no frames exist; unreadable
    frames are skipped rather than written as None.

    Args:
        frame_folder: Directory containing frame_*.png images.
        output_path: Destination video file path.
        fps: Frames per second of the output video.
    """
    images = sorted(img for img in os.listdir(frame_folder) if img.endswith(".png"))
    if not images:
        print("No frames found!")
        return
    # Bug fix: cv2.imread returns None on failure instead of raising.
    first = cv2.imread(os.path.join(frame_folder, images[0]))
    if first is None:
        print("First frame could not be read!")
        return
    height, width, _ = first.shape
    video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    try:
        for img in images:
            frame = cv2.imread(os.path.join(frame_folder, img))
            if frame is None:
                # Skip corrupt/unreadable frames rather than crashing mid-render.
                print(f"Skipping unreadable frame: {img}")
                continue
            video.write(frame)
    finally:
        # Bug fix: always finalize the container, even if a write raises,
        # so a partial run doesn't leave the writer open/corrupt.
        video.release()
# --- Main function triggered by Gradio ---
def generate_video(prompt):
    """Generate a short animated clip from a text prompt.

    Extracts characters and a scene from the prompt, composes per-frame
    images with the characters sliding right, and encodes them to mp4.

    Args:
        prompt: Free-text scene description from the UI.

    Returns:
        (video_path, details): path of the rendered mp4 and a summary of
        what was detected, matching the two Gradio outputs.
    """
    characters, scenes = extract_entities(prompt)
    os.makedirs(FRAME_FOLDER, exist_ok=True)

    bg_path = ASSET_MAP.get(scenes[0], ASSET_MAP["park"])
    # Bug fix: previously an empty character list produced frames with no
    # character at all, even though the details text reported 'default'.
    # Fall back to the default sprite so the output matches the report.
    if characters:
        char_paths = [ASSET_MAP.get(char, ASSET_MAP["man"]) for char in characters]
    else:
        char_paths = [ASSET_MAP["man"]]

    total_frames = 48  # 2 sec @ 24fps; increase to 2880 for 2 min
    for i in range(total_frames):
        # Simple motion: shift every character 2px right per frame.
        positions = [(100 + i * 2, 200) for _ in char_paths]
        frame_path = os.path.join(FRAME_FOLDER, f"frame_{i:03d}.png")
        compose_frame(bg_path, char_paths, frame_path, char_positions=positions)

    create_video_from_frames(FRAME_FOLDER, VIDEO_OUTPUT)
    details = f"Characters detected: {characters if characters else 'default'}, Scene: {scenes[0]}"
    return VIDEO_OUTPUT, details
# --- Gradio interface setup ---
# One multiline textbox in; the rendered video plus a detection summary out.
prompt_input = gr.Textbox(lines=3, placeholder="Describe your scene here...")
video_output = gr.Video()
details_output = gr.Textbox()

iface = gr.Interface(
    fn=generate_video,
    inputs=prompt_input,
    outputs=[video_output, details_output],
    title="Text to Video AI App (with fallback scenes)",
)

if __name__ == "__main__":
    iface.launch()