# t2a / app.py
import gradio as gr
import spacy
from PIL import Image
import os
import sys
import cv2
import subprocess

# --- Load SpaCy model dynamically (avoids build-time download issues) ---
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Download the model at runtime if it is missing, then load it.
    # Use sys.executable so the download goes into the same environment,
    # and check=True so a failed download raises instead of failing silently.
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
    nlp = spacy.load("en_core_web_sm")

# --- Define asset mapping (character and background files) ---
ASSET_MAP = {
    "man": "assets/characters/man.png",
    "woman": "assets/characters/woman.png",
    "dog": "assets/characters/dog.png",
    "park": "assets/backgrounds/park.jpg",
    "office": "assets/backgrounds/office.jpg",
    "home": "assets/backgrounds/home.jpg",
    "school": "assets/backgrounds/school.jpg",
    "street": "assets/backgrounds/street.jpg",
}

FRAME_FOLDER = "frames"
VIDEO_OUTPUT = "generated_video.mp4"
# --- Extract characters and scenes from prompt ---
def extract_entities(prompt):
    doc = nlp(prompt)
    characters = []
    scenes = []
    # Named Entity Recognition
    for ent in doc.ents:
        if ent.label_ in ["PERSON", "ORG"]:
            characters.append(ent.text.lower())
        elif ent.label_ in ["LOC", "GPE", "FAC"]:
            scenes.append(ent.text.lower())
    # If no scenes found, fall back to keyword matching against the background keys
    if not scenes:
        for keyword in ["park", "office", "home", "school", "street"]:
            if keyword in prompt.lower():
                scenes.append(keyword)
                break
    # If still no scene, fall back to the default background
    if not scenes:
        scenes.append("park")
    return characters, scenes
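
# Rough usage sketch for extract_entities (output depends on the spaCy model's
# NER, so the exact entities may vary; these prompts are illustrative):
#   extract_entities("Alice meets Bob at the office")
#   -> (["alice", "bob"], ["office"]) if NER tags the names as PERSON
#   extract_entities("a quiet walk in the park")
#   -> ([], ["park"]) via the keyword fallback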
# --- Compose a single image frame ---
def compose_frame(background_path, character_paths, output_path, char_positions=None):
    bg = Image.open(background_path).convert('RGBA')
    for idx, char_path in enumerate(character_paths):
        char_img = Image.open(char_path).convert('RGBA')
        pos = char_positions[idx] if char_positions else (100 + idx * 100, 200)
        # Pass the character image as its own paste mask so transparency is kept.
        bg.paste(char_img, pos, char_img)
    bg.save(output_path)
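
# Hand-run sketch of composing one frame (assumes the asset files listed in
# ASSET_MAP exist on disk; "frames/test.png" is an illustrative output path):
#   compose_frame(ASSET_MAP["park"], [ASSET_MAP["man"], ASSET_MAP["dog"]],
#                 "frames/test.png", char_positions=[(100, 200), (300, 200)])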
# --- Create a video from image frames ---
def create_video_from_frames(frame_folder, output_path, fps=24):
    images = sorted(img for img in os.listdir(frame_folder) if img.endswith(".png"))
    if not images:
        print("No frames found!")
        return
    # Use the first frame to fix the video dimensions.
    frame = cv2.imread(os.path.join(frame_folder, images[0]))
    height, width, _ = frame.shape
    video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    for img in images:
        video.write(cv2.imread(os.path.join(frame_folder, img)))
    video.release()
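
# Standalone sketch: encode whatever PNGs are already in FRAME_FOLDER at 12 fps
# ("preview.mp4" is an illustrative name; assumes the 'mp4v' codec is available
# in the local OpenCV build):
#   create_video_from_frames(FRAME_FOLDER, "preview.mp4", fps=12)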
# --- Main function triggered by Gradio ---
def generate_video(prompt):
    characters, scenes = extract_entities(prompt)
    os.makedirs(FRAME_FOLDER, exist_ok=True)
    # Clear frames from previous runs so stale images don't end up in the video.
    for old in os.listdir(FRAME_FOLDER):
        os.remove(os.path.join(FRAME_FOLDER, old))
    bg_path = ASSET_MAP.get(scenes[0], ASSET_MAP["park"])
    char_paths = [ASSET_MAP.get(char, ASSET_MAP["man"]) for char in characters]
    if not char_paths:
        # No characters detected: draw the default character so the scene isn't empty.
        char_paths = [ASSET_MAP["man"]]
    total_frames = 48  # 2 sec @ 24 fps; increase to 2880 for 2 min
    for i in range(total_frames):
        # Slide every character 2 px to the right per frame.
        positions = [(100 + i * 2, 200) for _ in char_paths]
        # Zero-pad to 4 digits so the sorted() in create_video_from_frames keeps
        # frames in order even past frame 999.
        frame_path = os.path.join(FRAME_FOLDER, f"frame_{i:04d}.png")
        compose_frame(bg_path, char_paths, frame_path, char_positions=positions)
    create_video_from_frames(FRAME_FOLDER, VIDEO_OUTPUT)
    details = f"Characters detected: {characters if characters else 'default'}, Scene: {scenes[0]}"
    return VIDEO_OUTPUT, details
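
# Sketch of driving the pipeline without the UI (prompt is illustrative):
#   video_path, info = generate_video("a man and his dog in the park")
#   print(video_path, info)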
# --- Gradio interface setup ---
iface = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(lines=3, placeholder="Describe your scene here..."),
    outputs=[gr.Video(), gr.Textbox()],
    title="Text to Video AI App (with fallback scenes)",
)

if __name__ == "__main__":
    iface.launch()