ZeeAI1 committed
Commit 0441f0b · verified · 1 Parent(s): ce19dce

Update app.py

Files changed (1):
  app.py +19 -80
app.py CHANGED
@@ -1,73 +1,26 @@
 import gradio as gr
-import spacy
-from PIL import Image
+from diffusers import StableDiffusionPipeline
 import os
 import cv2
-import subprocess
 
-# --- Load SpaCy model dynamically (avoids build-time download issues) ---
-try:
-    nlp = spacy.load("en_core_web_sm")
-except OSError:
-    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
-    nlp = spacy.load("en_core_web_sm")
-
-# --- Define asset mapping (character and background files) ---
-ASSET_MAP = {
-    "man": "assets/characters/man.png",
-    "woman": "assets/characters/woman.png",
-    "dog": "assets/characters/dog.png",
-    "park": "assets/backgrounds/park.jpg",
-    "office": "assets/backgrounds/office.jpg",
-    "home": "assets/backgrounds/home.jpg",
-    "school": "assets/backgrounds/school.jpg",
-    "street": "assets/backgrounds/street.jpg"
-}
+# --- Initialize Stable Diffusion pipeline ---
+pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1")
 
 FRAME_FOLDER = "frames"
 VIDEO_OUTPUT = "generated_video.mp4"
 
-# --- Extract characters and scenes from prompt ---
-def extract_entities(prompt):
-    doc = nlp(prompt)
-    characters = []
-    scenes = []
-
-    # Named Entity Recognition
-    for ent in doc.ents:
-        if ent.label_ in ["PERSON", "ORG"]:
-            characters.append(ent.text.lower())
-        elif ent.label_ in ["LOC", "GPE", "FAC"]:
-            scenes.append(ent.text.lower())
-
-    # If no scenes found → keyword matching from ASSET_MAP keys
-    if not scenes:
-        for keyword in ASSET_MAP.keys():
-            if keyword in prompt.lower() and keyword in ["park", "office", "home", "school", "street"]:
-                scenes.append(keyword)
-                break
-
-    # If still no scene → fallback default
-    if not scenes:
-        scenes.append("park")
-
-    return characters, scenes
-
-# --- Compose a single image frame ---
-def compose_frame(background_path, character_paths, output_path, char_positions=None):
-    bg = Image.open(background_path).convert('RGBA')
-    for idx, char_path in enumerate(character_paths):
-        char_img = Image.open(char_path).convert('RGBA')
-        pos = (100 + idx*100, 200) if not char_positions else char_positions[idx]
-        bg.paste(char_img, pos, char_img)
-    bg.save(output_path)
+# --- Function to generate N frames using AI ---
+def generate_frames(prompt, num_frames=10):
+    os.makedirs(FRAME_FOLDER, exist_ok=True)
+    for i in range(num_frames):
+        image = pipe(prompt).images[0]
+        image.save(f"{FRAME_FOLDER}/frame_{i:03d}.png")
 
-# --- Create a video from image frames ---
-def create_video_from_frames(frame_folder, output_path, fps=24):
+# --- Function to create video from frames ---
+def create_video_from_frames(frame_folder, output_path, fps=2):
     images = sorted([img for img in os.listdir(frame_folder) if img.endswith(".png")])
     if not images:
-        print("No frames found!")
-        return
+        raise ValueError("No frames generated.")
     frame = cv2.imread(os.path.join(frame_folder, images[0]))
     height, width, _ = frame.shape
 
@@ -76,32 +29,18 @@ def create_video_from_frames(frame_folder, output_path, fps=24):
     video.write(cv2.imread(os.path.join(frame_folder, img)))
     video.release()
 
-# --- Main function triggered by Gradio ---
+# --- Main function called by Gradio ---
 def generate_video(prompt):
-    characters, scenes = extract_entities(prompt)
-
-    os.makedirs(FRAME_FOLDER, exist_ok=True)
-
-    bg_path = ASSET_MAP.get(scenes[0], ASSET_MAP["park"])
-    char_paths = [ASSET_MAP.get(char, ASSET_MAP["man"]) for char in characters]
-
-    total_frames = 48 # 2 sec @ 24fps; increase to 2880 for 2 min
-    for i in range(total_frames):
-        positions = [(100 + i*2, 200) for _ in char_paths]
-        frame_path = os.path.join(FRAME_FOLDER, f"frame_{i:03d}.png")
-        compose_frame(bg_path, char_paths, frame_path, char_positions=positions)
-
-    create_video_from_frames(FRAME_FOLDER, VIDEO_OUTPUT)
-
-    details = f"Characters detected: {characters if characters else 'default'}, Scene: {scenes[0]}"
-    return VIDEO_OUTPUT, details
+    generate_frames(prompt, num_frames=10) # Generate 10 frames
+    create_video_from_frames(FRAME_FOLDER, VIDEO_OUTPUT, fps=2) # 2 fps
+    return VIDEO_OUTPUT
 
-# --- Gradio interface setup ---
+# --- Gradio UI ---
 iface = gr.Interface(
     fn=generate_video,
     inputs=gr.Textbox(lines=3, placeholder="Describe your scene here..."),
-    outputs=[gr.Video(), gr.Textbox()],
-    title="Text to Video AI App (with fallback scenes)"
+    outputs=gr.Video(),
+    title="AI Text-to-Video Generator (No manual assets needed)"
 )
 
 if __name__ == "__main__":
 
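A practical note on the new pipeline: called with default arguments, StableDiffusionPipeline.from_pretrained loads full-precision weights and runs on CPU, so generating ten frames will be very slow on a CPU-only Space. A minimal sketch of a GPU variant, assuming CUDA hardware is available (the dtype and device calls below are standard diffusers/PyTorch options, not part of this commit):

import torch
from diffusers import StableDiffusionPipeline

# Sketch only, assuming a CUDA-capable Space: load half-precision weights
# and move the pipeline to the GPU.
pipe = StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1",
    torch_dtype=torch.float16,  # halves memory use; requires GPU support
)
pipe = pipe.to("cuda")
pipe.enable_attention_slicing()  # lowers peak VRAM at a small speed cost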
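
Each pipe(prompt) call also samples with a fresh random seed, so the ten frames will differ arbitrarily rather than form coherent motion, and rerunning the same prompt yields a different video. A hypothetical seeded variant of generate_frames (not in the commit; it reuses app.py's pipe, os, and FRAME_FOLDER globals):

import torch

def generate_frames_seeded(prompt, num_frames=10, base_seed=42):
    # Hypothetical helper: one torch.Generator per frame makes output
    # reproducible. base_seed + i gives distinct but repeatable frames;
    # a single fixed seed would render every frame identical.
    os.makedirs(FRAME_FOLDER, exist_ok=True)
    for i in range(num_frames):
        gen = torch.Generator().manual_seed(base_seed + i)
        image = pipe(prompt, generator=gen).images[0]
        image.save(f"{FRAME_FOLDER}/frame_{i:03d}.png")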
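
The two context lines elided between the hunks (old lines 74-75, new lines 27-28) are where the cv2.VideoWriter must be constructed, since the visible context already calls video.write(...) and video.release(). A typical OpenCV setup consistent with that code would look like the following; this is an assumption, as those lines are not shown in the diff:

# Assumed shape of the elided writer setup (not shown in the diff):
fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # MPEG-4 codec tag for .mp4 output
video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))  # frame size must match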