Dhan98 committed · verified
Commit 1d25574 · Parent(s): 762ce93

new changes

Files changed (1):
  1. app.py +39 -21
app.py CHANGED
@@ -1,21 +1,26 @@
 import streamlit as st
-from transformers import AutoProcessor, AutoModelForCausalLM, BlipProcessor, BlipForConditionalGeneration
+from transformers import BlipProcessor, BlipForConditionalGeneration
+from diffusers import DiffusionPipeline
 import torch
 import cv2
 import numpy as np
 from PIL import Image
+import tempfile
+import os

 @st.cache_resource
 def load_models():
-    # Text-to-video model
-    video_model = AutoModelForCausalLM.from_pretrained("damo-vilab/text-to-video-ms-1.7b", trust_remote_code=True)
-    video_processor = AutoProcessor.from_pretrained("damo-vilab/text-to-video-ms-1.7b")
+    pipeline = DiffusionPipeline.from_pretrained(
+        "cerspense/zeroscope_v2_576w",
+        torch_dtype=torch.float16
+    )
+    if torch.cuda.is_available():
+        pipeline.to("cuda")

-    # Image captioning
     blip = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
     blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")

-    return video_model, video_processor, blip, blip_processor
+    return pipeline, blip, blip_processor

 def enhance_image(image):
     img = np.array(image)
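One caveat with the new `load_models`: the pipeline is loaded in `torch.float16` unconditionally, but only moved to CUDA when a GPU is present, and half-precision inference is generally not supported on CPU. A minimal sketch of a dtype-aware variant; the device/dtype selection logic is an assumption, not part of this commit:

```python
# Sketch only, not part of this commit: pick the dtype to match the
# available device, since float16 inference generally fails on CPU.
import torch
from diffusers import DiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

pipeline = DiffusionPipeline.from_pretrained(
    "cerspense/zeroscope_v2_576w",
    torch_dtype=dtype,
)
pipeline.to(device)

# Optional memory savers on GPU (standard diffusers methods):
# pipeline.enable_attention_slicing()
# pipeline.enable_model_cpu_offload()  # requires the accelerate package
```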
@@ -32,36 +37,49 @@ def get_description(image, blip_model, blip_processor):
     output = blip_model.generate(**inputs, max_length=50)
     return blip_processor.decode(output[0], skip_special_tokens=True)

-def generate_video(model, processor, description):
-    inputs = processor(text=description, return_tensors="pt")
-    with torch.no_grad():
-        video_frames = model.generate(
-            **inputs,
-            num_frames=16,
-            num_inference_steps=50,
-            guidance_scale=7.5
-        )
-    return video_frames
+def save_video_frames(frames, fps=8):
+    temp_dir = tempfile.mkdtemp()
+    temp_path = os.path.join(temp_dir, "output.mp4")
+
+    height, width = frames[0].shape[:2]
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    video_writer = cv2.VideoWriter(temp_path, fourcc, fps, (width, height))
+
+    for frame in frames:
+        video_writer.write(frame)
+    video_writer.release()
+
+    return temp_path
+
+def generate_video(pipeline, description):
+    video_frames = pipeline(
+        description,
+        num_inference_steps=50,
+        num_frames=24
+    ).frames
+
+    video_path = save_video_frames(video_frames)
+    return video_path

 def main():
     st.title("Video Generator")

-    models = load_models()
-    video_model, video_processor, blip, blip_processor = models
+    pipeline, blip, blip_processor = load_models()

     image_file = st.file_uploader("Upload Image", type=['png', 'jpg', 'jpeg'])
     if image_file:
         image = Image.open(image_file)
         enhanced_image = enhance_image(image)
+
         st.image(enhanced_image, caption="Enhanced Image")

         description = get_description(enhanced_image, blip, blip_processor)
         st.write("Image Description:", description)

         if st.button("Generate Video"):
-            with st.spinner("Generating..."):
-                video = generate_video(video_model, video_processor, description)
-                st.video(video)
+            with st.spinner("Generating video..."):
+                video_path = generate_video(pipeline, description)
+                st.video(video_path)

 if __name__ == "__main__":
     main()
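A note on the new `save_video_frames`: `cv2.VideoWriter` expects 8-bit BGR frames, while the pipeline produces RGB output whose exact container varies by diffusers version (float arrays in [0, 1], uint8 arrays, or PIL images, sometimes nested one level for the batch dimension). A hedged normalization sketch; the helper name is hypothetical and not part of this commit:

```python
# Hypothetical helper, not part of this commit: normalize one pipeline
# frame to the uint8 BGR layout that cv2.VideoWriter expects.
import cv2
import numpy as np

def to_bgr_uint8(frame):
    frame = np.asarray(frame)            # also converts PIL images
    if frame.dtype != np.uint8:          # assume float RGB in [0, 1]
        frame = (frame * 255.0).clip(0, 255).astype(np.uint8)
    return cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

# e.g. inside save_video_frames:
#     for frame in frames:
#         video_writer.write(to_bgr_uint8(frame))
```

Alternatively, `export_to_video` from `diffusers.utils` handles the RGB-to-BGR conversion and temp-file bookkeeping itself, which would let this commit drop the hand-rolled writer entirely.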
 
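For reference, the captioning step that the unchanged `get_description` helper wraps can be exercised on its own. A minimal standalone sketch using the same BLIP checkpoint the app loads; the input file name is a placeholder:

```python
# Standalone captioning sketch with the same checkpoint app.py loads.
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

image = Image.open("photo.jpg").convert("RGB")   # placeholder input image
inputs = processor(images=image, return_tensors="pt")
output = model.generate(**inputs, max_length=50)
print(processor.decode(output[0], skip_special_tokens=True))
```

The app itself still launches with `streamlit run app.py`; the switch to zeroscope means the Space now needs diffusers alongside transformers, torch, opencv-python, and streamlit (plus accelerate, if CPU offload is enabled).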