Spaces:

AZLABS
/

Comic-2

Running

File size: 13,401 Bytes

dffab80
4ae5311
 
 
 
 
 
 
 
 
 
 
 
 
dffab80
 
 
4ae5311
 
 
dffab80
 
 
4ae5311
dffab80
4ae5311
dffab80
 
4ae5311
 
 
 
 
 
dffab80
4ae5311
 
 
 
dffab80
4ae5311
 
 
dffab80
 
 
4ae5311
 
 
 
 
dffab80
 
 
4ae5311
 
 
dffab80
 
4ae5311
dffab80
 
4ae5311
dffab80
4ae5311
dffab80
4ae5311
dffab80
 
 
 
 
4ae5311
dffab80
fed41f3
dffab80
4ae5311
 
dffab80
4ae5311
 
 
 
 
dffab80
4ae5311
 
 
 
dffab80
 
4ae5311
dffab80
 
 
4ae5311
dffab80
4ae5311
dffab80
4ae5311
dffab80
4ae5311
dffab80
4ae5311
 
 
 
 
dffab80
4ae5311
 
 
 
dffab80
 
4ae5311
dffab80
 
 
4ae5311
dffab80
4ae5311
dffab80
4ae5311
dffab80
4ae5311
 
 
 
 
 
 
dffab80
4ae5311
 
 
 
dffab80
4ae5311
 
 
 
dffab80
 
4ae5311
dffab80
 
4ae5311
dffab80
4ae5311
dffab80
 
 
4ae5311
dffab80
4ae5311
dffab80
4ae5311
dffab80
 
4ae5311
dffab80
 
4ae5311
dffab80
 
4ae5311
dffab80
 
4ae5311
dffab80
 
4ae5311
dffab80
 
4ae5311
dffab80
 
4ae5311
dffab80
4ae5311
dffab80
 
4ae5311
dffab80
 
4ae5311
 
 
 
dffab80
 
4ae5311
dffab80
 
 
4ae5311
dffab80
4ae5311
dffab80
4ae5311
dffab80
4ae5311
 
 
dffab80
4ae5311
dffab80
 
4ae5311
dffab80
 
 
4ae5311
dffab80
 
4ae5311
dffab80
 
4ae5311
dffab80
 
4ae5311
dffab80
 
4ae5311
 
dffab80
4ae5311
dffab80
 
4ae5311
dffab80
 
4ae5311
 
 
 
 
dffab80
4ae5311
 
dffab80
 
4ae5311
dffab80
 
4ae5311
dffab80
4ae5311
dffab80
4ae5311
dffab80
4ae5311
dffab80
4ae5311
dffab80
4ae5311
 
dffab80
4ae5311
dffab80
 
4ae5311
dffab80
4ae5311
 
dffab80
4ae5311
dffab80
 
4ae5311
 
dffab80
4ae5311
dffab80
 
 
 
4ae5311
dffab80
4ae5311
dffab80
4ae5311
dffab80
4ae5311
dffab80
 
4ae5311
dffab80
4ae5311
 
 
dffab80
4ae5311
 
 
dffab80
4ae5311
dffab80
 
 
4ae5311
 
dffab80
4ae5311
dffab80
 
 
4ae5311
 
 
 
dffab80


import os
import json
import urllib.request
from PIL import Image
from gtts import gTTS
import cv2
import moviepy.editor as mp
import logging
from hercai import Hercai
import uuid
import time
import gradio as gr

# Configure detailed logging.
# LOG_DIRECTORY selects the folder that receives utils.log (default: current directory).
log_dir = os.getenv('LOG_DIRECTORY', './')
if log_dir:
    # The log file is opened as soon as logging is configured, so the target
    # folder has to exist beforehand; create it instead of failing at import time.
    os.makedirs(str(log_dir), exist_ok=True)
LOGGER_FILE_PATH = os.path.join(str(log_dir), 'utils.log')  # Full path to the log file

logging.basicConfig(
    filename=LOGGER_FILE_PATH,
    filemode='a',  # Append to the log file
    format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',  # Log format
    datefmt='%Y-%b-%d %H:%M:%S'  # Date and time format
)
LOGGER = logging.getLogger(__name__)  # Module-level logger used throughout this file

# LOG_LEVEL selects the verbosity of this module's logger (default: INFO).
log_level_env = os.getenv('LOG_LEVEL', 'INFO')
log_level_dict = {  # Accepted level names and their logging constants
    'DEBUG': logging.DEBUG,
    'INFO': logging.INFO,
    'WARNING': logging.WARNING,
    'ERROR': logging.ERROR,
    'CRITICAL': logging.CRITICAL
}
# Match the name case-insensitively and ignore surrounding whitespace so that
# values such as "debug" or " Debug " are honoured; unknown names fall back to INFO.
log_level = log_level_dict.get(log_level_env.strip().upper(), logging.INFO)
LOGGER.setLevel(log_level)  # Apply the level to this module's logger


class Text2Video:
    """
    Generate a narrated slideshow video from comic text.

    Every scene of the input text is illustrated with an image requested from
    Hercai and narrated with gTTS; the resulting image/audio pairs are joined
    into a single video with MoviePy. `gradio_interface` exposes the whole
    pipeline through a small Gradio web UI.
    """

    # Separator between scenes in the text handed to generate_video().
    SCENE_SEPARATOR = ",,"

    def __init__(self) -> None:
        """
        Initialize the Text2Video class.

        The Hercai API key is read from the ``HERCAI_API_KEY`` environment
        variable so that it does not have to be written into the source;
        when the variable is unset an empty key is passed to Hercai.
        """
        LOGGER.info("Initializing Text2Video class")
        self.herc = Hercai(os.getenv("HERCAI_API_KEY", ""))
        LOGGER.info("Hercai initialized successfully")

    @staticmethod
    def _split_prompts(text: str) -> list:
        """Split *text* on the scene separator and return the non-empty, stripped scenes."""
        scenes = (scene.strip() for scene in (text or "").split(Text2Video.SCENE_SEPARATOR))
        return [scene for scene in scenes if scene]

    @staticmethod
    def _file_stem(prompt: str) -> str:
        """Return a file-name-safe stem built from the first nine characters of *prompt*."""
        # Prompts are free text and may contain path separators, quotes, etc.;
        # anything that is not alphanumeric is replaced by an underscore.
        return "".join(ch if ch.isalnum() else "_" for ch in prompt[:9])

    @staticmethod
    def _remove_quietly(path: str) -> None:
        """Delete *path* on a best-effort basis; a missing file is not an error."""
        if not path:
            return
        try:
            os.remove(path)
        except FileNotFoundError:
            pass  # Nothing to clean up
        except OSError as e:
            LOGGER.warning(f"Could not remove temporary file '{path}': {e}")

    def get_image(self, img_prompt: str) -> str:
        """
        Generate an image from a text prompt using Hercai, with detailed logging and comic book styling.

        Args:
            img_prompt (str): The text prompt to generate the image from.

        Returns:
            str: The URL of the generated image. Returns an empty string if an error occurred.
        """
        LOGGER.info(f"Generating image for prompt: {img_prompt}")
        try:
            # Wrap the scene text in instructions that ask for a comic book look
            modified_prompt = f"Generate a comic book style image with speech bubbles containing the following text: '{img_prompt}'. " \
                              f"Include elements like vibrant colors, onomatopoeia, and exaggerated expressions to enhance the comic book aesthetic."
            LOGGER.info(f"Modified prompt for Hercai: {modified_prompt}")

            # Request the image and pull the URL out of the response
            image_result = self.herc.draw_image(model="v3", prompt=modified_prompt, negative_prompt="Dark and gloomy")
            image_url = image_result["url"]
            if not image_url:
                # An empty URL is as useless as a missing one; report it as a failure
                raise ValueError("Hercai returned an empty image URL")

            LOGGER.info(f"Image generated successfully: {image_url}")
            return image_url

        except Exception as e:
            # Image generation is best effort: log the problem and signal it with ""
            LOGGER.error(f"Error generating image for prompt '{img_prompt}': {e}")
            return ""

    def download_img_from_url(self, image_url: str, image_path: str) -> str:
        """
        Download an image from a URL to a local file path.

        Args:
            image_url (str): The URL of the image to download.
            image_path (str): The local file path to save the downloaded image.

        Returns:
            str: The local file path of the downloaded image. Returns an empty string if an error occurred
            or if no URL was given.
        """
        if not image_url:
            # get_image() reports failure with an empty string; do not try to fetch it
            LOGGER.error("Error downloading image: no image URL was provided")
            return ""

        LOGGER.info(f"Downloading image from URL: {image_url}")
        try:
            urllib.request.urlretrieve(image_url, image_path)

            LOGGER.info(f"Image downloaded and saved to: {image_path}")
            return image_path

        except Exception as e:
            LOGGER.error(f"Error downloading image from URL '{image_url}': {e}")
            return ""

    def text_to_audio(self, img_prompt: str, audio_path: str) -> str:
        """
        Convert text to speech using gTTS and save it as an audio file.

        Args:
            img_prompt (str): The text to convert to speech.
            audio_path (str): The local file path to save the generated audio file.

        Returns:
            str: The local file path of the saved audio file. Returns an empty string if an error occurred.
        """
        LOGGER.info(f"Converting text to audio: {img_prompt}")
        try:
            # Narration is always synthesized in English at normal speed
            language = 'en'
            speech = gTTS(text=img_prompt, lang=language, slow=False)
            speech.save(audio_path)

            LOGGER.info(f"Audio saved to: {audio_path}")
            return audio_path
        except Exception as e:
            LOGGER.error(f"Error converting text '{img_prompt}' to audio: {e}")
            return ""

    def _render_scene(self, img_prompt: str) -> tuple:
        """Create the image and narration files for one prompt; return ("", "") if either step fails."""
        # Image and audio of a scene share one unique stem so they are easy to pair up
        file_stem = f"{self._file_stem(img_prompt)}_{uuid.uuid4().hex}"
        image_path = f"{file_stem}.png"
        audio_path = f"{file_stem}.mp3"
        try:
            image = self.download_img_from_url(self.get_image(img_prompt), image_path)
            # Narration is only worth generating when the picture exists
            audio = self.text_to_audio(img_prompt, audio_path) if image else ""
        except Exception as e:
            LOGGER.error(f"Error processing prompt '{img_prompt}': {e}")
            image = audio = ""

        if image and audio:
            return image, audio

        # Drop whatever half of the pair was written so no orphan files pile up
        self._remove_quietly(image_path)
        self._remove_quietly(audio_path)
        return "", ""

    def get_images_and_audio(self, list_prompts: list) -> tuple:
        """
        Generate images and corresponding audio files for a list of text prompts.

        A prompt whose image or audio cannot be produced is skipped entirely, so
        the two returned lists always have the same length and only contain
        paths of files that were actually written.

        Args:
            list_prompts (list): A list of text prompts.

        Returns:
            tuple: A tuple containing two lists: image paths and audio paths.
        """
        LOGGER.info("Generating images and audio for prompts")
        img_list = []  # Image paths, index-aligned with audio_paths
        audio_paths = []  # Audio paths, index-aligned with img_list
        for img_prompt in list_prompts:
            LOGGER.info(f"Processing prompt: {img_prompt}")
            image, audio = self._render_scene(img_prompt)
            if not image:
                LOGGER.warning(f"Skipping prompt '{img_prompt}': image or audio could not be generated")
                continue
            img_list.append(image)
            audio_paths.append(audio)

        LOGGER.info(f"Images and audio generated for {len(img_list)} of {len(list_prompts)} prompts")
        return img_list, audio_paths

    def create_video_from_images_and_audio(self, image_files: list, audio_files: list, output_path: str) -> None:
        """
        Generate a video from a list of image files and corresponding audio files.

        Each image is shown for exactly as long as its audio file plays. Errors
        are logged rather than raised; if writing fails, a partially written
        output file is removed so it cannot be mistaken for a finished video.

        Args:
            image_files (list): A list of local file paths to image files.
            audio_files (list): A list of local file paths to audio files.
            output_path (str): The local file path where the generated video will be saved.
        """
        LOGGER.info("Creating video from images and audio")
        if len(image_files) != len(audio_files):
            LOGGER.error("Error: Number of images doesn't match the number of audio files.")
            return
        if not image_files:
            # Concatenating zero clips cannot produce a video
            LOGGER.error("Error: No images or audio files were provided; nothing to render.")
            return

        audio_clips = []  # Kept so the underlying readers can be closed afterwards
        try:
            video_clips = []
            for image_file, audio_file in zip(image_files, audio_files):
                LOGGER.info(f"Processing image: {image_file}, audio: {audio_file}")

                audio_clip = mp.AudioFileClip(audio_file)
                audio_clips.append(audio_clip)

                # A still image lasting as long as its narration, with that narration attached
                still = mp.ImageClip(image_file).set_duration(audio_clip.duration)
                video_clips.append(still.set_audio(audio_clip))

            final_clip = mp.concatenate_videoclips(video_clips)
            final_clip.write_videofile(output_path, codec='libx264', fps=24)

            LOGGER.info(f"Video created successfully at: {output_path}")

        except Exception as e:
            LOGGER.error(f"Error creating video: {e}")
            self._remove_quietly(output_path)

        finally:
            # Release the audio readers whether or not rendering succeeded
            for audio_clip in audio_clips:
                try:
                    audio_clip.close()
                except Exception as e:
                    LOGGER.warning(f"Could not close audio clip: {e}")

    def generate_video(self, text: str) -> str:
        """
        Generate a video from a string of text prompts separated by double commas (,,).

        Every call writes to its own uniquely named output file, and the
        intermediate image and audio files are deleted once the video has been
        rendered (or rendering has failed).

        Args:
            text (str): Text prompts separated by double commas (,,), where each prompt represents a scene
                or frame in the video. Single commas inside a scene are kept as part of that scene.

        Returns:
            str: The file path of the generated video file. Returns an empty string if an error occurred
            or if no video could be produced.
        """
        LOGGER.info("Generating video from text")
        img_list, audio_paths = [], []
        try:
            list_prompts = self._split_prompts(text)
            LOGGER.info(f"Prompts extracted from text: {list_prompts}")
            if not list_prompts:
                LOGGER.error("Error generating video: no prompts found in the input text")
                return ""

            # A unique name per call keeps parallel requests from overwriting each other
            output_path = f"output_video_{uuid.uuid4().hex}.mp4"

            img_list, audio_paths = self.get_images_and_audio(list_prompts)
            self.create_video_from_images_and_audio(img_list, audio_paths, output_path)

            # The rendering step logs its own errors instead of raising, so the
            # presence of the output file is what tells success from failure here.
            if not os.path.isfile(output_path):
                LOGGER.error(f"Error generating video from text '{text}': no video file was produced")
                return ""

            LOGGER.info(f"Video generated successfully: {output_path}")
            return output_path

        except Exception as e:
            LOGGER.error(f"Error generating video from text '{text}': {e}")
            return ""

        finally:
            # The per-scene files are only needed while the video is being rendered
            for leftover in (*img_list, *audio_paths):
                self._remove_quietly(leftover)

    def gradio_interface(self):
        """
        Creates a user-friendly Gradio interface for the video generation application.

        Builds the page (text box, button, video player and one example input)
        and then starts the Gradio server with ``debug=True``.
        """
        LOGGER.info("Launching Gradio interface")

        def _on_generate(text: str) -> str:
            """Run the pipeline for the UI and surface a failure as a visible Gradio error."""
            video_path = self.generate_video(text)
            if not video_path:
                raise gr.Error("Video generation failed. Please check the input text and the application log.")
            return video_path

        with gr.Blocks(css="style.css", theme='abidlabs/dracula_revamped') as demo:
            # Page title
            gr.HTML("""
                    <center><h1 style="color:#fff">Comics Video Generator</h1></center>""")

            # Text box in which the user enters the comic text
            with gr.Row(elem_id="col-container"):
                input_text = gr.Textbox(label="Comics Text",
                                       placeholder="Enter the comics text, separating scenes with double commas (,,)")

            # Button that triggers the video generation
            with gr.Row(elem_id="col-container"):
                button = gr.Button("Generate Video")

            # Player that shows the generated video
            with gr.Row(elem_id="col-container"):
                output = gr.PlayableVideo()

            # Example input demonstrating the double-comma scene separator
            with gr.Row(elem_id="col-container"):
                example_txt = """Once upon a time there was a village. It was a nice place to live, except for one thing. People did not like to share.,, 
                One day a visitor came to town. 'Hello. Does anybody have food to share?' He asked. 'No', said everyone.,,
                'That's okay', said the visitor. 'I will make stone soup for everyone'. Then he took a stone and dropped it into a giant pot,,"""
                gr.Examples([example_txt], input_text)

            # Clicking the button feeds the text box content through the pipeline
            button.click(_on_generate, [input_text], output)

        # The layout is complete once the Blocks context has been left
        LOGGER.info("Gradio interface built; starting server")
        demo.launch(debug=True)


if __name__ == "__main__":
    # Script entry point: build the generator, then serve its Gradio UI.
    LOGGER.info("Starting application")
    app = Text2Video()
    app.gradio_interface()