"""Gradio app that turns a user concept into a TikTok-style feed.

Each feed item is a caption plus a 9:16 image (and optionally a short video)
produced with the Google Gen AI SDK.  Items are kept in Gradio session state
as dictionaries with the keys 'text', 'image_base64', 'video_base64' and
'ideas'.
"""

import base64
import html
import json
import os
import random
import re
import time
import urllib.parse
from io import BytesIO

import gradio as gr
from google import genai
from google.genai import types
from PIL import Image

# Initialize the Google Generative AI client with the API key from environment variables
try:
    api_key = os.environ['GEMINI_API_KEY']
except KeyError:
    # The KeyError adds nothing for the user, so it is dropped from the traceback.
    raise ValueError("Please set the GEMINI_API_KEY environment variable.") from None

client = genai.Client(api_key=api_key)

TEXT_MODEL = 'gemini-2.0-flash'
IMAGE_MODEL = 'imagen-3.0-generate-002'
VIDEO_MODEL = 'veo-2.0-generate-001'

# Feed media is rendered at 9:16 (360 x 640).
FEED_WIDTH = 360
FEED_HEIGHT = FEED_WIDTH * 16 // 9

VIDEO_POLL_SECONDS = 20
# Upper bound on how long one video operation is polled before it is abandoned.
VIDEO_TIMEOUT_SECONDS = 600

DEFAULT_CONCEPT = "trending"

# Opening Markdown fence, with or without the "json" language tag.
_FENCE_OPEN = re.compile(r"^```(?:json)?", re.IGNORECASE)

_MESSAGE_STYLE = "text-align: center; padding: 20px; color: #fff;"
_LINK_STYLE = "color: #ff2d55; margin: 0 6px; text-decoration: none;"
_MEDIA_STYLE = "width: 100%; max-width: 360px; border-radius: 10px; display: block; margin: 0 auto;"

_ERROR_HTML = (
    f'<div style="{_MESSAGE_STYLE}">'
    '<p>Error generating content. Please try again!</p>'
    '</div>'
)

# (label, URL template).  Only X exposes a text-only web intent, so it is the
# only target that receives the caption; the media itself cannot be attached
# through a URL, so the other targets simply open the platform.
_SHARE_TARGETS = (
    ("Share on TikTok", "https://www.tiktok.com/upload"),
    ("Share on Instagram", "https://www.instagram.com/"),
    ("Share on Facebook", "https://www.facebook.com/"),
    ("Share on X", "https://twitter.com/intent/tweet?text={caption}"),
    ("Share on Pinterest", "https://www.pinterest.com/"),
)
_YOUTUBE_UPLOAD_URL = "https://www.youtube.com/upload"


def clean_response_text(response_text):
    """
    Clean the API response by removing Markdown code block markers.

    Both a tagged opening fence ("```json") and a bare one ("```") are
    removed, as is a trailing "```".

    Args:
        response_text (str): The raw response text from the API.

    Returns:
        str: The cleaned response text.
    """
    cleaned_text = _FENCE_OPEN.sub("", response_text.strip(), count=1).strip()
    if cleaned_text.endswith("```"):
        cleaned_text = cleaned_text[:-len("```")].strip()
    return cleaned_text


def _request_json(prompt, debug_label):
    """Send `prompt` to the text model and return the decoded JSON payload.

    Raises:
        ValueError: If the model returns empty text or invalid JSON
            (json.JSONDecodeError is a ValueError subclass).
    """
    response = client.models.generate_content(
        model=TEXT_MODEL,
        contents=[prompt],
        config=types.GenerateContentConfig(temperature=1.2),
    )
    print(f"Raw response for {debug_label}: {response.text}")  # Debugging
    if not response.text or response.text.isspace():
        raise ValueError("Empty response from API")
    return json.loads(clean_response_text(response.text))


def _fallback_ideas(user_input):
    """Return the five canned ideas used when the model output is unusable."""
    return [
        f"A dramatic {user_input} scene with cinematic lighting",
        f"A close-up of {user_input} in a futuristic setting",
        f"A high-energy {user_input} moment with vibrant colors",
        f"A serene {user_input} scene with soft focus",
        f"An action-packed {user_input} challenge with dynamic angles",
    ]


def _is_related(idea, user_input):
    """Return True if `idea` mentions the concept or one of its longer words.

    Requiring the literal concept in every idea rejects good answers (the
    example embedded in the ideas prompt would itself fail), so any word of
    three or more characters from the concept also counts as a match.
    """
    idea_lower = idea.lower()
    concept = user_input.lower()
    if concept in idea_lower:
        return True
    return any(
        word in idea_lower
        for word in re.findall(r"\w+", concept)
        if len(word) >= 3
    )


def generate_ideas(user_input):
    """
    Generate a diverse set of ideas based on the user's input concept using the LLM.

    Any failure (API error, malformed JSON, wrong number of ideas, or an idea
    that does not mention the concept) is logged and replaced by a fixed list
    of five template ideas, so this function never raises for API problems.

    Args:
        user_input (str): The user's input concept or idea (e.g., "blindfolded Rubik's Cube challenge").

    Returns:
        list: A list of five ideas as strings.
    """
    prompt = f"""
    The user has provided the concept: "{user_input}".
    You must generate 5 diverse and creative ideas for a TikTok video that are directly and explicitly related to "{user_input}".
    Each idea must clearly incorporate and focus on the core theme of "{user_input}" without deviating into unrelated topics.
    Each idea should be a short sentence describing a specific scene or concept.
    Return the response as a JSON object with a single key 'ideas' containing a list of 5 ideas.
    Ensure the response is strictly in JSON format.
    Example for "blindfolded Rubik's Cube challenge":
    {{"ideas": [
        "A blindfolded speedcubing competition with dramatic music",
        "A close-up of a person solving a Rubik's Cube blindfolded under a spotlight",
        "A time-lapse of a blindfolded Rubik's Cube solve with colorful lighting",
        "A blindfolded Rubik's Cube challenge in a futuristic setting",
        "A split-screen of two people racing to solve a Rubik's Cube blindfolded"
    ]}}
    """
    try:
        payload = _request_json(prompt, "ideas")
        ideas = payload.get('ideas') if isinstance(payload, dict) else None
        if not isinstance(ideas, list) or len(ideas) != 5:
            raise ValueError("Invalid JSON format: 'ideas' key missing, not a list, or incorrect length")

        # Validate that ideas are related to user_input
        for idea in ideas:
            if not isinstance(idea, str) or not _is_related(idea, user_input):
                print(f"Warning: Idea '{idea}' does not seem related to '{user_input}'. Falling back to default ideas.")
                return _fallback_ideas(user_input)
        return ideas
    except Exception as e:
        # Best-effort boundary: the feed must keep working when the API misbehaves.
        print(f"Error generating ideas: {e}")
        return _fallback_ideas(user_input)


def _encode_png(image):
    """Serialize a PIL image to PNG and return it base64-encoded as str."""
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode()


def _default_caption(user_input):
    """Return the caption used when the model does not provide one."""
    return f"Amazing {user_input}! 🔥 #{user_input.replace(' ', '')}"


def _generate_caption_and_prompt(user_input, selected_idea, attempt):
    """Ask the text model for a caption and image prompt for `selected_idea`.

    Returns:
        tuple: (caption, image_prompt).  Template values built from the
        arguments are returned when the request or its validation fails.
    """
    prompt = f"""
    The user has provided the concept: "{user_input}".
    Based on this concept and the specific idea "{selected_idea}", create content for a TikTok video.
    Return a JSON object with two keys:
    - 'caption': A short, viral TikTok-style caption with hashtags that reflects "{user_input}".
    - 'image_prompt': A detailed image prompt for generating a high-quality visual scene, ensuring the theme of "{user_input}" is central.
    The image prompt should describe the scene vividly, specify a perspective and style, and ensure no text or letters are included.
    Ensure the response is strictly in JSON format.
    Example: {{"caption": "Blindfolded Rubik's Cube MAGIC! 🤯 #rubiks", "image_prompt": "A close-up view of a person solving a Rubik's Cube blindfolded, in a dramatic style, no text or letters"}}
    """
    try:
        payload = _request_json(prompt, f"item (attempt {attempt + 1})")
        if not isinstance(payload, dict) or 'caption' not in payload or 'image_prompt' not in payload:
            raise ValueError("Invalid JSON format: 'caption' or 'image_prompt' key missing")
        # str() guards the later URL-quoting and HTML-escaping against non-string JSON values.
        return str(payload['caption']), str(payload['image_prompt'])
    except Exception as e:
        print(f"Error generating item (attempt {attempt + 1}): {e}")
        return (
            _default_caption(user_input),
            f"A vivid scene of {selected_idea} related to {user_input}, in a vibrant pop art style, no text or letters",
        )


def _video_prompts(user_input, image_prompt):
    """Return the video prompts to try, from most to least elaborate."""
    return [
        f"""
        The user concept is "{user_input}". Based on this and the scene: {image_prompt}, create a video.
        Use a close-up shot with a slow dolly shot circling around the subject,
        using shallow focus on the main subject to emphasize details, in a realistic style with cinematic lighting.
        """,
        f"""
        The user concept is "{user_input}". Based on this and the scene: {image_prompt}, create a video.
        Use a close-up shot focusing on the subject, with soft lighting and a realistic style.
        """,
        f"""
        The user concept is "{user_input}". Based on this and a simplified scene: {image_prompt}, create a video.
        Use a static close-up shot of the subject in a realistic style.
        """,
    ]


def _describe_operation_error(error):
    """Return a readable message for an operation error of unknown shape."""
    if isinstance(error, dict):
        return error.get('message', error)
    return getattr(error, 'message', error)


def _generate_video(user_input, image_prompt, source_image, max_retries):
    """Generate one video from `source_image`, simplifying the prompt on each retry.

    Returns:
        str or None: The base64-encoded video bytes, or None when every
        attempt failed (failures are logged, never raised).
    """
    prompts = _video_prompts(user_input, image_prompt)
    for video_attempt in range(max_retries):
        # Attempts beyond the prepared prompts reuse the simplest one.
        video_prompt = prompts[min(video_attempt, len(prompts) - 1)]
        try:
            print(f"Attempting video generation (attempt {video_attempt + 1}): {video_prompt}")
            operation = client.models.generate_videos(
                model=VIDEO_MODEL,
                prompt=video_prompt,
                image=source_image,
                config=types.GenerateVideosConfig(
                    aspect_ratio="9:16",
                    number_of_videos=1,
                    duration_seconds=8,
                    negative_prompt="blurry, low quality, text, letters",
                ),
            )

            # Wait for video to generate, but never indefinitely.
            deadline = time.monotonic() + VIDEO_TIMEOUT_SECONDS
            while not operation.done:
                if time.monotonic() >= deadline:
                    raise TimeoutError(
                        f"Video generation did not finish within {VIDEO_TIMEOUT_SECONDS} seconds"
                    )
                time.sleep(VIDEO_POLL_SECONDS)
                operation = client.operations.get(operation)

            if operation.error:
                raise ValueError(
                    "Video generation operation failed with error: "
                    f"{_describe_operation_error(operation.error)}"
                )
            if operation.response is None:
                raise ValueError("Video generation operation failed: No response")
            videos = getattr(operation.response, 'generated_videos', None)
            if videos is None:
                raise ValueError("Video generation operation failed: No generated_videos in response")
            if not videos:
                raise ValueError("No video was generated")

            video = videos[0]
            if video is None or not hasattr(video, 'video'):
                raise ValueError("Video is invalid or missing video data")
            print(f"Generated video (attempt {video_attempt + 1})")

            # Download the video; accept either raw bytes or an iterable of chunks.
            video_data = client.files.download(file=video.video)
            video_bytes = video_data if isinstance(video_data, bytes) else b"".join(video_data)
            return base64.b64encode(video_bytes).decode()
        except Exception as e:
            print(f"Error generating video (attempt {video_attempt + 1}): {e}")

    print("Max retries reached for video generation. Proceeding without video.")
    return None


def generate_item(user_input, ideas, generate_video=False, max_retries=3):
    """
    Generate a single feed item (image and optionally one video) using one of the ideas.

    Each attempt picks a random idea, asks the text model for a caption and
    image prompt, and then requests an image.  When image generation fails, a
    fresh set of ideas is requested before the next attempt.  If every attempt
    fails (or `max_retries` is not positive) the item carries a gray
    placeholder image, so a dictionary is always returned.

    Args:
        user_input (str): The user's input concept or idea.
        ideas (list): List of ideas to choose from.  An empty list is
            replaced by the result of generate_ideas(user_input).
        generate_video (bool): Whether to generate a video from the image.
        max_retries (int): Maximum number of retries for both image and video generation.

    Returns:
        dict: A dictionary with 'text' (str), 'image_base64' (str), 'video_base64' (str or None), and 'ideas' (list).
    """
    text = _default_caption(user_input)

    for attempt in range(max_retries):
        if not ideas:
            # random.choice raises IndexError on an empty sequence.
            ideas = generate_ideas(user_input)
        selected_idea = random.choice(ideas)
        text, image_prompt = _generate_caption_and_prompt(user_input, selected_idea, attempt)

        # Attempt to generate the image
        try:
            imagen = client.models.generate_images(
                model=IMAGE_MODEL,
                prompt=image_prompt,
                config=types.GenerateImagesConfig(
                    aspect_ratio="9:16",
                    number_of_images=1,
                ),
            )
            if not imagen.generated_images:
                raise ValueError("No images returned")
            generated_image = imagen.generated_images[0]
            image = Image.open(BytesIO(generated_image.image.image_bytes))
            # Force the feed dimensions so every item renders at the same size.
            image = image.resize((FEED_WIDTH, FEED_HEIGHT), Image.LANCZOS)
            img_str = _encode_png(image)
        except Exception as e:
            print(f"Error generating image (attempt {attempt + 1}): {e}")
            if attempt < max_retries - 1:
                # Retry with new ideas
                ideas = generate_ideas(user_input)
            continue

        video_base64 = None
        if generate_video:
            video_base64 = _generate_video(
                user_input, image_prompt, generated_image.image, max_retries
            )
        return {
            'text': text,
            'image_base64': img_str,
            'video_base64': video_base64,
            'ideas': ideas,
        }

    # All attempts failed: use a gray placeholder.
    placeholder = Image.new('RGB', (FEED_WIDTH, FEED_HEIGHT), color='gray')
    return {
        'text': text,
        'image_base64': _encode_png(placeholder),
        'video_base64': None,
        'ideas': ideas,
    }


def _share_links_for(item):
    """Return the share/download HTML for one feed item dictionary."""
    return generate_share_links(item['image_base64'], item['video_base64'], item['text'])


def start_feed(user_input, generate_video, current_index, feed_items):
    """
    Start a new feed for the user input, replacing any existing items.

    The function returns once; it does not stream an intermediate loading
    view, so the returned `is_loading` flag is always False.

    Args:
        user_input (str): The user's input concept or idea.  Blank input
            falls back to "trending".
        generate_video (bool): Whether to generate a video.
        current_index (int): The current item index (replaced by 0).
        feed_items (list): The current list of feed items (replaced).

    Returns:
        tuple: (current_user_input, current_index, feed_items, html_content, share_links, is_loading)
    """
    if not user_input or not user_input.strip():
        user_input = DEFAULT_CONCEPT

    try:
        ideas = generate_ideas(user_input)
        item = generate_item(user_input, ideas, generate_video=generate_video)
        share_links = _share_links_for(item)
    except Exception as e:
        print(f"Error in start_feed: {e}")
        return user_input, 0, [], _ERROR_HTML, "", False

    feed_items = [item]
    html_content = generate_html(feed_items, False, 0, user_input, False)
    return user_input, 0, feed_items, html_content, share_links, False


def load_next(user_input, generate_video, current_index, feed_items):
    """
    Load the next item in the feed, generating a new one at the end of the feed.

    The incoming `feed_items` list is not modified; a new list is returned
    when an item is added.  On failure the index and items are returned
    unchanged together with an error message.

    Args:
        user_input (str): The user's input concept or idea.  Blank input
            falls back to "trending", matching start_feed.
        generate_video (bool): Whether to generate a video.
        current_index (int): The current item index.
        feed_items (list): The current list of feed items.

    Returns:
        tuple: (current_user_input, current_index, feed_items, html_content, share_links, is_loading)
    """
    if not user_input or not user_input.strip():
        user_input = DEFAULT_CONCEPT

    try:
        if current_index + 1 < len(feed_items):
            next_items = feed_items
            next_index = current_index + 1
        else:
            # Reuse the ideas stored with the latest item when there is one.
            ideas = feed_items[-1]['ideas'] if feed_items else generate_ideas(user_input)
            new_item = generate_item(user_input, ideas, generate_video=generate_video)
            next_items = [*feed_items, new_item]
            next_index = len(next_items) - 1
        share_links = _share_links_for(next_items[next_index])
    except Exception as e:
        print(f"Error in load_next: {e}")
        return user_input, current_index, feed_items, _ERROR_HTML, "", False

    html_content = generate_html(next_items, False, next_index, user_input, False)
    return user_input, next_index, next_items, html_content, share_links, False


def load_previous(user_input, generate_video, current_index, feed_items):
    """
    Load the previous item in the feed.

    With an empty feed the start prompt is shown and no share links are
    produced; otherwise the index moves back by one (not below 0) and is
    clamped to the last item.

    Args:
        user_input (str): The user's input concept or idea.
        generate_video (bool): Whether to generate a video (not used here).
        current_index (int): The current item index.
        feed_items (list): The current list of feed items.

    Returns:
        tuple: (current_user_input, current_index, feed_items, html_content, share_links, is_loading)
    """
    if not feed_items:
        html_content = generate_html(feed_items, False, 0, user_input, False)
        return user_input, 0, feed_items, html_content, "", False

    if current_index > 0:
        current_index -= 1
    current_index = min(current_index, len(feed_items) - 1)

    html_content = generate_html(feed_items, False, current_index, user_input, False)
    share_links = _share_links_for(feed_items[current_index])
    return user_input, current_index, feed_items, html_content, share_links, False


def generate_share_links(image_base64, video_base64, caption):
    """
    Generate share links for social media platforms with download links for image and video.

    The media is offered as `data:` URL downloads.  The share links open each
    platform in a new tab; the URL-quoted caption is passed to X through its
    tweet intent.

    Args:
        image_base64 (str): The base64-encoded image data.
        video_base64 (str or None): The base64-encoded video data (single video).
        caption (str): The caption to share.

    Returns:
        str: HTML string with share links and download instructions.
    """
    encoded_caption = urllib.parse.quote(caption)

    # Generate download links for image and video (if available)
    downloads = [
        f'<a href="data:image/png;base64,{image_base64}" download="feed_item.png" '
        f'style="{_LINK_STYLE}">Download Image</a>'
    ]
    if video_base64:  # Only include video download link if a video exists
        downloads.append(
            f'<a href="data:video/mp4;base64,{video_base64}" download="feed_item.mp4" '
            f'style="{_LINK_STYLE}">Download Video</a>'
        )
    download_links = (
        '<div style="text-align: center; margin-bottom: 10px;">'
        '<p>Download the media to share:</p>'
        f'{" ".join(downloads)}'
        '</div>'
    )

    # Generate share links for social media platforms
    anchors = [
        f'<a href="{url_template.format(caption=encoded_caption)}" target="_blank" '
        f'rel="noopener noreferrer" style="{_LINK_STYLE}">{label}</a>'
        for label, url_template in _SHARE_TARGETS
    ]
    share_links = f'<div style="text-align: center;">{" ".join(anchors)}</div>'

    # Add YouTube Shorts share button only if a video is available
    youtube_share = ""
    if video_base64:
        youtube_share = (
            '<div style="text-align: center; margin-top: 10px;">'
            f'<a href="{_YOUTUBE_UPLOAD_URL}" target="_blank" rel="noopener noreferrer" '
            f'style="{_LINK_STYLE}">Share to YouTube as a Short</a>'
            '</div>'
        )

    return f'<div style="padding: 10px;">{download_links}{share_links}{youtube_share}</div>'


def generate_html(feed_items, scroll_to_latest=False, current_index=0, user_input="", is_loading=False):
    """
    Generate an HTML string to display the current feed item.
    Displays a video if available, otherwise falls back to the image.

    The caption and the user input are HTML-escaped before being inserted.

    Args:
        feed_items (list): List of dictionaries containing 'text', 'image_base64', and 'video_base64'.
        scroll_to_latest (bool): Whether to auto-scroll to the latest item (not used here).
        current_index (int): The index of the item to display.
        user_input (str): The user's input concept or idea for loading messages.
        is_loading (bool): Whether the feed is currently loading.

    Returns:
        str: HTML string representing the feed.
    """
    if is_loading:
        concept = html.escape(user_input)
        loading_messages = [
            f"Cooking up a {concept} masterpiece... 🍳",
            f"Snapping a vibrant {concept} moment... 📸",
            f"Creating a {concept} vibe that pops... ✨",
            f"Getting that perfect {concept} shot... 🎥",
            f"Bringing {concept} to life... 🌟",
        ]
        return f'<div style="{_MESSAGE_STYLE}"><p>{random.choice(loading_messages)}</p></div>'

    if not feed_items or not 0 <= current_index < len(feed_items):
        return (
            f'<div style="{_MESSAGE_STYLE}">'
            '<p>Enter a concept or idea to start your feed!</p>'
            '</div>'
        )

    item = feed_items[current_index]
    image_url = f"data:image/png;base64,{item['image_base64']}"

    # Check if there is a video to display
    if item['video_base64']:
        media_element = (
            f'<video controls autoplay loop muted playsinline poster="{image_url}" '
            f'style="{_MEDIA_STYLE}">'
            f'<source src="data:video/mp4;base64,{item["video_base64"]}" type="video/mp4">'
            '</video>'
        )
    else:
        # Fallback to image if no video is available
        media_element = f'<img src="{image_url}" alt="Generated feed item" style="{_MEDIA_STYLE}">'

    caption = html.escape(str(item['text']))
    return (
        '<div style="text-align: center;">'
        f'{media_element}'
        f'<p style="padding: 10px; color: #fff;">{caption}</p>'
        '</div>'
    )


# Define the Gradio interface
with gr.Blocks(
    css="""
    body { background-color: #000; color: #fff; font-family: Arial, sans-serif; }
    .gradio-container { max-width: 400px; margin: 0 auto; padding: 10px; }
    input, select, button, .gr-checkbox { border-radius: 5px; background-color: #222; color: #fff; border: 1px solid #444; }
    button { background-color: #ff2d55; border: none; }
    button:hover { background-color: #e0264b; }
    .gr-button { width: 100%; margin-top: 10px; }
    .gr-form { background-color: #111; padding: 15px; border-radius: 10px; }
    """,
    title="Create Your Feed"
) as demo:
    # State variables
    current_user_input = gr.State(value="")
    current_index = gr.State(value=0)
    feed_items = gr.State(value=[])
    is_loading = gr.State(value=False)
    share_links = gr.State(value="")

    # Input section
    with gr.Column(elem_classes="gr-form"):
        gr.Markdown("### Create Your Feed")
        user_input = gr.Textbox(
            label="Enter Concept or Ideas",
            value="",
            placeholder="e.g., sushi adventure, neon tech",
            submit_btn=False
        )
        generate_video_checkbox = gr.Checkbox(
            label="Generate Video (may take longer)",
            value=False
        )
        magic_button = gr.Button("✨ Generate Next Item", elem_classes="gr-button")

    # Output display
    feed_html = gr.HTML()
    share_html = gr.HTML(label="Share this item:")

    # Event handlers
    # Handle Enter keypress in the concept input
    user_input.submit(
        fn=start_feed,
        inputs=[user_input, generate_video_checkbox, current_index, feed_items],
        outputs=[current_user_input, current_index, feed_items, feed_html, share_html, is_loading]
    )

    # Handle magic button click to generate next item.  This reads the concept
    # stored by the last submit, not the live textbox contents.
    magic_button.click(
        fn=load_next,
        inputs=[current_user_input, generate_video_checkbox, current_index, feed_items],
        outputs=[current_user_input, current_index, feed_items, feed_html, share_html, is_loading]
    )

    # Hidden button for previous item navigation
    previous_button = gr.Button("Previous", elem_id="previous-button", visible=False)

    # Handle click to go to previous item
    previous_button.click(
        fn=load_previous,
        inputs=[current_user_input, generate_video_checkbox, current_index, feed_items],
        outputs=[current_user_input, current_index, feed_items, feed_html, share_html, is_loading]
    )

# Launch the app
demo.launch()