# Source: Hugging Face Space (status at capture time: sleeping).
# Install necessary packages
# !pip install huggingface_hub gradio pillow
import html
import json
import logging
import os
import random
import sys

import gradio as gr
from huggingface_hub import InferenceClient
# Set up logging
# Root logger: INFO and above, timestamped records, written to stdout.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
_stdout_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
    handlers=[_stdout_handler],
)

# Named application logger used by the functions in this file.
logger = logging.getLogger("visual_studio")
# Setting up model clients
def setup_client(api_key, provider=None):
    """Create an ``InferenceClient``, returning ``None`` if construction fails.

    Args:
        api_key: Token passed to ``InferenceClient`` as ``api_key``.
        provider: Optional provider name. When truthy it is forwarded as the
            ``provider`` argument; otherwise the client is built with the
            API key only.

    Returns:
        The constructed client, or ``None`` when any exception is raised
        while building it (the error is logged, not re-raised).
    """
    try:
        client_kwargs = {"api_key": api_key}
        label = "Hugging Face"
        if provider:
            client_kwargs["provider"] = provider
            label = provider

        new_client = InferenceClient(**client_kwargs)
        logger.info(f"{label} client initialized successfully")
        return new_client
    except Exception as error:
        # Callers rely on a None result rather than an exception.
        logger.error(f"Error initializing client: {str(error)}")
        return None
# Initialize clients
# Both clients read their token from the HF_API_KEY environment variable.
# setup_client() reports failure by returning None (it never raises), so
# availability is decided from the returned value, not from an except branch.
hf_api_key = os.getenv("HF_API_KEY")
hf_client = setup_client(hf_api_key)
if hf_client is not None:
    logger.info("Hugging Face client created successfully")
else:
    logger.error("Failed to create Hugging Face client; image generation will be unavailable")

# Set up the Llama client (served through the "sambanova" provider).
llama_api_key = os.getenv("HF_API_KEY")
llama_client = setup_client(llama_api_key, "sambanova")
# use_llama must track the real client state: it gates prompt enhancement
# and is reported back to the user after generation.
use_llama = llama_client is not None
if use_llama:
    logger.info("Llama client created successfully")
else:
    logger.warning("Llama client not available. Will use fallback enhancement.")
# Only Hugging Face hosted models.
# Maps the model repository id (sent to the inference API) to the
# human-readable label shown in the UI. Insertion order is the dropdown order.
IMAGE_MODELS = {
    "stabilityai/stable-diffusion-xl-base-1.0": "SDXL (Best Quality)",
    "runwayml/stable-diffusion-v1-5": "Stable Diffusion 1.5 (Balanced)",
    "stabilityai/stable-diffusion-2-1": "Stable Diffusion 2.1 (Fast)",
    "prompthero/openjourney": "OpenJourney (Midjourney-like)",
    "dreamlike-art/dreamlike-diffusion-1.0": "Dreamlike Diffusion (Artistic)",
}
# Creation types.
# Maps each creation-type label offered in the UI to the one-sentence
# instruction that is passed to the Llama prompt enhancer.
CREATION_TYPES = {
    "Realistic Photo": "Create a photorealistic image with natural details and lighting",
    "Digital Art": "Create colorful digital artwork with clean lines and vibrant colors",
    "Fantasy Illustration": "Create magical and fantastical scenes with otherworldly elements",
    "Concept Art": "Create professional concept art for characters, environments or objects",
    "Anime/Manga": "Create Japanese anime or manga style illustration",
    "Oil Painting": "Create an image with oil painting textures and artistic brushstrokes",
    "Watercolor": "Create a soft watercolor illustration with subtle color blending",
    "Sketch": "Create a detailed sketch or drawing with line art focus",
    "3D Rendering": "Create an image that looks like a 3D rendered scene with realistic lighting",
    "Pixel Art": "Create retro-style pixel art with limited color palette",
}
# Art styles with detailed descriptions for better Llama prompt enhancement.
# Keys are the labels offered in the UI; values are the descriptions that are
# inserted into the enhancement request.
ART_STYLES = {
    "Photorealistic": "detailed realistic style that resembles a photograph with accurate lighting and textures",
    "Impressionist": "soft brushstrokes that capture light and atmosphere over precise details, like Monet",
    "Surrealist": "dreamlike quality with impossible or irrational scenes, like Salvador Dali",
    "Pop Art": "bold colors, sharp lines and popular culture references, like Andy Warhol",
    "Minimalist": "simplified forms, limited color palette, and clean composition with minimal elements",
    "Abstract": "non-representational style using shapes, colors, and forms to express ideas",
    "Cubist": "geometric shapes and multiple perspectives shown simultaneously, like Picasso",
    "Art Nouveau": "ornate, flowing lines inspired by natural forms with decorative elegance",
    "Gothic": "dark, medieval-inspired aesthetic with dramatic lighting and architectural elements",
    "Cyberpunk": "futuristic dystopian style with neon colors, technology, and urban decay",
    "Steampunk": "Victorian-era aesthetic combined with steam-powered technology and brass elements",
    "Retro/Vintage": "nostalgic style reminiscent of past decades with period-appropriate elements",
    "Art Deco": "geometric patterns, bold colors, and luxurious materials in a symmetrical style",
    "Baroque": "dramatic, ornate style with rich details, contrast, and dynamic composition",
    "Ukiyo-e": "traditional Japanese woodblock print style with flat areas of color and strong outlines",
    "Comic Book": "bold outlines, bright colors, and action-oriented composition like classic comics",
    "Psychedelic": "vibrant, swirling colors with abstract patterns inspired by 1960s art",
    "Vaporwave": "glitch aesthetics with pastel colors, 80s/90s nostalgia and digital elements",
    "Studio Ghibli": "whimsical, detailed animation style inspired by Japanese animated films",
    "Hyperrealism": "extremely detailed realism that exceeds photograph-like precision",
}
# Function to enhance prompt with Llama 4 with improved logical understanding
def enhance_prompt_with_llama(user_input, creation_type, art_style, mood):
    """Expand a short description into a detailed image prompt using Llama 4.

    The request combines the user's description with the creation-type and
    art-style descriptions looked up in ``CREATION_TYPES`` / ``ART_STYLES``
    and the selected mood, and is sent through ``llama_client``.

    Args:
        user_input: The user's free-text description.
        creation_type: Key into ``CREATION_TYPES``; unknown keys use a
            generic description.
        art_style: Key into ``ART_STYLES``; unknown keys use a generic
            description.
        mood: Mood label inserted verbatim into the request.

    Returns:
        The enhanced prompt text. Whenever Llama is disabled, the request
        fails, or the reply is empty, the result of
        ``enhance_prompt_fallback`` is returned instead, so the style and
        mood selections are never silently dropped.
    """
    if not use_llama or llama_client is None:
        logger.warning("Llama enhancement not available, using fallback")
        return enhance_prompt_fallback(user_input, creation_type, art_style, mood)

    logger.info(f"Enhancing prompt with Llama 4 for creation type: {creation_type}, art style: {art_style}")

    # Enhanced Llama 4 system prompt
    system_prompt = """You are a world-class prompt engineer who specializes in creating detailed, effective prompts for text-to-image AI models.
Your task is to transform a user's simple description into a comprehensive, detailed image generation prompt that will create stunning visuals. Consider all the provided elements (description, creation type, art style, mood) and combine them into a cohesive, detailed prompt.
MOST IMPORTANTLY - ADD LOGICAL DETAILS:
- Analyze what the user wants and add logical details that would make the scene realistic or coherent
- If describing something fantastical (e.g., "flying cat"), add logical details about how this could work (e.g., "a cat with majestic feathered wings spread wide")
- Think about environment, lighting, perspective, time of day, weather, and other contextual elements
- Create a vivid, imaginable scene with spatial relationships clearly defined
PROMPT STRUCTURE GUIDELINES:
1. Start with the core subject and its primary characteristics
2. Add environment and setting details
3. Describe lighting, atmosphere, and mood
4. Include specific visual style and artistic technique references
5. Add technical quality terms (8K, detailed, masterful, etc.)
FORMAT YOUR RESPONSE AS A SINGLE PARAGRAPH with no additional comments, explanations, or bullet points. Use natural language without awkward comma separations. Aim for 75-150 words.
AVOID:
- Do not include quotation marks in your response
- Do not preface with "here's a prompt" or similar text
- Do not use placeholders
- Do not add negative prompts
- Do not write in list format or use bullet points
Respond only with the enhanced prompt and nothing else."""

    # Creation type description
    creation_description = CREATION_TYPES.get(creation_type, "Create a detailed image")
    # Art style description
    style_description = ART_STYLES.get(art_style, "with detailed and professional quality")

    # Prepare the user prompt for Llama
    user_prompt = f"""Description: {user_input}
Creation Type: {creation_type} - {creation_description}
Art Style: {art_style} - {style_description}
Mood: {mood}
Please create a comprehensive, detailed image generation prompt that combines all these elements."""

    try:
        completion = llama_client.chat.completions.create(
            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=500,
        )
        # Kept inside the try: an empty ``choices`` list raises IndexError.
        enhanced = completion.choices[0].message.content
    except Exception as e:
        logger.error(f"Error during Llama enhancement: {str(e)}")
        return enhance_prompt_fallback(user_input, creation_type, art_style, mood)

    # Validate before slicing for the log line: the content may be None or
    # blank, and a blank reply must not replace the prompt.
    enhanced = (enhanced or "").strip()
    if not enhanced:
        logger.warning("Llama returned an empty prompt, using fallback")
        return enhance_prompt_fallback(user_input, creation_type, art_style, mood)

    logger.info(f"Llama 4 enhanced prompt: {enhanced[:100]}...")
    return enhanced
# Fallback prompt enhancement without Llama
# The lookup tables are module-level so they are built once, not on every call.

# Quality terms by creation type
_FALLBACK_QUALITY_TERMS = {
    "Realistic Photo": [
        "photorealistic", "high resolution", "detailed",
        "natural lighting", "sharp focus", "professional photography",
    ],
    "Digital Art": [
        "vibrant colors", "clean lines", "digital illustration",
        "polished", "professional digital art", "detailed rendering",
    ],
    "Fantasy Illustration": [
        "magical atmosphere", "fantasy art", "detailed illustration",
        "epic", "otherworldly", "imaginative scene",
    ],
    "Concept Art": [
        "professional concept art", "detailed design", "conceptual illustration",
        "industry standard", "visual development", "production artwork",
    ],
    "Anime/Manga": [
        "anime style", "manga illustration", "cel shaded",
        "Japanese animation", "2D character art", "anime aesthetic",
    ],
    "Oil Painting": [
        "oil on canvas", "textured brushwork", "rich colors",
        "traditional painting", "artistic brushstrokes", "gallery quality",
    ],
    "Watercolor": [
        "watercolor painting", "soft color bleeding", "delicate washes",
        "transparent layers", "loose brushwork", "gentle transitions",
    ],
    "Sketch": [
        "detailed sketch", "pencil drawing", "line art",
        "hand-drawn", "fine details", "shading techniques",
    ],
    "3D Rendering": [
        "3D render", "volumetric lighting", "ray tracing",
        "3D modeling", "realistic textures", "computer graphics",
    ],
    "Pixel Art": [
        "pixel art", "8-bit style", "retro game aesthetic",
        "limited color palette", "pixelated", "nostalgic game art",
    ],
}

# Generic terms used when the creation type is not in the table above
_FALLBACK_GENERIC_TERMS = ["high quality", "detailed", "professional", "masterful"]

# Common quality terms appended regardless of creation type
_FALLBACK_COMMON_TERMS = [
    "8K resolution", "highly detailed", "professional",
    "trending on artstation", "masterpiece",
]

# Style modifiers for different art styles
_FALLBACK_STYLE_MODIFIERS = {
    "Photorealistic": "highly detailed photorealistic style with perfect lighting",
    "Impressionist": "impressionist style with visible brushstrokes capturing light and atmosphere",
    "Surrealist": "surrealist style with dreamlike and impossible elements",
    "Pop Art": "pop art style with bold colors and cultural references",
    "Minimalist": "minimalist style with simplified forms and limited palette",
    "Abstract": "abstract style using non-representational shapes and colors",
    "Cubist": "cubist style with geometric forms and multiple perspectives",
    "Art Nouveau": "art nouveau style with ornate flowing lines and natural forms",
    "Gothic": "gothic style with dark atmosphere and dramatic elements",
    "Cyberpunk": "cyberpunk style with neon colors and futuristic technology",
    "Steampunk": "steampunk style with Victorian aesthetics and brass machinery",
    "Retro/Vintage": "retro style with nostalgic elements from past decades",
    "Art Deco": "art deco style with geometric patterns and luxurious elements",
    "Baroque": "baroque style with dramatic lighting and rich ornamentation",
    "Ukiyo-e": "ukiyo-e style japanese woodblock print aesthetic",
    "Comic Book": "comic book style with bold outlines and vibrant colors",
    "Psychedelic": "psychedelic style with vibrant swirling colors and patterns",
    "Vaporwave": "vaporwave aesthetic with glitch art and 80s/90s nostalgia",
    "Studio Ghibli": "Studio Ghibli anime style with whimsical detailed environments",
    "Hyperrealism": "hyperrealistic style with extreme detail beyond photography",
}

# Mood modifiers for different moods
_FALLBACK_MOOD_MODIFIERS = {
    "Happy": "bright cheerful atmosphere with warm colors",
    "Sad": "melancholic atmosphere with muted colors",
    "Mysterious": "enigmatic atmosphere with shadows and hidden elements",
    "Peaceful": "serene calm atmosphere with gentle lighting",
    "Tense": "suspenseful atmosphere with dramatic lighting",
    "Whimsical": "playful whimsical atmosphere with imaginative elements",
    "Dark": "dark gloomy atmosphere with deep shadows",
    "Energetic": "dynamic vibrant atmosphere with strong colors",
    "Romantic": "soft romantic atmosphere with dreamy lighting",
    "Epic": "grand epic atmosphere with dramatic scale",
}


def enhance_prompt_fallback(user_input, creation_type, art_style, mood):
    """Build an enhanced prompt locally, without calling Llama.

    The result is a comma-separated string: the description, a style
    modifier, a mood modifier, then up to three randomly chosen quality
    terms for the creation type and two randomly chosen common terms.
    Because the terms are picked with ``random.sample``, repeated calls with
    the same arguments can return different prompts.

    Args:
        user_input: The user's description. ``None`` or blank input is
            tolerated and simply omitted from the prompt.
        creation_type: Selects the quality-term list; unknown values use a
            generic list.
        art_style: Selects the style modifier; unknown values use
            ``"detailed style"``.
        mood: Selects the mood modifier; unknown values use
            ``"atmospheric"``.

    Returns:
        The enhanced prompt string.
    """
    # Normalise first so None does not break slicing and a blank description
    # does not leave a leading ", " in the prompt.
    description = (user_input or "").strip()
    logger.info(f"Using fallback enhancement for: {description[:50]}...")

    # Get terms for the specific creation type, or use generic terms
    type_terms = _FALLBACK_QUALITY_TERMS.get(creation_type, _FALLBACK_GENERIC_TERMS)
    style_modifier = _FALLBACK_STYLE_MODIFIERS.get(art_style, "detailed style")
    mood_modifier = _FALLBACK_MOOD_MODIFIERS.get(mood, "atmospheric")

    # Add randomly selected quality terms
    selected_type_terms = random.sample(type_terms, min(3, len(type_terms)))
    selected_common_terms = random.sample(
        _FALLBACK_COMMON_TERMS, min(2, len(_FALLBACK_COMMON_TERMS))
    )
    # The generic and common lists share "professional"; dict.fromkeys drops
    # repeats while preserving the sampled order.
    quality_terms = list(dict.fromkeys(selected_type_terms + selected_common_terms))

    # Basic prompt structure, skipping an empty description
    prompt_parts = [part for part in (description, style_modifier, mood_modifier) if part]

    # Final enhanced prompt
    enhanced_prompt = ", ".join(prompt_parts + quality_terms)
    logger.info(f"Fallback enhanced prompt: {enhanced_prompt[:100]}...")
    return enhanced_prompt
# Generate image function
def generate_image(description, creation_type, art_style, mood, model_name):
    """Enhance the description into a prompt and render it with the chosen model.

    Args:
        description: Free-text description of the desired image.
        creation_type: Creation-type label forwarded to the prompt enhancer.
        art_style: Art-style label forwarded to the prompt enhancer.
        mood: Mood label forwarded to the prompt enhancer.
        model_name: Model identifier passed to ``hf_client.text_to_image``.

    Returns:
        A ``(image, status, prompt)`` tuple. ``image`` is whatever
        ``hf_client.text_to_image`` returned, or ``None`` on failure.
        ``status`` is an HTML-safe status or error message (the UI shows it
        in a ``gr.HTML`` component). ``prompt`` is the prompt that was, or
        would have been, sent to the model.
    """
    # Bind the prompt before the try block: the except branch returns it, and
    # it would otherwise be unbound if prompt enhancement itself raised.
    enhanced_prompt = (description or "").strip()
    if not enhanced_prompt:
        logger.warning("Image generation requested without a description")
        return None, "Error: please describe what you want to see", ""

    try:
        logger.info(f"Generating image with model: {model_name}")

        # Use Llama 4 for prompt enhancement with all inputs
        enhanced_prompt = enhance_prompt_with_llama(description, creation_type, art_style, mood)

        # Check if client is available
        if hf_client is None:
            logger.error("Hugging Face client not available")
            return None, "Error: Hugging Face client not available", enhanced_prompt

        # Generate image
        logger.info(f"Sending request to model {model_name} with prompt: {enhanced_prompt[:100]}...")
        image = hf_client.text_to_image(
            prompt=enhanced_prompt,
            model=model_name,
            negative_prompt="low quality, blurry, distorted, deformed, disfigured, bad anatomy, watermark, signature, text",
        )
        logger.info("Image generated successfully")

        # Analysis information. The status is rendered as HTML, so the line
        # break is "<br>" (a "\n" would collapse) and dynamic text is escaped.
        # NOTE: use_llama reflects whether the Llama client is configured; a
        # single call may still have fallen back after a request error.
        enhancer = "Llama 4" if use_llama else "fallback method"
        analysis = (
            f"Image generated using model: {html.escape(str(model_name))}<br>"
            f"Prompt enhanced with {enhancer}"
        )
        return image, analysis, enhanced_prompt
    except Exception as e:
        logger.error(f"Error generating image: {str(e)}")
        # Exception text can contain markup (e.g. an HTML error page from the
        # API); escape it so it is displayed rather than interpreted.
        return None, f"Error generating image: {html.escape(str(e))}", enhanced_prompt
# Create Gradio interface
# Layout: a two-column row (inputs on the left, outputs on the right),
# followed by two help accordions.
with gr.Blocks(title="AI Image Creator") as interface:
    gr.Markdown("# 🎨 AI Image Creator")
    gr.Markdown("### Transform your ideas into stunning images with AI")

    with gr.Row():
        with gr.Column():
            # Core inputs - streamlined for simplicity
            description_input = gr.Textbox(
                label="Describe what you want to see",
                placeholder="Be detailed and specific about what you want in the image...",
                lines=4
            )
            with gr.Row():
                creation_type = gr.Dropdown(
                    choices=list(CREATION_TYPES.keys()),
                    value="Digital Art",
                    label="Creation Type"
                )
                model_selector = gr.Dropdown(
                    choices=list(IMAGE_MODELS.keys()),
                    value="stabilityai/stable-diffusion-xl-base-1.0",
                    label="Image Model"
                )
            with gr.Row():
                art_style = gr.Dropdown(
                    choices=list(ART_STYLES.keys()),
                    value="Photorealistic",
                    label="Art Style"
                )
                mood_dropdown = gr.Dropdown(
                    choices=["Happy", "Sad", "Mysterious", "Peaceful", "Tense",
                             "Whimsical", "Dark", "Energetic", "Romantic", "Epic"],
                    value="Peaceful",
                    label="Mood"
                )

            # Generate button
            generate_button = gr.Button("✨ Generate Image", variant="primary", size="lg")

            # Display model names in a more user-friendly way
            def format_model_name(model_key):
                """Return the friendly label for a model id, or the id itself if unknown."""
                return IMAGE_MODELS.get(model_key, model_key)

            def _selected_model_html(model_key):
                """Render the 'Selected model' line shown under the button."""
                return f"<p>Selected model: <b>{format_model_name(model_key)}</b></p>"

            # Start with the pre-selected model's label instead of an empty
            # element, so the line is visible before the first change event.
            model_label = gr.HTML(value=_selected_model_html(model_selector.value))
            model_selector.change(
                fn=_selected_model_html,
                inputs=model_selector,
                outputs=model_label
            )

        with gr.Column():
            # Output areas
            image_output = gr.Image(label="Generated Image")
            with gr.Accordion("Enhanced Prompt", open=False):
                prompt_output = gr.Textbox(label="AI-Enhanced Prompt Used", lines=6)

    # Connect generate button. The status message returned by generate_image
    # is displayed in model_label, replacing the "Selected model" line.
    generate_button.click(
        fn=generate_image,
        inputs=[description_input, creation_type, art_style, mood_dropdown, model_selector],
        outputs=[image_output, model_label, prompt_output]
    )

    # Tips
    with gr.Accordion("Tips for better results", open=True):
        gr.Markdown("""
### 💡 Tips for better results:
- **Be specific** about what you want to see - include details about subjects, actions, setting
- **Mention colors, textures, lighting** if they're important to your vision
- **Try different art styles** to dramatically change the look and feel
- **The mood selection** influences the overall atmosphere and color palette
- **SDXL model** generally produces the highest quality images but takes longer
#### Examples of good descriptions:
- *"A serene lake at sunset with mountains in the background and a small wooden boat floating nearby"*
- *"A futuristic cityscape with flying cars, neon lights, and tall skyscrapers under a night sky with two moons"*
- *"A close-up portrait of an elderly craftsman with weathered hands working on an intricate wooden carving in his workshop"*
""")

    # Troubleshooting
    with gr.Accordion("Troubleshooting", open=False):
        gr.Markdown("""
### Troubleshooting Tips
If you encounter errors:
1. **Check the console/terminal** where you're running this code for detailed logs
2. **Verify your Hugging Face API key** is correct and has the right permissions
3. **Try a different model** if you get access restriction errors
4. **Simplify your prompt** if it's very long or complex
5. **Restart the app** if you've been running it for a long time
Common errors:
- 401/403 errors: API key issues or model access restrictions
- 429 errors: Rate limiting (too many requests)
- 503 errors: Service temporarily unavailable
""")

# Launch the interface
interface.launch()