|
|
|
|
|
|
|
import json
import logging
import os
import random
import sys

import gradio as gr
from huggingface_hub import InferenceClient
|
|
|
|
|
# Route INFO-and-above log records to stdout with a timestamped format so
# they show up in the console the app is started from.
_stdout_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    handlers=[_stdout_handler],
)

# Module-wide logger used by every function in this file.
logger = logging.getLogger("visual_studio")
|
|
|
|
|
def setup_client(api_key, provider=None):
    """Create an ``InferenceClient``, optionally bound to a provider.

    Args:
        api_key: API key passed straight through to ``InferenceClient``.
        provider: Optional provider name. When falsy, the client is created
            without a ``provider`` argument.

    Returns:
        The new client, or ``None`` if construction raised. Errors are
        logged and swallowed, so callers must check for ``None``.
    """
    try:
        if not provider:
            client = InferenceClient(api_key=api_key)
            logger.info("Hugging Face client initialized successfully")
            return client

        client = InferenceClient(provider=provider, api_key=api_key)
        logger.info(f"{provider} client initialized successfully")
        return client
    except Exception as exc:
        logger.error(f"Error initializing client: {str(exc)}")
        return None
|
|
|
|
|
# Build the shared inference clients once, at import time.
#
# setup_client() never raises: it logs the error and returns None. Success is
# therefore detected by checking the return value rather than by try/except.
hf_api_key = os.getenv("HF_API_KEY")
if not hf_api_key:
    logger.warning("HF_API_KEY is not set; inference requests may fail authentication")

# Client used for text-to-image generation.
hf_client = setup_client(hf_api_key)
if hf_client is None:
    logger.error("Failed to create Hugging Face client")
else:
    logger.info("Hugging Face client created successfully")

# Client used for prompt enhancement, routed through the "sambanova" provider.
# It authenticates with the same HF_API_KEY variable as the image client.
llama_api_key = os.getenv("HF_API_KEY")
llama_client = setup_client(llama_api_key, "sambanova")

# use_llama must only be True when a client actually exists; the enhancement
# and status-reporting code both branch on it.
use_llama = llama_client is not None
if use_llama:
    logger.info("Llama client created successfully")
else:
    logger.warning("Llama client not available. Will use fallback enhancement.")
|
|
|
|
|
# Selectable text-to-image models: model id -> label shown to the user.
# Insertion order is the order the ids appear in the model dropdown.
IMAGE_MODELS = {
    "stabilityai/stable-diffusion-xl-base-1.0": "SDXL (Best Quality)",
    "runwayml/stable-diffusion-v1-5": "Stable Diffusion 1.5 (Balanced)",
    "stabilityai/stable-diffusion-2-1": "Stable Diffusion 2.1 (Fast)",
    "prompthero/openjourney": "OpenJourney (Midjourney-like)",
    "dreamlike-art/dreamlike-diffusion-1.0": "Dreamlike Diffusion (Artistic)",
}
|
|
|
|
|
# Creation types: dropdown label -> instruction sentence that is passed to the
# prompt-enhancement model alongside the user's description.
CREATION_TYPES = {
    "Realistic Photo": "Create a photorealistic image with natural details and lighting",
    "Digital Art": "Create colorful digital artwork with clean lines and vibrant colors",
    "Fantasy Illustration": "Create magical and fantastical scenes with otherworldly elements",
    "Concept Art": "Create professional concept art for characters, environments or objects",
    "Anime/Manga": "Create Japanese anime or manga style illustration",
    "Oil Painting": "Create an image with oil painting textures and artistic brushstrokes",
    "Watercolor": "Create a soft watercolor illustration with subtle color blending",
    "Sketch": "Create a detailed sketch or drawing with line art focus",
    "3D Rendering": "Create an image that looks like a 3D rendered scene with realistic lighting",
    "Pixel Art": "Create retro-style pixel art with limited color palette",
}
|
|
|
|
|
# Art styles: dropdown label -> short description of the style that is passed
# to the prompt-enhancement model.
ART_STYLES = {
    "Photorealistic": "detailed realistic style that resembles a photograph with accurate lighting and textures",
    "Impressionist": "soft brushstrokes that capture light and atmosphere over precise details, like Monet",
    "Surrealist": "dreamlike quality with impossible or irrational scenes, like Salvador Dali",
    "Pop Art": "bold colors, sharp lines and popular culture references, like Andy Warhol",
    "Minimalist": "simplified forms, limited color palette, and clean composition with minimal elements",
    "Abstract": "non-representational style using shapes, colors, and forms to express ideas",
    "Cubist": "geometric shapes and multiple perspectives shown simultaneously, like Picasso",
    "Art Nouveau": "ornate, flowing lines inspired by natural forms with decorative elegance",
    "Gothic": "dark, medieval-inspired aesthetic with dramatic lighting and architectural elements",
    "Cyberpunk": "futuristic dystopian style with neon colors, technology, and urban decay",
    "Steampunk": "Victorian-era aesthetic combined with steam-powered technology and brass elements",
    "Retro/Vintage": "nostalgic style reminiscent of past decades with period-appropriate elements",
    "Art Deco": "geometric patterns, bold colors, and luxurious materials in a symmetrical style",
    "Baroque": "dramatic, ornate style with rich details, contrast, and dynamic composition",
    "Ukiyo-e": "traditional Japanese woodblock print style with flat areas of color and strong outlines",
    "Comic Book": "bold outlines, bright colors, and action-oriented composition like classic comics",
    "Psychedelic": "vibrant, swirling colors with abstract patterns inspired by 1960s art",
    "Vaporwave": "glitch aesthetics with pastel colors, 80s/90s nostalgia and digital elements",
    "Studio Ghibli": "whimsical, detailed animation style inspired by Japanese animated films",
    "Hyperrealism": "extremely detailed realism that exceeds photograph-like precision",
}
|
|
|
|
|
# Chat model used for prompt enhancement.
_LLAMA_MODEL = "meta-llama/Llama-4-Scout-17B-16E-Instruct"

# System instructions sent with every enhancement request. Kept at module
# level so the literal is built once instead of on every call.
_LLAMA_SYSTEM_PROMPT = """You are a world-class prompt engineer who specializes in creating detailed, effective prompts for text-to-image AI models.

Your task is to transform a user's simple description into a comprehensive, detailed image generation prompt that will create stunning visuals. Consider all the provided elements (description, creation type, art style, mood) and combine them into a cohesive, detailed prompt.

MOST IMPORTANTLY - ADD LOGICAL DETAILS:
- Analyze what the user wants and add logical details that would make the scene realistic or coherent
- If describing something fantastical (e.g., "flying cat"), add logical details about how this could work (e.g., "a cat with majestic feathered wings spread wide")
- Think about environment, lighting, perspective, time of day, weather, and other contextual elements
- Create a vivid, imaginable scene with spatial relationships clearly defined

PROMPT STRUCTURE GUIDELINES:
1. Start with the core subject and its primary characteristics
2. Add environment and setting details
3. Describe lighting, atmosphere, and mood
4. Include specific visual style and artistic technique references
5. Add technical quality terms (8K, detailed, masterful, etc.)

FORMAT YOUR RESPONSE AS A SINGLE PARAGRAPH with no additional comments, explanations, or bullet points. Use natural language without awkward comma separations. Aim for 75-150 words.

AVOID:
- Do not include quotation marks in your response
- Do not preface with "here's a prompt" or similar text
- Do not use placeholders
- Do not add negative prompts
- Do not write in list format or use bullet points

Respond only with the enhanced prompt and nothing else."""


def enhance_prompt_with_llama(user_input, creation_type, art_style, mood):
    """Expand a short description into a detailed image prompt via Llama.

    Falls back to ``enhance_prompt_fallback`` whenever the Llama client is
    unavailable, the request fails, or the model returns an empty reply.

    Args:
        user_input: The user's description of the desired image.
        creation_type: Key into ``CREATION_TYPES``; unknown keys use a
            generic description.
        art_style: Key into ``ART_STYLES``; unknown keys use a generic
            description.
        mood: Mood label, included verbatim in the request.

    Returns:
        The enhanced prompt text (whitespace-stripped), or the fallback
        enhancement.
    """
    if not use_llama or llama_client is None:
        logger.warning("Llama enhancement not available, using fallback")
        return enhance_prompt_fallback(user_input, creation_type, art_style, mood)

    try:
        logger.info(f"Enhancing prompt with Llama 4 for creation type: {creation_type}, art style: {art_style}")

        creation_description = CREATION_TYPES.get(creation_type, "Create a detailed image")
        style_description = ART_STYLES.get(art_style, "with detailed and professional quality")

        user_prompt = (
            f"Description: {user_input}\n"
            f"Creation Type: {creation_type} - {creation_description}\n"
            f"Art Style: {art_style} - {style_description}\n"
            f"Mood: {mood}\n"
            "\n"
            "Please create a comprehensive, detailed image generation prompt that combines all these elements."
        )

        completion = llama_client.chat.completions.create(
            model=_LLAMA_MODEL,
            messages=[
                {"role": "system", "content": _LLAMA_SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=500,
        )
        # content may be None or blank; normalise before slicing or returning.
        enhanced = (completion.choices[0].message.content or "").strip()
    except Exception as e:
        # Broad catch is deliberate: any failure of the remote call must
        # degrade to the local fallback rather than abort image generation.
        logger.error(f"Error during Llama enhancement: {str(e)}")
        return enhance_prompt_fallback(user_input, creation_type, art_style, mood)

    if not enhanced:
        # An empty reply is treated like any other failure instead of
        # returning the raw, unenhanced description.
        logger.warning("Llama returned an empty prompt, using fallback")
        return enhance_prompt_fallback(user_input, creation_type, art_style, mood)

    logger.info(f"Llama 4 enhanced prompt: {enhanced[:100]}...")
    return enhanced
|
|
|
|
|
def enhance_prompt_fallback(user_input, creation_type, art_style, mood):
    """Build an enhanced prompt locally from keyword tables.

    The result is the user's text, a style phrase, a mood phrase, three
    randomly chosen keywords for the creation type and two randomly chosen
    generic quality keywords, all joined with ", ".

    Args:
        user_input: The user's description of the desired image.
        creation_type: Selects the keyword list; unknown values use a
            generic list.
        art_style: Selects the style phrase; unknown values give
            "detailed style".
        mood: Selects the mood phrase; unknown values give "atmospheric".

    Returns:
        The comma-separated prompt string.
    """
    logger.info(f"Using fallback enhancement for: {user_input[:50]}...")

    # Quality keywords, keyed by creation type.
    keywords_by_type = {
        "Realistic Photo": [
            "photorealistic", "high resolution", "detailed",
            "natural lighting", "sharp focus", "professional photography",
        ],
        "Digital Art": [
            "vibrant colors", "clean lines", "digital illustration",
            "polished", "professional digital art", "detailed rendering",
        ],
        "Fantasy Illustration": [
            "magical atmosphere", "fantasy art", "detailed illustration",
            "epic", "otherworldly", "imaginative scene",
        ],
        "Concept Art": [
            "professional concept art", "detailed design", "conceptual illustration",
            "industry standard", "visual development", "production artwork",
        ],
        "Anime/Manga": [
            "anime style", "manga illustration", "cel shaded",
            "Japanese animation", "2D character art", "anime aesthetic",
        ],
        "Oil Painting": [
            "oil on canvas", "textured brushwork", "rich colors",
            "traditional painting", "artistic brushstrokes", "gallery quality",
        ],
        "Watercolor": [
            "watercolor painting", "soft color bleeding", "delicate washes",
            "transparent layers", "loose brushwork", "gentle transitions",
        ],
        "Sketch": [
            "detailed sketch", "pencil drawing", "line art",
            "hand-drawn", "fine details", "shading techniques",
        ],
        "3D Rendering": [
            "3D render", "volumetric lighting", "ray tracing",
            "3D modeling", "realistic textures", "computer graphics",
        ],
        "Pixel Art": [
            "pixel art", "8-bit style", "retro game aesthetic",
            "limited color palette", "pixelated", "nostalgic game art",
        ],
    }

    # One descriptive phrase per art style.
    style_phrases = {
        "Photorealistic": "highly detailed photorealistic style with perfect lighting",
        "Impressionist": "impressionist style with visible brushstrokes capturing light and atmosphere",
        "Surrealist": "surrealist style with dreamlike and impossible elements",
        "Pop Art": "pop art style with bold colors and cultural references",
        "Minimalist": "minimalist style with simplified forms and limited palette",
        "Abstract": "abstract style using non-representational shapes and colors",
        "Cubist": "cubist style with geometric forms and multiple perspectives",
        "Art Nouveau": "art nouveau style with ornate flowing lines and natural forms",
        "Gothic": "gothic style with dark atmosphere and dramatic elements",
        "Cyberpunk": "cyberpunk style with neon colors and futuristic technology",
        "Steampunk": "steampunk style with Victorian aesthetics and brass machinery",
        "Retro/Vintage": "retro style with nostalgic elements from past decades",
        "Art Deco": "art deco style with geometric patterns and luxurious elements",
        "Baroque": "baroque style with dramatic lighting and rich ornamentation",
        "Ukiyo-e": "ukiyo-e style japanese woodblock print aesthetic",
        "Comic Book": "comic book style with bold outlines and vibrant colors",
        "Psychedelic": "psychedelic style with vibrant swirling colors and patterns",
        "Vaporwave": "vaporwave aesthetic with glitch art and 80s/90s nostalgia",
        "Studio Ghibli": "Studio Ghibli anime style with whimsical detailed environments",
        "Hyperrealism": "hyperrealistic style with extreme detail beyond photography",
    }

    # One atmosphere phrase per mood.
    mood_phrases = {
        "Happy": "bright cheerful atmosphere with warm colors",
        "Sad": "melancholic atmosphere with muted colors",
        "Mysterious": "enigmatic atmosphere with shadows and hidden elements",
        "Peaceful": "serene calm atmosphere with gentle lighting",
        "Tense": "suspenseful atmosphere with dramatic lighting",
        "Whimsical": "playful whimsical atmosphere with imaginative elements",
        "Dark": "dark gloomy atmosphere with deep shadows",
        "Energetic": "dynamic vibrant atmosphere with strong colors",
        "Romantic": "soft romantic atmosphere with dreamy lighting",
        "Epic": "grand epic atmosphere with dramatic scale",
    }

    type_keywords = keywords_by_type.get(
        creation_type,
        ["high quality", "detailed", "professional", "masterful"],
    )
    generic_keywords = [
        "8K resolution", "highly detailed", "professional",
        "trending on artstation", "masterpiece",
    ]
    style_phrase = style_phrases.get(art_style, "detailed style")
    mood_phrase = mood_phrases.get(mood, "atmospheric")

    # Sample type-specific keywords first, then generic ones.
    picked_type = random.sample(type_keywords, min(3, len(type_keywords)))
    picked_generic = random.sample(generic_keywords, min(2, len(generic_keywords)))

    pieces = [user_input, style_phrase, mood_phrase, *picked_type, *picked_generic]
    enhanced_prompt = ", ".join(pieces)

    logger.info(f"Fallback enhanced prompt: {enhanced_prompt[:100]}...")
    return enhanced_prompt
|
|
|
|
|
def generate_image(description, creation_type, art_style, mood, model_name):
    """Enhance the description and generate an image from it.

    Args:
        description: The user's description of the desired image.
        creation_type: Creation type label forwarded to prompt enhancement.
        art_style: Art style label forwarded to prompt enhancement.
        mood: Mood label forwarded to prompt enhancement.
        model_name: Model id passed to ``hf_client.text_to_image``.

    Returns:
        A 3-tuple ``(image, message, prompt)``. ``image`` is whatever
        ``hf_client.text_to_image`` returned, or ``None`` on failure.
        ``message`` is a status or error string. ``prompt`` is the prompt
        that was (or would have been) sent to the model. This function does
        not raise; failures are reported through ``message``.
    """
    # Reject blank input up front instead of sending an empty prompt to the API.
    if not description or not description.strip():
        return None, "Error: please describe the image you want to generate", ""

    # Bound before the try block so the except handler can always return it,
    # even when prompt enhancement itself is what failed.
    enhanced_prompt = description

    try:
        logger.info(f"Generating image with model: {model_name}")

        enhanced_prompt = enhance_prompt_with_llama(description, creation_type, art_style, mood)

        if hf_client is None:
            logger.error("Hugging Face client not available")
            return None, "Error: Hugging Face client not available", enhanced_prompt

        logger.info(f"Sending request to model {model_name} with prompt: {enhanced_prompt[:100]}...")
        image = hf_client.text_to_image(
            prompt=enhanced_prompt,
            model=model_name,
            negative_prompt="low quality, blurry, distorted, deformed, disfigured, bad anatomy, watermark, signature, text",
        )
        logger.info("Image generated successfully")

        # NOTE(review): this reports the configured enhancement method. If the
        # Llama call failed at request time the fallback was used even though
        # use_llama is True.
        analysis = f"Image generated using model: {model_name}\n"
        if use_llama:
            analysis += "Prompt enhanced with Llama 4"
        else:
            analysis += "Prompt enhanced with fallback method"

        return image, analysis, enhanced_prompt
    except Exception as e:
        # Boundary with the UI: surface the error as text instead of raising.
        logger.error(f"Error generating image: {str(e)}")
        return None, f"Error generating image: {str(e)}", enhanced_prompt
|
|
|
|
|
# Model preselected in the dropdown; also used to seed the model label.
_DEFAULT_IMAGE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"

# Help text for the "Tips for better results" accordion.
_TIPS_MARKDOWN = """
### 💡 Tips for better results:

- **Be specific** about what you want to see - include details about subjects, actions, setting
- **Mention colors, textures, lighting** if they're important to your vision
- **Try different art styles** to dramatically change the look and feel
- **The mood selection** influences the overall atmosphere and color palette
- **SDXL model** generally produces the highest quality images but takes longer

#### Examples of good descriptions:

- *"A serene lake at sunset with mountains in the background and a small wooden boat floating nearby"*
- *"A futuristic cityscape with flying cars, neon lights, and tall skyscrapers under a night sky with two moons"*
- *"A close-up portrait of an elderly craftsman with weathered hands working on an intricate wooden carving in his workshop"*
"""

# Help text for the "Troubleshooting" accordion.
_TROUBLESHOOTING_MARKDOWN = """
### Troubleshooting Tips

If you encounter errors:

1. **Check the console/terminal** where you're running this code for detailed logs
2. **Verify your Hugging Face API key** is correct and has the right permissions
3. **Try a different model** if you get access restriction errors
4. **Simplify your prompt** if it's very long or complex
5. **Restart the app** if you've been running it for a long time

Common errors:
- 401/403 errors: API key issues or model access restrictions
- 429 errors: Rate limiting (too many requests)
- 503 errors: Service temporarily unavailable
"""


def format_model_name(model_key):
    """Return the display label for a model id, or the id itself if unknown."""
    return IMAGE_MODELS.get(model_key, model_key)


def _render_model_label(model_key):
    """Return the HTML snippet naming the currently selected model."""
    return f"<p>Selected model: <b>{format_model_name(model_key)}</b></p>"


# NOTE(review): the nesting below was reconstructed from source whose
# indentation was lost. Confirm that the two help accordions belong below the
# main row rather than inside the right-hand column.
with gr.Blocks(title="AI Image Creator") as interface:
    gr.Markdown("# 🎨 AI Image Creator")
    gr.Markdown("### Transform your ideas into stunning images with AI")

    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            description_input = gr.Textbox(
                label="Describe what you want to see",
                placeholder="Be detailed and specific about what you want in the image...",
                lines=4,
            )

            with gr.Row():
                creation_type = gr.Dropdown(
                    choices=list(CREATION_TYPES.keys()),
                    value="Digital Art",
                    label="Creation Type",
                )
                model_selector = gr.Dropdown(
                    choices=list(IMAGE_MODELS.keys()),
                    value=_DEFAULT_IMAGE_MODEL,
                    label="Image Model",
                )

            with gr.Row():
                art_style = gr.Dropdown(
                    choices=list(ART_STYLES.keys()),
                    value="Photorealistic",
                    label="Art Style",
                )
                mood_dropdown = gr.Dropdown(
                    choices=["Happy", "Sad", "Mysterious", "Peaceful", "Tense",
                             "Whimsical", "Dark", "Energetic", "Romantic", "Epic"],
                    value="Peaceful",
                    label="Mood",
                )

            generate_button = gr.Button("✨ Generate Image", variant="primary", size="lg")

            # Seed the label with the preselected model so it is not blank
            # until the first dropdown change.
            model_label = gr.HTML(value=_render_model_label(_DEFAULT_IMAGE_MODEL))
            model_selector.change(
                fn=_render_model_label,
                inputs=model_selector,
                outputs=model_label,
            )

        # Right column: outputs.
        with gr.Column():
            image_output = gr.Image(label="Generated Image")

            with gr.Accordion("Enhanced Prompt", open=False):
                prompt_output = gr.Textbox(label="AI-Enhanced Prompt Used", lines=6)

    # generate_image returns (image, status message, prompt). The status
    # message is shown in the same HTML element as the model label.
    generate_button.click(
        fn=generate_image,
        inputs=[description_input, creation_type, art_style, mood_dropdown, model_selector],
        outputs=[image_output, model_label, prompt_output],
    )

    with gr.Accordion("Tips for better results", open=True):
        gr.Markdown(_TIPS_MARKDOWN)

    with gr.Accordion("Troubleshooting", open=False):
        gr.Markdown(_TROUBLESHOOTING_MARKDOWN)


# Start the server only when run as a script, so importing this module (for
# example from tests) does not block on launch().
if __name__ == "__main__":
    interface.launch()