Spaces:

tonko22
/

LyricsAnalyzerAgent

Sleeping

App Files Files Community

tonko22 committed on Mar 26

Commit

26dfe2c

1 Parent(s): fd656a9

Forgotten file

Browse files

Files changed (1) hide show

tools/image_generation_tools.py +157 -0

tools/image_generation_tools.py ADDED Viewed

	@@ -0,0 +1,157 @@

+"""
+Image generation tools for visualizing song analysis results.
+"""
+import os
+from typing import Dict
+from loguru import logger
+from smolagents import Tool
+from api_utils import make_api_call_with_retry
+def caption_gen_tool(analysis_json: Dict, title: str, artist: str) -> str:
+    """
+    Generate a descriptive caption for image generation based on song analysis.
+    Uses LLM to create a high-quality image prompt based on the analysis.
+    Args:
+        analysis_json: Dictionary containing the song analysis results
+        title: Song title (required)
+        artist: Song artist
+    Returns:
+        A descriptive caption suitable for image generation
+    """
+    logger.info("Generating image caption from analysis results")
+    # Use the provided title and artist
+    logger.info(f"Using song: '{title}' by '{artist}' for caption generation")
+    mood = analysis_json.get("mood") or "emotional"
+    themes = ", ".join(analysis_json.get("main_themes") or ["music"])
+    summary = analysis_json.get("summary") or ""
+    conclusion = analysis_json.get("conclusion") or ""
+    # Create an API prompt to generate a high-quality image caption
+    prompt = f"""Generate a detailed, vivid, and artistic image generation prompt based on the following song analysis.
+    This prompt will be used by an AI image generator to create a visual representation of the song's essence.
+    Song: {title} by {artist}
+    Mood: {mood}
+    Themes: {themes}
+    Summary: {summary[:200] if summary else ""}
+    Conclusion: {conclusion[:200] if conclusion else ""}
+    Your task is to create a single paragraph (approximately 100-150 words) that vividly describes a scene or abstract image
+    that captures the emotional essence and themes of this song. The description should be detailed, visual, and evocative.
+    DO NOT include any text, words, or lyrics in the image description. Focus on colors, composition, mood, symbols, and visuals only.
+    ONLY output the final image generation prompt with no additional text, explanations, or formatting.
+    """
+    # Use the same model as in lyrics analysis
+    model_to_use = "openrouter/google/gemini-2.0-flash-lite-preview-02-05:free"
+    logger.info("Using {} for caption generation", model_to_use)
+    # Call the API to generate a caption
+    logger.info("Generating image caption for song: '{}' by '{}'", title, artist)
+    response_text = make_api_call_with_retry(model_to_use, prompt)
+    # Clean up the response if needed
+    caption = response_text.strip()
+    logger.debug(f"Generated image caption: {caption[:100]}...")
+    return caption
+class GenerateImageTool(Tool):
+    """Tool for generating images based on song analysis"""
+    name = "generate_image"
+    description = "Generates an image based on the song analysis results"
+    inputs = {
+        "analysis_json": {"type": "any", "description": "JSON dictionary containing the analysis results"},
+        "title": {"type": "string", "description": "Title of the song"},
+        "artist": {"type": "string", "description": "Artist of the song"}
+    }
+    output_type = "string"
+    def generate_with_gemini(self, caption: str) -> str:
+        """
+        Generate image using Gemini API directly
+        Args:
+            caption: The prompt text for image generation
+        Returns:
+            HTML img tag with the image or error message
+        """
+        try:
+            # Правильный импорт библиотеки
+            import google.generativeai as genai
+            from google.generativeai import types
+            # Get API key from environment variable
+            api_key = os.environ.get("GEMINI_API_KEY")
+            if not api_key:
+                logger.error("GEMINI_API_KEY not found in environment variables")
+                return "<p>Error: Gemini API key not found. Please set the GEMINI_API_KEY environment variable.</p>"
+            logger.info("Initializing Gemini client")
+            genai.configure(api_key=api_key)
+            client = genai.Client()
+            logger.info("Generating image with Gemini")
+            response = client.models.generate_content(
+                model="gemini-2.0-flash-exp-image-generation",
+                contents=caption,
+                config=types.GenerateContentConfig(
+                    response_modalities=['Text', 'Image']
+                )
+            )
+            # Process the response
+            for part in response.candidates[0].content.parts:
+                if part.text is not None:
+                    logger.info(f"Gemini response text: {part.text[:100]}...")
+                elif hasattr(part, 'inline_data') and part.inline_data is not None:
+                    # Save the image to a temporary file
+                    # inline_data.data уже содержит данные в формате base64
+                    image_b64 = part.inline_data.data
+                    img_html = f'<img src="data:image/png;base64,{image_b64}" alt="Generated image based on song analysis" style="max-width:100%; border-radius:10px; box-shadow:0 4px 8px rgba(0,0,0,0.1);">'
+                    return img_html
+            return "<p>Error: No image generated by Gemini API.</p>"
+        except ImportError:
+            logger.error("Google GenAI package not installed")
+            return "<p>Error: Google GenAI package not installed. Install with 'pip install google-generativeai'</p>"
+        except Exception as e:
+            logger.error(f"Error generating image with Gemini: {str(e)}")
+            return f"<p>Error generating image with Gemini: {str(e)}</p>"
+    def forward(self, analysis_json: Dict, title: str, artist: str) -> str:
+        """
+        Generates an image based on the analysis results using Gemini API.
+        Args:
+            analysis_json: Dictionary containing the analysis results
+            title: Song title
+            artist: Song artist (required)
+        Returns:
+            HTML img tag with the image or error message
+        """
+        try:
+            # Generate caption for the image
+            caption = caption_gen_tool(analysis_json, title=title, artist=artist)
+            logger.info("Caption generated successfully")
+            logger.warning("OpenRouter failed, falling back to Gemini API")
+            # Fall back to Gemini API
+            result = self.generate_with_gemini(caption)
+            return result
+        except Exception as e:
+            logger.error(f"Error in image generation: {str(e)}")
+            return f"<p>Error in image generation: {str(e)}</p>"