tonko22 committed on
Commit
26dfe2c
·
1 Parent(s): fd656a9

Forgotten file

Browse files
Files changed (1) hide show
  1. tools/image_generation_tools.py +157 -0
tools/image_generation_tools.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Image generation tools for visualizing song analysis results.
3
+ """
4
+
5
+ import os
6
+ from typing import Dict
7
+ from loguru import logger
8
+ from smolagents import Tool
9
+
10
+ from api_utils import make_api_call_with_retry
11
+
12
+
13
def caption_gen_tool(analysis_json: Dict, title: str, artist: str) -> str:
    """
    Generate a descriptive caption for image generation based on song analysis.
    Uses LLM to create a high-quality image prompt based on the analysis.

    Args:
        analysis_json: Song analysis results. Normally a dictionary; a JSON
            string is decoded first, and any other value (e.g. None) is
            treated as an empty analysis so the defaults below apply.
            Keys read: "mood", "main_themes", "summary", "conclusion".
        title: Song title
        artist: Song artist

    Returns:
        The stripped text returned by make_api_call_with_retry for the
        caption prompt.
    """
    # Local import keeps this function self-contained; only needed when the
    # analysis arrives as a JSON string.
    import json

    logger.info("Generating image caption from analysis results")

    # The tool declares this input as "any", so be tolerant of a JSON string
    # or a missing value instead of crashing on `.get`.
    if isinstance(analysis_json, str):
        try:
            analysis_json = json.loads(analysis_json)
        except ValueError:
            logger.warning("analysis_json is not valid JSON, using default values for caption generation")
            analysis_json = {}
    if not isinstance(analysis_json, dict):
        analysis_json = {}

    # Use the provided title and artist
    logger.info("Using song: '{}' by '{}' for caption generation", title, artist)
    mood = analysis_json.get("mood") or "emotional"

    # A bare string must be wrapped before joining: ", ".join("love") would
    # otherwise yield "l, o, v, e". Items are stringified so a non-string
    # theme cannot break the join.
    raw_themes = analysis_json.get("main_themes") or ["music"]
    if isinstance(raw_themes, str):
        raw_themes = [raw_themes]
    themes = ", ".join(str(theme) for theme in raw_themes)

    summary = str(analysis_json.get("summary") or "")
    conclusion = str(analysis_json.get("conclusion") or "")

    # Create an API prompt to generate a high-quality image caption.
    # Summary and conclusion are truncated to 200 characters each.
    prompt = f"""Generate a detailed, vivid, and artistic image generation prompt based on the following song analysis.
This prompt will be used by an AI image generator to create a visual representation of the song's essence.

Song: {title} by {artist}
Mood: {mood}
Themes: {themes}
Summary: {summary[:200]}
Conclusion: {conclusion[:200]}

Your task is to create a single paragraph (approximately 100-150 words) that vividly describes a scene or abstract image
that captures the emotional essence and themes of this song. The description should be detailed, visual, and evocative.
DO NOT include any text, words, or lyrics in the image description. Focus on colors, composition, mood, symbols, and visuals only.

ONLY output the final image generation prompt with no additional text, explanations, or formatting.
"""

    # Use the same model as in lyrics analysis
    model_to_use = "openrouter/google/gemini-2.0-flash-lite-preview-02-05:free"
    logger.info("Using {} for caption generation", model_to_use)

    # Call the API to generate a caption
    logger.info("Generating image caption for song: '{}' by '{}'", title, artist)
    response_text = make_api_call_with_retry(model_to_use, prompt)

    # Clean up the response if needed
    caption = response_text.strip()
    logger.debug("Generated image caption: {}...", caption[:100])

    return caption
65
+
66
+
67
class GenerateImageTool(Tool):
    """Tool for generating images based on song analysis"""

    name = "generate_image"
    description = "Generates an image based on the song analysis results"
    inputs = {
        "analysis_json": {"type": "any", "description": "JSON dictionary containing the analysis results"},
        "title": {"type": "string", "description": "Title of the song"},
        "artist": {"type": "string", "description": "Artist of the song"}
    }
    output_type = "string"

    def generate_with_gemini(self, caption: str) -> str:
        """
        Generate image using Gemini API directly

        Args:
            caption: The prompt text for image generation

        Returns:
            HTML img tag with the image embedded as a base64 data URI, or an
            HTML paragraph containing an error message. Errors are reported
            in the returned string rather than raised.
        """
        try:
            # Imported lazily so the module still loads when the SDK is absent.
            # `Client` and `types.GenerateContentConfig` belong to the
            # google-genai SDK (`google.genai`), not to the legacy
            # `google.generativeai` package.
            import base64

            from google import genai
            from google.genai import types

            # Get API key from environment variable
            api_key = os.environ.get("GEMINI_API_KEY")
            if not api_key:
                logger.error("GEMINI_API_KEY not found in environment variables")
                return "<p>Error: Gemini API key not found. Please set the GEMINI_API_KEY environment variable.</p>"

            logger.info("Initializing Gemini client")
            client = genai.Client(api_key=api_key)

            logger.info("Generating image with Gemini")
            response = client.models.generate_content(
                model="gemini-2.0-flash-exp-image-generation",
                contents=caption,
                config=types.GenerateContentConfig(
                    response_modalities=['Text', 'Image']
                )
            )

            # Process the response; guard against an empty candidate list or
            # a candidate without content.
            candidates = response.candidates or []
            content = candidates[0].content if candidates else None
            parts = (content.parts if content is not None else None) or []

            for part in parts:
                if part.text is not None:
                    logger.info("Gemini response text: {}...", part.text[:100])
                    continue

                inline_data = getattr(part, 'inline_data', None)
                if inline_data is None or not inline_data.data:
                    continue

                # The SDK hands back raw image bytes; a data URI needs base64
                # text. A str payload is assumed to be base64 already.
                payload = inline_data.data
                if isinstance(payload, (bytes, bytearray)):
                    image_b64 = base64.b64encode(payload).decode("ascii")
                else:
                    image_b64 = payload

                # Use the MIME type reported by the API, defaulting to PNG.
                mime_type = getattr(inline_data, 'mime_type', None) or "image/png"
                img_html = f'<img src="data:{mime_type};base64,{image_b64}" alt="Generated image based on song analysis" style="max-width:100%; border-radius:10px; box-shadow:0 4px 8px rgba(0,0,0,0.1);">'
                return img_html

            return "<p>Error: No image generated by Gemini API.</p>"

        except ImportError:
            logger.error("Google GenAI package not installed")
            return "<p>Error: Google GenAI package not installed. Install with 'pip install google-genai'</p>"
        except Exception as e:
            logger.error("Error generating image with Gemini: {}", str(e))
            return f"<p>Error generating image with Gemini: {str(e)}</p>"

    def forward(self, analysis_json: Dict, title: str, artist: str) -> str:
        """
        Generates an image based on the analysis results using Gemini API.

        Args:
            analysis_json: Dictionary containing the analysis results
            title: Song title
            artist: Song artist

        Returns:
            HTML img tag with the image or error message
        """
        try:
            # Generate caption for the image
            caption = caption_gen_tool(analysis_json, title=title, artist=artist)
            logger.info("Caption generated successfully")

            # Gemini is the only image backend used here, so no
            # "fallback" warning is logged.
            return self.generate_with_gemini(caption)

        except Exception as e:
            logger.error("Error in image generation: {}", str(e))
            return f"<p>Error in image generation: {str(e)}</p>"