codelion committed on
Commit
0e27fad
·
verified ·
1 Parent(s): 7dfe473

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +203 -172
app.py CHANGED
@@ -141,188 +141,219 @@ def generate_item(user_input, ideas, generate_video=False, max_retries=3):
141
  user_input (str): The user's input concept or idea.
142
  ideas (list): List of ideas to choose from.
143
  generate_video (bool): Whether to generate a video from the image.
144
- max_retries (int): Maximum number of retries for both image and video generation.
145
 
146
  Returns:
147
  dict: A dictionary with 'text' (str), 'image_base64' (str), 'video_base64' (str or None), and 'ideas' (list).
148
  """
149
  video_base64 = None
 
150
 
151
- # Retry loop for image generation
152
- for attempt in range(max_retries):
153
- selected_idea = random.choice(ideas)
154
- prompt = f"""
155
- The user has provided the concept: "{user_input}". Based on this concept and the specific idea "{selected_idea}", create content for a TikTok video.
156
- Return a JSON object with two keys:
157
- - 'caption': A short, viral TikTok-style caption with hashtags that reflects "{user_input}".
158
- - 'image_prompt': A detailed image prompt for generating a high-quality visual scene, ensuring the theme of "{user_input}" is central.
159
- The image prompt should describe the scene vividly, specify a perspective and style, and ensure no text or letters are included.
160
- Ensure the response is strictly in JSON format.
161
- Example: {{"caption": "Blindfolded Rubik's Cube MAGIC! 🤯 #rubiks", "image_prompt": "A close-up view of a person solving a Rubik's Cube blindfolded, in a dramatic style, no text or letters"}}
162
- """
163
- try:
164
- response = client.models.generate_content(
165
- model='gemini-2.0-flash',
166
- contents=[prompt],
167
- config=types.GenerateContentConfig(temperature=1.2)
168
- )
169
- print(f"Raw response for item (attempt {attempt + 1}): {response.text}") # Debugging
170
- if not response.text or response.text.isspace():
171
- raise ValueError("Empty response from API")
172
- cleaned_text = clean_response_text(response.text)
173
- response_json = json.loads(cleaned_text)
174
- if 'caption' not in response_json or 'image_prompt' not in response_json:
175
- raise ValueError("Invalid JSON format: 'caption' or 'image_prompt' key missing")
176
- text = response_json['caption']
177
- image_prompt = response_json['image_prompt']
178
- except Exception as e:
179
- print(f"Error generating item (attempt {attempt + 1}): {e}")
180
- text = f"Amazing {user_input}! 🔥 #{user_input.replace(' ', '')}"
181
- image_prompt = f"A vivid scene of {selected_idea} related to {user_input}, in a vibrant pop art style, no text or letters"
182
-
183
- # Attempt to generate the image
184
- try:
185
- imagen = client.models.generate_images(
186
- model='imagen-3.0-generate-002',
187
- prompt=image_prompt,
188
- config=types.GenerateImagesConfig(
189
- aspect_ratio="9:16",
190
- number_of_images=1
 
 
 
 
 
191
  )
192
- )
193
- if imagen.generated_images and len(imagen.generated_images) > 0:
194
- generated_image = imagen.generated_images[0]
195
- image = Image.open(BytesIO(generated_image.image.image_bytes))
196
- # Ensure the image matches the desired aspect ratio (9:16 = 0.5625)
197
- target_width = 360
198
- target_height = int(target_width / 9 * 16) # 9:16 aspect ratio
199
- image = image.resize((target_width, target_height), Image.LANCZOS)
200
- # Convert image to base64
201
- buffered = BytesIO()
202
- image.save(buffered, format="PNG")
203
- img_str = base64.b64encode(buffered.getvalue()).decode()
204
-
205
- # Generate video if enabled (with retries)
206
- if generate_video:
207
- for video_attempt in range(max_retries):
208
- try:
209
- # Base video prompt
210
- video_prompt_base = f"""
211
- The user concept is "{user_input}". Based on this and the scene: {image_prompt}, create a video.
212
- Use a close-up shot with a slow dolly shot circling around the subject,
213
- using shallow focus on the main subject to emphasize details, in a realistic style with cinematic lighting.
214
- """
215
- # Modify the prompt slightly for each retry
216
- if video_attempt == 0:
217
- video_prompt = video_prompt_base
218
- elif video_attempt == 1:
219
- video_prompt = f"""
220
- The user concept is "{user_input}". Based on this and the scene: {image_prompt}, create a video.
221
- Use a close-up shot focusing on the subject,
222
- with soft lighting and a realistic style.
223
- """
224
- else:
225
- video_prompt = f"""
226
- The user concept is "{user_input}". Based on this and a simplified scene: {image_prompt}, create a video.
227
- Use a static close-up shot of the subject in a realistic style.
228
- """
229
-
230
- print(f"Attempting video generation (attempt {video_attempt + 1}): {video_prompt}")
231
- operation = client.models.generate_videos(
232
- model="veo-2.0-generate-001",
233
- prompt=video_prompt,
234
- image=generated_image.image,
235
- config=types.GenerateVideosConfig(
236
- aspect_ratio="9:16",
237
- number_of_videos=1,
238
- duration_seconds=8,
239
- negative_prompt="blurry, low quality, text, letters"
240
- )
241
- )
242
- # Wait for video to generate
243
- while not operation.done:
244
- time.sleep(20)
245
- operation = client.operations.get(operation)
246
-
247
- # Enhanced error handling for video generation response
248
- if operation.error:
249
- raise ValueError(f"Video generation operation failed with error: {operation.error.message}")
250
- if operation.response is None:
251
- raise ValueError("Video generation operation failed: No response")
252
- if not hasattr(operation.response, 'generated_videos') or operation.response.generated_videos is None:
253
- raise ValueError("Video generation operation failed: No generated_videos in response")
254
-
255
- # Process the single generated video
256
- if len(operation.response.generated_videos) > 0:
257
- video = operation.response.generated_videos[0]
258
- if video is None or not hasattr(video, 'video'):
259
- raise ValueError("Video is invalid or missing video data")
260
- fname = 'with_image_input.mp4'
261
- print(f"Generated video: {fname}")
262
- # Download the video and get the raw bytes
263
- video_data = client.files.download(file=video.video)
264
- # Ensure video_data is in bytes
265
- if isinstance(video_data, bytes):
266
- video_bytes = video_data
267
- else:
268
- # If video_data is a file-like object, read the bytes
269
- video_buffer = BytesIO()
270
- for chunk in video_data:
271
- video_buffer.write(chunk)
272
- video_bytes = video_buffer.getvalue()
273
- # Encode the video bytes as base64
274
- video_base64 = base64.b64encode(video_bytes).decode()
275
- break # Success, exit the retry loop
276
- else:
277
- raise ValueError("No video was generated")
278
- except Exception as e:
279
- print(f"Error generating video (attempt {video_attempt + 1}): {e}")
280
- if video_attempt == max_retries - 1:
281
- print("Max retries reached for video generation. Proceeding without video.")
282
- video_base64 = None
283
- else:
284
- continue # Retry with a modified prompt
285
-
286
- return {
287
- 'text': text,
288
- 'image_base64': img_str,
289
- 'video_base64': video_base64,
290
- 'ideas': ideas
291
- }
292
- else:
293
- print(f"Image generation failed (attempt {attempt + 1}): No images returned")
294
- if attempt == max_retries - 1:
295
- # Last attempt, use a gray placeholder
296
- image = Image.new('RGB', (360, 640), color='gray')
297
  buffered = BytesIO()
298
  image.save(buffered, format="PNG")
299
  img_str = base64.b64encode(buffered.getvalue()).decode()
300
- return {
301
- 'text': text,
302
- 'image_base64': img_str,
303
- 'video_base64': None,
304
- 'ideas': ideas
305
- }
306
- # Retry with new ideas
307
- ideas = generate_ideas(user_input)
308
- continue
309
- except Exception as e:
310
- print(f"Error generating image (attempt {attempt + 1}): {e}")
311
- if attempt == max_retries - 1:
312
- # Last attempt, use a gray placeholder
313
- image = Image.new('RGB', (360, 640), color='gray')
314
- buffered = BytesIO()
315
- image.save(buffered, format="PNG")
316
- img_str = base64.b64encode(buffered.getvalue()).decode()
317
- return {
318
- 'text': text,
319
- 'image_base64': img_str,
320
- 'video_base64': None,
321
- 'ideas': ideas
322
- }
323
- # Retry with new ideas
324
- ideas = generate_ideas(user_input)
325
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
 
327
  def start_feed(user_input, generate_video, current_index, feed_items):
328
  """
 
141
  user_input (str): The user's input concept or idea.
142
  ideas (list): List of ideas to choose from.
143
  generate_video (bool): Whether to generate a video from the image.
144
+ max_retries (int): Maximum number of retries for image generation per cycle.
145
 
146
  Returns:
147
  dict: A dictionary with 'text' (str), 'image_base64' (str), 'video_base64' (str or None), and 'ideas' (list).
148
  """
149
  video_base64 = None
150
+ max_total_attempts = 3 # Maximum total attempts for combined image and video generation cycles
151
 
152
+ total_attempts = 0
153
+ while total_attempts < max_total_attempts:
154
+ total_attempts += 1
155
+
156
+ # Step 1: Generate an image (retry up to max_retries times)
157
+ for image_attempt in range(max_retries):
158
+ selected_idea = random.choice(ideas)
159
+ prompt = f"""
160
+ The user has provided the concept: "{user_input}". Based on this concept and the specific idea "{selected_idea}", create content for a TikTok video.
161
+ Return a JSON object with two keys:
162
+ - 'caption': A short, viral TikTok-style caption with hashtags that reflects "{user_input}".
163
+ - 'image_prompt': A detailed image prompt for generating a high-quality visual scene, ensuring the theme of "{user_input}" is central.
164
+ The image prompt should describe the scene vividly, specify a perspective and style, and ensure no text or letters are included.
165
+ Ensure the response is strictly in JSON format.
166
+ Example: {{"caption": "Blindfolded Rubik's Cube MAGIC! 🤯 #rubiks", "image_prompt": "A close-up view of a person solving a Rubik's Cube blindfolded, in a dramatic style, no text or letters"}}
167
+ """
168
+ try:
169
+ response = client.models.generate_content(
170
+ model='gemini-2.0-flash',
171
+ contents=[prompt],
172
+ config=types.GenerateContentConfig(temperature=1.2)
173
+ )
174
+ print(f"Raw response for item (image attempt {image_attempt + 1}, total attempt {total_attempts}): {response.text}") # Debugging
175
+ if not response.text or response.text.isspace():
176
+ raise ValueError("Empty response from API")
177
+ cleaned_text = clean_response_text(response.text)
178
+ response_json = json.loads(cleaned_text)
179
+ if 'caption' not in response_json or 'image_prompt' not in response_json:
180
+ raise ValueError("Invalid JSON format: 'caption' or 'image_prompt' key missing")
181
+ text = response_json['caption']
182
+ image_prompt = response_json['image_prompt']
183
+ except Exception as e:
184
+ print(f"Error generating item (image attempt {image_attempt + 1}, total attempt {total_attempts}): {e}")
185
+ text = f"Amazing {user_input}! 🔥 #{user_input.replace(' ', '')}"
186
+ image_prompt = f"A vivid scene of {selected_idea} related to {user_input}, in a vibrant pop art style, no text or letters"
187
+
188
+ # Attempt to generate the image
189
+ try:
190
+ imagen = client.models.generate_images(
191
+ model='imagen-3.0-generate-002',
192
+ prompt=image_prompt,
193
+ config=types.GenerateImagesConfig(
194
+ aspect_ratio="9:16",
195
+ number_of_images=1
196
+ )
197
  )
198
+ if imagen.generated_images and len(imagen.generated_images) > 0:
199
+ generated_image = imagen.generated_images[0]
200
+ image = Image.open(BytesIO(generated_image.image.image_bytes))
201
+ # Ensure the image matches the desired aspect ratio (9:16 = 0.5625)
202
+ target_width = 360
203
+ target_height = int(target_width / 9 * 16) # 9:16 aspect ratio
204
+ image = image.resize((target_width, target_height), Image.LANCZOS)
205
+ # Convert image to base64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  buffered = BytesIO()
207
  image.save(buffered, format="PNG")
208
  img_str = base64.b64encode(buffered.getvalue()).decode()
209
+ break # Successfully generated image, exit image retry loop
210
+ else:
211
+ print(f"Image generation failed (image attempt {image_attempt + 1}, total attempt {total_attempts}): No images returned")
212
+ if image_attempt == max_retries - 1:
213
+ # Last image attempt in this cycle; fall back to a gray placeholder only if max total attempts has been reached
214
+ if total_attempts == max_total_attempts:
215
+ image = Image.new('RGB', (360, 640), color='gray')
216
+ buffered = BytesIO()
217
+ image.save(buffered, format="PNG")
218
+ img_str = base64.b64encode(buffered.getvalue()).decode()
219
+ return {
220
+ 'text': text,
221
+ 'image_base64': img_str,
222
+ 'video_base64': None,
223
+ 'ideas': ideas
224
+ }
225
+ # Otherwise, select a new idea and retry image generation in the next cycle
226
+ continue
227
+ except Exception as e:
228
+ print(f"Error generating image (image attempt {image_attempt + 1}, total attempt {total_attempts}): {e}")
229
+ if image_attempt == max_retries - 1:
230
+ # Last image attempt in this cycle
231
+ if total_attempts == max_total_attempts:
232
+ # Max total attempts reached, use a gray placeholder
233
+ image = Image.new('RGB', (360, 640), color='gray')
234
+ buffered = BytesIO()
235
+ image.save(buffered, format="PNG")
236
+ img_str = base64.b64encode(buffered.getvalue()).decode()
237
+ return {
238
+ 'text': text,
239
+ 'image_base64': img_str,
240
+ 'video_base64': None,
241
+ 'ideas': ideas
242
+ }
243
+ # Otherwise, select a new idea and retry image generation in the next cycle
244
+ continue
245
+
246
+ # Step 2: Generate video if enabled (with retries using the same image)
247
+ if generate_video:
248
+ max_video_retries_per_image = 1 # Try video generation once per image (range(1) yields a single attempt)
249
+ for video_attempt in range(max_video_retries_per_image):
250
+ try:
251
+ # Base video prompt
252
+ video_prompt_base = f"""
253
+ The user concept is "{user_input}". Based on this and the scene: {image_prompt}, create a video.
254
+ Use a close-up shot with a slow dolly shot circling around the subject,
255
+ using shallow focus on the main subject to emphasize details, in a realistic style with cinematic lighting.
256
+ """
257
+ # Modify the prompt slightly for each retry
258
+ if video_attempt == 0:
259
+ video_prompt = video_prompt_base
260
+ else:
261
+ video_prompt = f"""
262
+ The user concept is "{user_input}". Based on this and a simplified scene: {image_prompt}, create a video.
263
+ Use a static close-up shot of the subject in a realistic style.
264
+ """
265
+
266
+ print(f"Attempting video generation (video attempt {video_attempt + 1}, total attempt {total_attempts}): {video_prompt}")
267
+ operation = client.models.generate_videos(
268
+ model="veo-2.0-generate-001",
269
+ prompt=video_prompt,
270
+ image=generated_image.image,
271
+ config=types.GenerateVideosConfig(
272
+ aspect_ratio="9:16",
273
+ number_of_videos=1,
274
+ duration_seconds=8,
275
+ negative_prompt="blurry, low quality, text, letters"
276
+ )
277
+ )
278
+ # Wait for video to generate
279
+ while not operation.done:
280
+ time.sleep(20)
281
+ operation = client.operations.get(operation)
282
+
283
+ # Enhanced error handling for video generation response
284
+ if operation.error:
285
+ raise ValueError(f"Video generation operation failed with error: {operation.error.message}")
286
+ if operation.response is None:
287
+ raise ValueError("Video generation operation failed: No response")
288
+ if not hasattr(operation.response, 'generated_videos') or operation.response.generated_videos is None:
289
+ raise ValueError("Video generation operation failed: No generated_videos in response")
290
+
291
+ # Process the single generated video
292
+ if len(operation.response.generated_videos) > 0:
293
+ video = operation.response.generated_videos[0]
294
+ if video is None or not hasattr(video, 'video'):
295
+ raise ValueError("Video is invalid or missing video data")
296
+ fname = 'with_image_input.mp4'
297
+ print(f"Generated video: {fname}")
298
+ # Download the video and get the raw bytes
299
+ video_data = client.files.download(file=video.video)
300
+ # Ensure video_data is in bytes
301
+ if isinstance(video_data, bytes):
302
+ video_bytes = video_data
303
+ else:
304
+ # If video_data is a file-like object, read the bytes
305
+ video_buffer = BytesIO()
306
+ for chunk in video_data:
307
+ video_buffer.write(chunk)
308
+ video_bytes = video_buffer.getvalue()
309
+ # Encode the video bytes as base64
310
+ video_base64 = base64.b64encode(video_bytes).decode()
311
+ # Successfully generated video, return the result
312
+ return {
313
+ 'text': text,
314
+ 'image_base64': img_str,
315
+ 'video_base64': video_base64,
316
+ 'ideas': ideas
317
+ }
318
+ else:
319
+ raise ValueError("No video was generated")
320
+ except Exception as e:
321
+ print(f"Error generating video (video attempt {video_attempt + 1}, total attempt {total_attempts}): {e}")
322
+ if video_attempt == max_video_retries_per_image - 1:
323
+ if total_attempts == max_total_attempts:
324
+ print("Max total attempts reached. Proceeding without video.")
325
+ video_base64 = None
326
+ return {
327
+ 'text': text,
328
+ 'image_base64': img_str,
329
+ 'video_base64': video_base64,
330
+ 'ideas': ideas
331
+ }
332
+ # Video generation failed with this image, break to outer loop to try a new image
333
+ print(f"Video generation failed after {max_video_retries_per_image} attempts with this image. Selecting a new idea and generating a new image.")
334
+ break
335
+ continue # Retry video generation with the same image but a modified prompt
336
+
337
+ # If video generation is not enabled or succeeded, return the result
338
+ return {
339
+ 'text': text,
340
+ 'image_base64': img_str,
341
+ 'video_base64': video_base64,
342
+ 'ideas': ideas
343
+ }
344
+
345
+ # If max total attempts reached without success, use a gray placeholder image
346
+ print("Max total attempts reached without successful image generation. Using placeholder.")
347
+ image = Image.new('RGB', (360, 640), color='gray')
348
+ buffered = BytesIO()
349
+ image.save(buffered, format="PNG")
350
+ img_str = base64.b64encode(buffered.getvalue()).decode()
351
+ return {
352
+ 'text': f"Amazing {user_input}! 🔥 #{user_input.replace(' ', '')}",
353
+ 'image_base64': img_str,
354
+ 'video_base64': None,
355
+ 'ideas': ideas
356
+ }
357
 
358
  def start_feed(user_input, generate_video, current_index, feed_items):
359
  """