QHL067 committed on
Commit
0f7795f
·
1 Parent(s): 9666147
Files changed (1) hide show
  1. app.py +230 -162
app.py CHANGED
@@ -268,6 +268,15 @@ def infer(
268
  # "A delicious ceviche cheesecake slice",
269
  # ]
270
 
 
 
 
 
 
 
 
 
 
271
  examples = [
272
  ["A robot cooking dinner in the kitchen", "An orange cat wearing sunglasses on a ship"],
273
  ]
@@ -283,170 +292,229 @@ with gr.Blocks(css=css) as demo:
283
  gr.Markdown("# CrossFlow")
284
  gr.Markdown("[CrossFlow](https://cross-flow.github.io/) directly transforms text representations into images for text-to-image generation, without the need for both the noise distribution and conditioning mechanism.")
285
  gr.Markdown("This direct mapping enables meaningful 'Linear Interpolation' and 'Arithmetic Operations' in the text latent space, as demonstrated here.")
286
- with gr.Tabs():
287
- with gr.Tab("Linear Interpolation"):
288
- gr.Markdown("This demo uses 256px images, 25 sampling steps (instead of 50), and 10 interpolations (instead of 50) to conserve GPU memory. For better results, see the original [code](https://github.com/qihao067/CrossFlow). (You may adjust them in Advanced Settings, but doing so may trigger OOM errors.)")
289
- # gr.Markdown("CrossFlow directly transforms text representations into images for text-to-image generation, enabling interpolation in the input text latent space.")
290
-
291
- with gr.Row():
292
- prompt1 = gr.Text(
293
- label="Prompt_1",
294
- show_label=False,
295
- max_lines=1,
296
- placeholder="Enter your prompt for the first image",
297
- container=False,
298
- )
299
 
300
- with gr.Row():
301
- prompt2 = gr.Text(
302
- label="Prompt_2",
303
- show_label=False,
304
- max_lines=1,
305
- placeholder="Enter your prompt for the second image",
306
- container=False,
307
- )
308
-
309
- with gr.Row():
310
- run_button = gr.Button("Run", scale=0, variant="primary")
311
-
312
- # Create separate outputs for the first image, last image, and the animated GIF
313
- first_image_output = gr.Image(label="Image of the first prompt", show_label=True)
314
- last_image_output = gr.Image(label="Image of the second prompt", show_label=True)
315
- gif_output = gr.Image(label="Linear interpolation", show_label=True)
316
-
317
- with gr.Accordion("Advanced Settings", open=False):
318
- seed = gr.Slider(
319
- label="Seed",
320
- minimum=0,
321
- maximum=MAX_SEED,
322
- step=1,
323
- value=0,
324
- )
325
-
326
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
327
-
328
- with gr.Row():
329
- guidance_scale = gr.Slider(
330
- label="Guidance scale",
331
- minimum=0.0,
332
- maximum=10.0,
333
- step=0.1,
334
- value=7.0, # Replace with defaults that work for your model
335
- )
336
- with gr.Row():
337
- num_inference_steps = gr.Slider(
338
- label="Number of inference steps - 50 inference steps are recommended; but you can reduce to 20 if the demo fails.",
339
- minimum=1,
340
- maximum=50,
341
- step=1,
342
- value=25, # Replace with defaults that work for your model
343
- )
344
- with gr.Row():
345
- num_of_interpolation = gr.Slider(
346
- label="Number of images for interpolation - More images yield smoother transitions but require more resources and may fail.",
347
- minimum=5,
348
- maximum=50,
349
- step=1,
350
- value=10, # Replace with defaults that work for your model
351
- )
352
-
353
- gr.Examples(examples=examples, inputs=[prompt1, prompt2])
354
-
355
- with gr.Tab("Arithmetic Operations"):
356
- # The second tab is currently empty. You can add more components later.
357
- gr.Markdown("This demo only supports addition or subtraction between two text latents ('Prompt_1 + Prompt_2' or 'Prompt_1 - Prompt_2'). For the other arithmetic operations, see the original [code](https://github.com/qihao067/CrossFlow).")
358
- with gr.Row():
359
- prompt1 = gr.Text(
360
- label="Prompt_1",
361
- show_label=False,
362
- max_lines=1,
363
- placeholder="Enter your prompt for the first image",
364
- container=False,
365
- )
366
 
367
- with gr.Row():
368
- prompt2 = gr.Text(
369
- label="Prompt_2",
370
- show_label=False,
371
- max_lines=1,
372
- placeholder="Enter your prompt for the second image",
373
- container=False,
374
- )
375
-
376
- with gr.Row():
377
- operation_mode = gr.Radio(
378
- choices=["Addition", "Subtraction"],
379
- label="Operation Mode",
380
- value="Addition",
381
- )
382
- with gr.Row():
383
- run_button = gr.Button("Run", scale=0, variant="primary")
384
-
385
- # Create separate outputs for the first image, last image, and the animated GIF
386
- first_image_output = gr.Image(label="Image of the first prompt", show_label=True)
387
- last_image_output = gr.Image(label="Image of the second prompt", show_label=True)
388
- gif_output = gr.Image(label="Linear interpolation", show_label=True)
389
-
390
- with gr.Accordion("Advanced Settings", open=False):
391
- seed = gr.Slider(
392
- label="Seed",
393
- minimum=0,
394
- maximum=MAX_SEED,
395
- step=1,
396
- value=0,
397
- )
398
-
399
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
400
-
401
- with gr.Row():
402
- guidance_scale = gr.Slider(
403
- label="Guidance scale",
404
- minimum=0.0,
405
- maximum=10.0,
406
- step=0.1,
407
- value=7.0, # Replace with defaults that work for your model
408
- )
409
- # with gr.Row():
410
- # num_inference_steps = gr.Slider(
411
- # label="Number of inference steps - 50 inference steps are recommended; but you can reduce to 20 if the demo fails.",
412
- # minimum=1,
413
- # maximum=50,
414
- # step=1,
415
- # value=55, # Replace with defaults that work for your model
416
- # )
417
- if operation_mode == "Addition":
418
- num_inference_steps = -1
419
- elif operation_mode == "Subtraction":
420
- num_inference_steps = -2
421
- else:
422
- num_inference_steps = 0
423
-
424
- with gr.Row():
425
- num_of_interpolation = gr.Slider(
426
- label="Number of images for interpolation - More images yield smoother transitions but require more resources and may fail.",
427
- minimum=5,
428
- maximum=50,
429
- step=1,
430
- value=50, # Replace with defaults that work for your model
431
- )
432
-
433
- gr.Examples(examples=examples, inputs=[prompt1, prompt2])
434
-
435
- gr.on(
436
- triggers=[run_button.click, prompt1.submit, prompt2.submit],
437
- fn=infer,
438
- inputs=[
439
- prompt1,
440
- prompt2,
441
- seed,
442
- randomize_seed,
443
- guidance_scale,
444
- num_inference_steps,
445
- num_of_interpolation,
446
- ],
447
- outputs=[first_image_output, last_image_output, gif_output, seed],
448
- # outputs=[first_image_output, last_image_output, seed],
449
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
 
451
 
452
  if __name__ == "__main__":
 
268
  # "A delicious ceviche cheesecake slice",
269
  # ]
270
 
271
def infer_tab1(prompt1, prompt2, seed, randomize_seed, guidance_scale, num_inference_steps, num_of_interpolation):
    """Run ``infer`` for the interpolation tab.

    All seven arguments are forwarded to ``infer`` unchanged, in order; the
    eighth positional argument (the operation mode) is always "Addition"
    because this tab exposes no operation selector.

    Returns whatever ``infer`` returns.
    """
    return infer(
        prompt1,
        prompt2,
        seed,
        randomize_seed,
        guidance_scale,
        num_inference_steps,
        num_of_interpolation,
        "Addition",
    )
274
+
275
+ # Wrapper for Tab 2: Uses operation_mode and fixes num_of_interpolation to 3.
276
def infer_tab2(prompt1, prompt2, seed, randomize_seed, guidance_scale, num_inference_steps, operation_mode):
    """Run ``infer`` for the arithmetic-operation tab.

    The first six arguments are forwarded to ``infer`` unchanged. The
    interpolation count (seventh positional argument of ``infer``) is pinned
    to 3 because this tab has no interpolation slider, and ``operation_mode``
    is passed through as the eighth.

    Returns whatever ``infer`` returns.
    """
    return infer(
        prompt1,
        prompt2,
        seed,
        randomize_seed,
        guidance_scale,
        num_inference_steps,
        3,
        operation_mode,
    )
279
+
280
  examples = [
281
  ["A robot cooking dinner in the kitchen", "An orange cat wearing sunglasses on a ship"],
282
  ]
 
292
  gr.Markdown("# CrossFlow")
293
  gr.Markdown("[CrossFlow](https://cross-flow.github.io/) directly transforms text representations into images for text-to-image generation, without the need for both the noise distribution and conditioning mechanism.")
294
  gr.Markdown("This direct mapping enables meaningful 'Linear Interpolation' and 'Arithmetic Operations' in the text latent space, as demonstrated here.")
295
+ # with gr.Tabs():
296
+ # with gr.Tab("Linear Interpolation"):
297
+ # gr.Markdown("This demo uses 256px images, 25 sampling steps (instead of 50), and 10 interpolations (instead of 50) to conserve GPU memory. For better results, see the original [code](https://github.com/qihao067/CrossFlow). (You may adjust them in Advanced Settings, but doing so may trigger OOM errors.)")
298
+ # # gr.Markdown("CrossFlow directly transforms text representations into images for text-to-image generation, enabling interpolation in the input text latent space.")
299
+
300
+ # with gr.Row():
301
+ # prompt1 = gr.Text(
302
+ # label="Prompt_1",
303
+ # show_label=False,
304
+ # max_lines=1,
305
+ # placeholder="Enter your prompt for the first image",
306
+ # container=False,
307
+ # )
308
 
309
+ # with gr.Row():
310
+ # prompt2 = gr.Text(
311
+ # label="Prompt_2",
312
+ # show_label=False,
313
+ # max_lines=1,
314
+ # placeholder="Enter your prompt for the second image",
315
+ # container=False,
316
+ # )
317
+
318
+ # with gr.Row():
319
+ # run_button = gr.Button("Run", scale=0, variant="primary")
320
+
321
+ # # Create separate outputs for the first image, last image, and the animated GIF
322
+ # first_image_output = gr.Image(label="Image of the first prompt", show_label=True)
323
+ # last_image_output = gr.Image(label="Image of the second prompt", show_label=True)
324
+ # gif_output = gr.Image(label="Linear interpolation", show_label=True)
325
+
326
+ # with gr.Accordion("Advanced Settings", open=False):
327
+ # seed = gr.Slider(
328
+ # label="Seed",
329
+ # minimum=0,
330
+ # maximum=MAX_SEED,
331
+ # step=1,
332
+ # value=0,
333
+ # )
334
+
335
+ # randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
336
+
337
+ # with gr.Row():
338
+ # guidance_scale = gr.Slider(
339
+ # label="Guidance scale",
340
+ # minimum=0.0,
341
+ # maximum=10.0,
342
+ # step=0.1,
343
+ # value=7.0, # Replace with defaults that work for your model
344
+ # )
345
+ # with gr.Row():
346
+ # num_inference_steps = gr.Slider(
347
+ # label="Number of inference steps - 50 inference steps are recommended; but you can reduce to 20 if the demo fails.",
348
+ # minimum=1,
349
+ # maximum=50,
350
+ # step=1,
351
+ # value=25, # Replace with defaults that work for your model
352
+ # )
353
+ # with gr.Row():
354
+ # num_of_interpolation = gr.Slider(
355
+ # label="Number of images for interpolation - More images yield smoother transitions but require more resources and may fail.",
356
+ # minimum=5,
357
+ # maximum=50,
358
+ # step=1,
359
+ # value=10, # Replace with defaults that work for your model
360
+ # )
361
+
362
+ # gr.Examples(examples=examples, inputs=[prompt1, prompt2])
363
+
364
+ # with gr.Tab("Arithmetic Operations"):
365
+ # # The second tab is currently empty. You can add more components later.
366
+ # gr.Markdown("This demo only supports addition or subtraction between two text latents ('Prompt_1 + Prompt_2' or 'Prompt_1 - Prompt_2'). For the other arithmetic operations, see the original [code](https://github.com/qihao067/CrossFlow).")
367
+ # with gr.Row():
368
+ # prompt1 = gr.Text(
369
+ # label="Prompt_1",
370
+ # show_label=False,
371
+ # max_lines=1,
372
+ # placeholder="Enter your prompt for the first image",
373
+ # container=False,
374
+ # )
375
 
376
+ # with gr.Row():
377
+ # prompt2 = gr.Text(
378
+ # label="Prompt_2",
379
+ # show_label=False,
380
+ # max_lines=1,
381
+ # placeholder="Enter your prompt for the second image",
382
+ # container=False,
383
+ # )
384
+
385
+ # with gr.Row():
386
+ # operation_mode = gr.Radio(
387
+ # choices=["Addition", "Subtraction"],
388
+ # label="Operation Mode",
389
+ # value="Addition",
390
+ # )
391
+ # with gr.Row():
392
+ # run_button = gr.Button("Run", scale=0, variant="primary")
393
+
394
+ # # Create separate outputs for the first image, last image, and the animated GIF
395
+ # first_image_output = gr.Image(label="Image of the first prompt", show_label=True)
396
+ # last_image_output = gr.Image(label="Image of the second prompt", show_label=True)
397
+ # gif_output = gr.Image(label="Linear interpolation", show_label=True)
398
+
399
+ # with gr.Accordion("Advanced Settings", open=False):
400
+ # seed = gr.Slider(
401
+ # label="Seed",
402
+ # minimum=0,
403
+ # maximum=MAX_SEED,
404
+ # step=1,
405
+ # value=0,
406
+ # )
407
+
408
+ # randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
409
+
410
+ # with gr.Row():
411
+ # guidance_scale = gr.Slider(
412
+ # label="Guidance scale",
413
+ # minimum=0.0,
414
+ # maximum=10.0,
415
+ # step=0.1,
416
+ # value=7.0, # Replace with defaults that work for your model
417
+ # )
418
+ # # with gr.Row():
419
+ # # num_inference_steps = gr.Slider(
420
+ # # label="Number of inference steps - 50 inference steps are recommended; but you can reduce to 20 if the demo fails.",
421
+ # # minimum=1,
422
+ # # maximum=50,
423
+ # # step=1,
424
+ # # value=55, # Replace with defaults that work for your model
425
+ # # )
426
+
427
+ # with gr.Row():
428
+ # num_of_interpolation = gr.Slider(
429
+ # label="Number of images for interpolation - More images yield smoother transitions but require more resources and may fail.",
430
+ # minimum=5,
431
+ # maximum=50,
432
+ # step=1,
433
+ # value=50, # Replace with defaults that work for your model
434
+ # )
435
+
436
+ # gr.Examples(examples=examples, inputs=[prompt1, prompt2])
437
+
438
+ # gr.on(
439
+ # triggers=[run_button.click, prompt1.submit, prompt2.submit],
440
+ # fn=infer,
441
+ # inputs=[
442
+ # prompt1,
443
+ # prompt2,
444
+ # seed,
445
+ # randomize_seed,
446
+ # guidance_scale,
447
+ # num_inference_steps,
448
+ # num_of_interpolation,
449
+ # ],
450
+ # outputs=[first_image_output, last_image_output, gif_output, seed],
451
+ # # outputs=[first_image_output, last_image_output, seed],
452
+ # )
453
+
454
# Two-tab UI: each tab owns its own, independently named set of widgets and
# wires its Run button to the matching wrapper (infer_tab1 / infer_tab2).
# NOTE(review): an earlier `with gr.Blocks(css=css) as demo:` holding the
# Markdown header appears above this point; if this statement sits at module
# level it rebinds `demo` and that header is dropped -- confirm the intended
# nesting against the real file.
with gr.Blocks(css=css) as demo:
    with gr.Tabs():
        # ---------------- Tab 1: interpolation (operation fixed) ----------------
        with gr.Tab("Tab 1: Interpolation Mode"):
            gr.Markdown("**Tab 1:** This tab uses a slider for the number of interpolated images. The operation mode is fixed to *Addition* by default.")

            # Inputs, created in display order.
            prompt1_tab1 = gr.Text(
                label="Prompt 1",
                placeholder="Prompt for first image",
            )
            prompt2_tab1 = gr.Text(
                label="Prompt 2",
                placeholder="Prompt for second image",
            )
            seed_tab1 = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed_tab1 = gr.Checkbox(value=True, label="Randomize seed")
            guidance_scale_tab1 = gr.Slider(
                label="Guidance Scale",
                minimum=0.0,
                maximum=10.0,
                step=0.1,
                value=7.0,
            )
            num_inference_steps_tab1 = gr.Slider(
                label="Number of Inference Steps",
                minimum=1,
                maximum=50,
                step=1,
                value=25,
            )
            num_of_interpolation_tab1 = gr.Slider(
                label="Number of Images for Interpolation",
                minimum=5,
                maximum=50,
                step=1,
                value=10,
            )
            run_button_tab1 = gr.Button("Run")

            # Outputs.
            first_image_output_tab1 = gr.Image(label="Image of the first prompt")
            last_image_output_tab1 = gr.Image(label="Image of the second prompt")
            gif_output_tab1 = gr.Image(label="Linear interpolation")

            # The seed slider is also an output, so the handler can write a
            # value back into it.
            run_button_tab1.click(
                fn=infer_tab1,
                inputs=[
                    prompt1_tab1,
                    prompt2_tab1,
                    seed_tab1,
                    randomize_seed_tab1,
                    guidance_scale_tab1,
                    num_inference_steps_tab1,
                    num_of_interpolation_tab1,
                ],
                outputs=[
                    first_image_output_tab1,
                    last_image_output_tab1,
                    gif_output_tab1,
                    seed_tab1,
                ],
            )

        # ------------- Tab 2: operation mode (interpolation count fixed) -------------
        with gr.Tab("Tab 2: Operation Mode"):
            gr.Markdown("**Tab 2:** This tab lets you choose the operation mode (Addition or Subtraction) while fixing the number of interpolations to 3.")

            # Inputs, created in display order.
            prompt1_tab2 = gr.Text(
                label="Prompt 1",
                placeholder="Prompt for first image",
            )
            prompt2_tab2 = gr.Text(
                label="Prompt 2",
                placeholder="Prompt for second image",
            )
            seed_tab2 = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed_tab2 = gr.Checkbox(value=True, label="Randomize seed")
            guidance_scale_tab2 = gr.Slider(
                label="Guidance Scale",
                minimum=0.0,
                maximum=10.0,
                step=0.1,
                value=7.0,
            )
            num_inference_steps_tab2 = gr.Slider(
                label="Number of Inference Steps",
                minimum=1,
                maximum=50,
                step=1,
                value=25,
            )
            operation_mode_tab2 = gr.Radio(
                label="Operation Mode",
                choices=["Addition", "Subtraction"],
                value="Addition",
            )
            run_button_tab2 = gr.Button("Run")

            # Outputs.
            first_image_output_tab2 = gr.Image(label="Image of the first prompt")
            last_image_output_tab2 = gr.Image(label="Image of the second prompt")
            gif_output_tab2 = gr.Image(label="Linear interpolation")

            run_button_tab2.click(
                fn=infer_tab2,
                inputs=[
                    prompt1_tab2,
                    prompt2_tab2,
                    seed_tab2,
                    randomize_seed_tab2,
                    guidance_scale_tab2,
                    num_inference_steps_tab2,
                    operation_mode_tab2,
                ],
                outputs=[
                    first_image_output_tab2,
                    last_image_output_tab2,
                    gif_output_tab2,
                    seed_tab2,
                ],
            )
517
+
518
 
519
 
520
  if __name__ == "__main__":