fancyfeast committed on
Commit
301ae18
·
1 Parent(s): 40e61c6

Tweak UI and guide

Browse files
Files changed (1) hide show
  1. app.py +48 -46
app.py CHANGED
@@ -27,13 +27,9 @@ DESCRIPTION = """
27
  <h2>Quick-start</h2>
28
  <ol>
29
  <li><strong>Upload or drop</strong> an image in the left-hand panel.</li>
30
- <li>Pick a <strong>Caption Type</strong> and, if you wish, adjust the
31
- <strong>Caption Length</strong>.</li>
32
- <li>(Optional) tick any <strong>Extra Options</strong> checkboxes
33
- &nbsp;–&nbsp;these add or remove specific details in the caption.</li>
34
- <li>(Optional) expand <em>Generation settings</em> to tune
35
- <code>temperature</code>, <code>top-p</code>, or
36
- <code>max&nbsp;tokens</code>.</li>
37
  <li>Press <kbd>Caption</kbd>.
38
  The prompt sent to the model appears in the <em>Prompt</em> box (editable),
39
  and the resulting caption streams into the <em>Caption</em> box.</li>
@@ -50,21 +46,20 @@ DESCRIPTION = """
50
  <tr><td><strong>Straightforward</strong></td>
51
  <td>Objective, no fluff, and more succinct than Descriptive.</td></tr>
52
  <tr><td><strong>Stable Diffusion Prompt</strong></td>
53
- <td>Reverse-engineers a prompt that could have produced the image in a
54
- SD/T2I model.</td></tr>
55
  <tr><td><strong>MidJourney</strong></td>
56
- <td>Same idea as above but tuned to MidJourney’s prompt style.</td></tr>
57
  <tr><td><strong>Danbooru tag list</strong></td>
58
  <td>Comma-separated tags strictly following Danbooru conventions
59
- (artist:, copyright:, etc.). Lower-case underscores only.</td></tr>
60
  <tr><td><strong>e621 tag list</strong></td>
61
  <td>Alphabetical, namespaced tags in e621 style – includes species/meta
62
- tags when relevant.</td></tr>
63
  <tr><td><strong>Rule34 tag list</strong></td>
64
  <td>Rule34 style alphabetical tag dump; artist/copyright/character
65
- prefixes first.</td></tr>
66
  <tr><td><strong>Booru-like tag list</strong></td>
67
- <td>Looser tag list when you want labels but not a specific Booru format.</td></tr>
68
  <tr><td><strong>Art Critic</strong></td>
69
  <td>Paragraph of art-historical commentary: composition, symbolism, style,
70
  lighting, movement, etc.</td></tr>
@@ -74,6 +69,12 @@ DESCRIPTION = """
74
  <td>Catchy caption aimed at platforms like Instagram or BlueSky.</td></tr>
75
  </table>
76
 
 
 
 
 
 
 
77
  <!-- ───────────────────── Extras + generation notes ───────────────── -->
78
  <h3>Extra Options</h3>
79
  <p>These check-boxes fine-tune what the model should or should not mention:
@@ -267,38 +268,39 @@ with gr.Blocks() as demo:
267
  value="long",
268
  )
269
 
270
- extra_options = gr.CheckboxGroup(
271
- choices=[
272
- "If there is a person/character in the image you must refer to them as {name}.",
273
- "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
274
- "Include information about lighting.",
275
- "Include information about camera angle.",
276
- "Include information about whether there is a watermark or not.",
277
- "Include information about whether there are JPEG artifacts or not.",
278
- "If it is a photo you MUST include information about what camera was likely used and details such as aperture, shutter speed, ISO, etc.",
279
- "Do NOT include anything sexual; keep it PG.",
280
- "Do NOT mention the image's resolution.",
281
- "You MUST include information about the subjective aesthetic quality of the image from low to very high.",
282
- "Include information on the image's composition style, such as leading lines, rule of thirds, or symmetry.",
283
- "Do NOT mention any text that is in the image.",
284
- "Specify the depth of field and whether the background is in focus or blurred.",
285
- "If applicable, mention the likely use of artificial or natural lighting sources.",
286
- "Do NOT use any ambiguous language.",
287
- "Include whether the image is sfw, suggestive, or nsfw.",
288
- "ONLY describe the most important elements of the image.",
289
- "If it is a work of art, do not include the artist's name or the title of the work.",
290
- "Identify the image orientation (portrait, landscape, or square) and aspect ratio if obvious.",
291
- """Use vulgar slang and profanity, such as (but not limited to) "fucking," "slut," "cock," etc.""",
292
- "Do NOT use polite euphemisms—lean into blunt, casual phrasing.",
293
- "Include information about the ages of any people/characters when applicable.",
294
- "Mention whether the image depicts an extreme close-up, close-up, medium close-up, medium shot, cowboy shot, medium wide shot, wide shot, or extreme wide shot.",
295
- "Do not mention the mood/feeling/etc of the image.",
296
- "Explicitly specify the vantage height (eye-level, low-angle worm’s-eye, bird’s-eye, drone, rooftop, etc.).",
297
- "If there is a watermark, you must mention it.",
298
- """Your response will be used by a text-to-image model, so avoid useless meta phrases like “This image shows…”, "You are looking at...", etc.""",
299
- ],
300
- label="Extra Options"
301
- )
 
302
 
303
  name_input = gr.Textbox(label="Person / Character Name")
304
 
 
27
  <h2>Quick-start</h2>
28
  <ol>
29
  <li><strong>Upload or drop</strong> an image in the left-hand panel.</li>
30
+ <li>Pick a <strong>Caption Type</strong> and, if you wish, adjust the <strong>Caption Length</strong>.</li>
31
+ <li>(Optional) <em>expand the "Extra Options" accordion</em> and tick any boxes that should influence the caption.</li>
32
+ <li>(Optional) open <em>Generation settings</em> to adjust <code>temperature</code>, <code>top-p</code>, or <code>max&nbsp;tokens</code>.</li>
 
 
 
 
33
  <li>Press <kbd>Caption</kbd>.
34
  The prompt sent to the model appears in the <em>Prompt</em> box (editable),
35
  and the resulting caption streams into the <em>Caption</em> box.</li>
 
46
  <tr><td><strong>Straightforward</strong></td>
47
  <td>Objective, no fluff, and more succinct than Descriptive.</td></tr>
48
  <tr><td><strong>Stable Diffusion Prompt</strong></td>
49
+ <td>Reverse-engineers a prompt that could have produced the image in an SD/T2I model.<br><em>⚠︎ Experimental – can glitch ≈ 3 % of the time.</em></td></tr>
 
50
  <tr><td><strong>MidJourney</strong></td>
51
+ <td>Same idea as above but tuned to MidJourney’s prompt style.<br><em>⚠︎ Experimental – can glitch ≈ 3 % of the time.</em></td></tr>
52
  <tr><td><strong>Danbooru tag list</strong></td>
53
  <td>Comma-separated tags strictly following Danbooru conventions
54
+ (artist:, copyright:, etc.). Lower-case underscores only.<br><em>⚠︎ Experimental – can glitch ≈ 3 % of the time.</em></td></tr>
55
  <tr><td><strong>e621 tag list</strong></td>
56
  <td>Alphabetical, namespaced tags in e621 style – includes species/meta
57
+ tags when relevant.<br><em>⚠︎ Experimental – can glitch ≈ 3 % of the time.</em></td></tr>
58
  <tr><td><strong>Rule34 tag list</strong></td>
59
  <td>Rule34 style alphabetical tag dump; artist/copyright/character
60
+ prefixes first.<br><em>⚠︎ Experimental – can glitch ≈ 3 % of the time.</em></td></tr>
61
  <tr><td><strong>Booru-like tag list</strong></td>
62
+ <td>Looser tag list when you want labels but not a specific Booru format.<br><em>⚠︎ Experimental – can glitch ≈ 3 % of the time.</em></td></tr>
63
  <tr><td><strong>Art Critic</strong></td>
64
  <td>Paragraph of art-historical commentary: composition, symbolism, style,
65
  lighting, movement, etc.</td></tr>
 
69
  <td>Catchy caption aimed at platforms like Instagram or BlueSky.</td></tr>
70
  </table>
71
 
72
+ <p style="margin-top:0.6em">
73
+ <strong>Note&nbsp;on Booru modes:</strong> They’re tuned for
74
+ anime-style / illustration imagery; accuracy drops on real-world photographs
75
+ or highly abstract artwork.
76
+ </p>
77
+
78
  <!-- ───────────────────── Extras + generation notes ───────────────── -->
79
  <h3>Extra Options</h3>
80
  <p>These check-boxes fine-tune what the model should or should not mention:
 
268
  value="long",
269
  )
270
 
271
+ with gr.Accordion("Extra Options", open=False):
272
+ extra_options = gr.CheckboxGroup(
273
+ choices=[
274
+ "If there is a person/character in the image you must refer to them as {name}.",
275
+ "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
276
+ "Include information about lighting.",
277
+ "Include information about camera angle.",
278
+ "Include information about whether there is a watermark or not.",
279
+ "Include information about whether there are JPEG artifacts or not.",
280
+ "If it is a photo you MUST include information about what camera was likely used and details such as aperture, shutter speed, ISO, etc.",
281
+ "Do NOT include anything sexual; keep it PG.",
282
+ "Do NOT mention the image's resolution.",
283
+ "You MUST include information about the subjective aesthetic quality of the image from low to very high.",
284
+ "Include information on the image's composition style, such as leading lines, rule of thirds, or symmetry.",
285
+ "Do NOT mention any text that is in the image.",
286
+ "Specify the depth of field and whether the background is in focus or blurred.",
287
+ "If applicable, mention the likely use of artificial or natural lighting sources.",
288
+ "Do NOT use any ambiguous language.",
289
+ "Include whether the image is sfw, suggestive, or nsfw.",
290
+ "ONLY describe the most important elements of the image.",
291
+ "If it is a work of art, do not include the artist's name or the title of the work.",
292
+ "Identify the image orientation (portrait, landscape, or square) and aspect ratio if obvious.",
293
+ """Use vulgar slang and profanity, such as (but not limited to) "fucking," "slut," "cock," etc.""",
294
+ "Do NOT use polite euphemisms—lean into blunt, casual phrasing.",
295
+ "Include information about the ages of any people/characters when applicable.",
296
+ "Mention whether the image depicts an extreme close-up, close-up, medium close-up, medium shot, cowboy shot, medium wide shot, wide shot, or extreme wide shot.",
297
+ "Do not mention the mood/feeling/etc of the image.",
298
+ "Explicitly specify the vantage height (eye-level, low-angle worm’s-eye, bird’s-eye, drone, rooftop, etc.).",
299
+ "If there is a watermark, you must mention it.",
300
+ """Your response will be used by a text-to-image model, so avoid useless meta phrases like “This image shows…”, "You are looking at...", etc.""",
301
+ ],
302
+ label="Select one or more",
303
+ )
304
 
305
  name_input = gr.Textbox(label="Person / Character Name")
306