viklofg committed on
Commit
5f891d2
·
1 Parent(s): 3521300

Add basic support for IIIF manifests

Browse files
Files changed (2) hide show
  1. app/gradio_config.py +7 -0
  2. app/tabs/submit.py +83 -40
app/gradio_config.py CHANGED
@@ -120,5 +120,12 @@ height: auto !important;
120
  display:block;
121
  }
122
 
 
 
 
 
123
 
 
 
 
124
  """
 
120
  display:block;
121
  }
122
 
123
+ .image_tab {
124
+ background: var(--bg);
125
+ border: none;
126
+ }
127
 
128
+ .image_tabs {
129
+ background: var(--bg);
130
+ }
131
  """
app/tabs/submit.py CHANGED
@@ -1,6 +1,8 @@
1
  import logging
2
  import os
3
  import time
 
 
4
 
5
  import gradio as gr
6
  import spaces
@@ -158,35 +160,47 @@ def get_selected_example_pipeline(event: gr.SelectData) -> str | None:
158
  return name
159
 
160
 
161
- def get_image_from_image_url(input_value):
 
 
 
 
162
  """
163
- Get URL of the image from either an image_id (from Riksarkivet) or an image_url directly.
164
- If input_value is an image_id, it constructs the IIIF URL.
165
- If input_value is an image_url, it returns the URL as-is.
 
 
166
  """
 
 
 
 
 
 
167
 
168
- if input_value.startswith("http"):
169
- return [input_value]
170
- else:
171
- input_value = input_value.split(",")
 
 
172
 
173
- return [
174
- (
175
- f"https://lbiiif.riksarkivet.se/arkis!{item.strip()}/full/max/0/default.jpg"
176
- )
177
- for item in input_value
178
- ]
179
 
180
 
181
  with gr.Blocks() as submit:
182
 
183
  gr.Markdown("# Upload")
184
- gr.Markdown(
185
- "Select or upload the image you want to transcribe. You can upload up to five images at a time. \n "
186
- "Alternatively, you can choose from example images from the gallery or use Image_IDs."
187
- )
188
 
189
  collection_submit_state = gr.State()
 
190
  with gr.Group():
191
  with gr.Row(equal_height=True):
192
  with gr.Column(scale=2):
@@ -198,26 +212,51 @@ with gr.Blocks() as submit:
198
  )
199
 
200
  with gr.Column(scale=1):
201
- examples = gr.Gallery(
202
- all_example_images(),
203
- label="Examples",
204
- interactive=False,
205
- allow_preview=False,
206
- object_fit="scale-down",
207
- min_width=250,
208
- height="100%",
209
- columns=4,
210
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
- image_iiif_url = gr.Textbox(
213
- label="Upload by image ID",
214
- info=(
215
- "Use any image from our digitized archives by pasting its image ID found in the "
216
- "<a href='https://sok.riksarkivet.se/bildvisning/R0002231_00005' target='_blank'>image viewer</a>. "
217
- "Press enter to submit."
218
- ),
219
- placeholder="R0002231_00005, R0002231_00006",
220
- )
221
 
222
  with gr.Column(variant="panel", elem_classes="pipeline-panel"):
223
  gr.Markdown("## Settings")
@@ -287,9 +326,11 @@ with gr.Blocks() as submit:
287
  return gr.update(value=None)
288
  return images
289
 
290
- image_iiif_url.submit(
291
- fn=get_image_from_image_url, inputs=image_iiif_url, outputs=batch_image_gallery
292
- ).then(fn=lambda: "Swedish - Spreads", outputs=pipeline_dropdown)
 
 
293
 
294
  run_button.click(
295
  fn=run_htrflow,
@@ -300,4 +341,6 @@ with gr.Blocks() as submit:
300
  examples.select(get_selected_example_image, None, batch_image_gallery)
301
  examples.select(get_selected_example_pipeline, None, pipeline_dropdown)
302
 
 
 
303
  edit_pipeline_button.click(lambda: Modal(visible=True), None, edit_pipeline_modal)
 
1
  import logging
2
  import os
3
  import time
4
+ import re
5
+ import requests
6
 
7
  import gradio as gr
8
  import spaces
 
160
  return name
161
 
162
 
163
def get_image_from_image_id(image_id):
    """
    Build the Riksarkivet IIIF image URL for a single image ID.

    Arguments:
        image_id: Image ID as shown in the Riksarkivet image viewer,
            e.g. "R0002231_00005". Surrounding whitespace is ignored.

    Returns:
        A one-element list holding the URL of the full-size JPEG, in the
        list form the image gallery takes as its value.
    """
    # IDs are pasted by hand into a textbox; stray spaces or a trailing
    # newline would otherwise end up inside the URL and break the request.
    image_id = image_id.strip()
    return [f"https://lbiiif.riksarkivet.se/arkis!{image_id}/full/max/0/default.jpg"]
165
+
166
+
167
def get_images_from_iiif_manifest(iiif_manifest_url):
    """
    Collect the image URLs referenced by a v2/v3 IIIF manifest.

    The manifest is fetched over HTTP and scanned as plain text for anything
    shaped like an IIIF Image API URL
    (``{identifier}/{region}/{size}/{rotation}/{quality}.{format}``). Each
    hit is rewritten to request the full region at a fixed width, so that
    different renditions of the same image (e.g. thumbnail and full size)
    collapse into a single entry.

    Arguments:
        iiif_manifest_url: URL of the IIIF manifest to read.

    Returns:
        Sorted list of unique image URLs; empty if nothing in the manifest
        looked like an IIIF image URL.

    Raises:
        gr.Error: If the manifest could not be fetched (bad URL, connection
            problem, timeout or HTTP error status).
    """
    try:
        response = requests.get(iiif_manifest_url, timeout=5)
        response.raise_for_status()
    except requests.RequestException as e:
        # RequestException is the common base of HTTPError, ConnectionError,
        # Timeout and the invalid-URL errors, so nothing the user can trigger
        # from the textbox escapes as a raw traceback. The error has to be
        # *raised* for Gradio to show it; merely constructing it does nothing.
        raise gr.Error(f"Could not fetch IIIF manifest from {iiif_manifest_url} ({e})") from e

    # Hacky solution to get all images regardless of API version - treat
    # the manifest as a string and match everything that looks like an IIIF
    # image URL.
    manifest = response.text
    pattern = r'(?P<identifier>https?://[^"\s]*)/(?P<region>[^"\s]*?)/(?P<size>[^"\s]*?)/(?P<rotation>!?\d*?)/(?P<quality>[^"\s]*?)\.(?P<format>jpg|tif|png|gif|jp2|pdf|webp)'

    # In the IIIF size syntax "w," fixes the width and lets the server pick
    # the height that preserves the aspect ratio.
    width = 1200

    # A set eliminates duplicates (e.g. thumbnails and fullsize images that
    # normalise to the same URL).
    images = {
        f"{identifier}/full/{width},/0/default.{format_}"
        for identifier, _, _, _, _, format_ in re.findall(pattern, manifest)
    }
    return sorted(images)
195
 
196
 
197
  with gr.Blocks() as submit:
198
 
199
  gr.Markdown("# Upload")
200
+ gr.Markdown("Select or upload the image you want to transcribe. You can upload up to five images at a time.")
 
 
 
201
 
202
  collection_submit_state = gr.State()
203
+
204
  with gr.Group():
205
  with gr.Row(equal_height=True):
206
  with gr.Column(scale=2):
 
212
  )
213
 
214
  with gr.Column(scale=1):
215
+ with gr.Tabs(elem_classes="image_tabs"):
216
+ with gr.Tab("Examples", elem_classes="image_tab"):
217
+ examples = gr.Gallery(
218
+ all_example_images(),
219
+ show_label=False,
220
+ interactive=False,
221
+ allow_preview=False,
222
+ object_fit="scale-down",
223
+ min_width=250,
224
+ height="100%",
225
+ columns=4,
226
+ )
227
+
228
+ with gr.Tab("Image ID", elem_classes="image_tab"):
229
+ image_id = gr.Textbox(
230
+ label="Upload by image ID",
231
+ info=(
232
+ "Use any image from our digitized archives by pasting its image ID found in the "
233
+ "<a href='https://sok.riksarkivet.se/bildvisning/R0002231_00005' target='_blank'>image viewer</a>. "
234
+ "Press enter to submit."
235
+ ),
236
+ placeholder="R0002231_00005",
237
+ )
238
+
239
+ with gr.Tab("IIIF Manifest", elem_classes="image_tab"):
240
+ iiif_manifest_url = gr.Textbox(
241
+ label="IIIF Manifest",
242
+ info=(
243
+ "Use an image from a IIIF manifest by pasting a IIIF manifest URL. "
244
+ "Press enter to submit."
245
+ ),
246
+ placeholder="",
247
+ )
248
+ iiif_gallery = gr.Gallery(
249
+ interactive=False,
250
+ columns=4,
251
+ allow_preview=False,
252
+ container=False,
253
+ show_label=False,
254
+ object_fit="scale-down",
255
+ )
256
+
257
+ with gr.Tab("URL", elem_classes="image_tab"):
258
+ image_url = gr.Textbox(label="Image URL", info="Upload an image by pasting its URL.", placeholder="https://example.com/image.jpg")
259
 
 
 
 
 
 
 
 
 
 
260
 
261
  with gr.Column(variant="panel", elem_classes="pipeline-panel"):
262
  gr.Markdown("## Settings")
 
326
  return gr.update(value=None)
327
  return images
328
 
329
+
330
+ image_id.submit(get_image_from_image_id, image_id, batch_image_gallery).then(fn=lambda: "Swedish - Spreads", outputs=pipeline_dropdown)
331
+ iiif_manifest_url.submit(get_images_from_iiif_manifest, iiif_manifest_url, iiif_gallery)
332
+ image_url.submit(lambda url: [url], image_url, batch_image_gallery)
333
+
334
 
335
  run_button.click(
336
  fn=run_htrflow,
 
341
  examples.select(get_selected_example_image, None, batch_image_gallery)
342
  examples.select(get_selected_example_pipeline, None, pipeline_dropdown)
343
 
344
+ iiif_gallery.select(get_selected_example_image, None, batch_image_gallery)
345
+
346
  edit_pipeline_button.click(lambda: Modal(visible=True), None, edit_pipeline_modal)