akshayka committed on
Commit
2743dd0
·
verified ·
1 Parent(s): 4056b78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +188 -353
app.py CHANGED
@@ -1,470 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import marimo
2
 
3
- __generated_with = "0.9.2"
4
- app = marimo.App()
5
 
6
 
7
  @app.cell
8
- def __():
9
  import marimo as mo
10
-
11
- mo.md("# Welcome to marimo! 🌊🍃")
12
  return (mo,)
13
 
14
 
15
- @app.cell
16
- def __(mo):
17
- slider = mo.ui.slider(1, 22)
18
- return (slider,)
19
-
20
-
21
- @app.cell
22
- def __(mo, slider):
23
  mo.md(
24
- f"""
25
- marimo is a **reactive** Python notebook.
26
 
27
- This means that unlike traditional notebooks, marimo notebooks **run
28
- automatically** when you modify them or
29
- interact with UI elements, like this slider: {slider}.
30
 
31
- {"##" + "🍃" * slider.value}
32
  """
33
  )
34
  return
35
 
36
 
37
  @app.cell(hide_code=True)
38
- def __(mo):
39
- mo.accordion(
40
- {
41
- "Tip: disabling automatic execution": mo.md(
42
- rf"""
43
- marimo lets you disable automatic execution: just go into the
44
- notebook settings and set
45
-
46
- "Runtime > On Cell Change" to "lazy".
47
-
48
- When the runtime is lazy, after running a cell, marimo marks its
49
- descendants as stale instead of automatically running them. The
50
- lazy runtime puts you in control over when cells are run, while
51
- still giving guarantees about the notebook state.
52
- """
53
- )
54
- }
55
- )
56
- return
57
-
58
-
59
- @app.cell(hide_code=True)
60
- def __(mo):
61
  mo.md(
62
- """
63
- Tip: This is a tutorial notebook. You can create your own notebooks
64
- by entering `marimo edit` at the command line.
65
- """
66
- ).callout()
67
- return
68
 
 
69
 
70
- @app.cell(hide_code=True)
71
- def __(mo):
72
- mo.md(
73
  """
74
- ## 1. Reactive execution
 
75
 
76
- A marimo notebook is made up of small blocks of Python code called
77
- cells.
78
 
79
- marimo reads your cells and models the dependencies among them: whenever
80
- a cell that defines a global variable is run, marimo
81
- **automatically runs** all cells that reference that variable.
82
 
83
- Reactivity keeps your program state and outputs in sync with your code,
84
- making for a dynamic programming environment that prevents bugs before they
85
- happen.
86
- """
87
- )
88
- return
89
 
90
 
91
- @app.cell(hide_code=True)
92
- def __(changed, mo):
93
- (
94
- mo.md(
95
- f"""
96
- **✨ Nice!** The value of `changed` is now {changed}.
97
-
98
- When you updated the value of the variable `changed`, marimo
99
- **reacted** by running this cell automatically, because this cell
100
- references the global variable `changed`.
101
-
102
- Reactivity ensures that your notebook state is always
103
- consistent, which is crucial for doing good science; it's also what
104
- enables marimo notebooks to double as tools and apps.
105
- """
106
- )
107
- if changed
108
- else mo.md(
109
- """
110
- **🌊 See it in action.** In the next cell, change the value of the
111
- variable `changed` to `True`, then click the run button.
112
- """
113
  )
114
- )
115
- return
 
116
 
117
 
118
  @app.cell
119
- def __():
120
- changed = False
121
- return (changed,)
 
 
 
 
 
 
 
 
 
 
122
 
123
 
124
  @app.cell(hide_code=True)
125
- def __(mo):
126
- mo.accordion(
127
- {
128
- "Tip: execution order": (
129
- """
130
- The order of cells on the page has no bearing on
131
- the order in which cells are executed: marimo knows that a cell
132
- reading a variable must run after the cell that defines it. This
133
- frees you to organize your code in the way that makes the most
134
- sense for you.
135
- """
136
- )
137
- }
138
  )
139
- return
 
140
 
141
 
142
  @app.cell(hide_code=True)
143
- def __(mo):
144
- mo.md(
145
- """
146
- **Global names must be unique.** To enable reactivity, marimo imposes a
147
- constraint on how names appear in cells: no two cells may define the same
148
- variable.
149
- """
150
- )
151
- return
152
 
153
 
154
- @app.cell(hide_code=True)
155
- def __(mo):
156
- mo.accordion(
157
- {
158
- "Tip: encapsulation": (
159
- """
160
- By encapsulating logic in functions, classes, or Python modules,
161
- you can minimize the number of global variables in your notebook.
162
- """
163
- )
164
- }
165
- )
166
- return
167
 
168
 
169
- @app.cell(hide_code=True)
170
- def __(mo):
171
- mo.accordion(
172
- {
173
- "Tip: private variables": (
174
- """
175
- Variables prefixed with an underscore are "private" to a cell, so
176
- they can be defined by multiple cells.
177
- """
178
- )
179
- }
180
  )
181
  return
182
 
183
 
184
  @app.cell(hide_code=True)
185
- def __(mo):
186
  mo.md(
187
- """
188
- ## 2. UI elements
189
 
190
- Cells can output interactive UI elements. Interacting with a UI
191
- element **automatically triggers notebook execution**: when
192
- you interact with a UI element, its value is sent back to Python, and
193
- every cell that references that element is re-run.
194
 
195
- marimo provides a library of UI elements to choose from under
196
- `marimo.ui`.
197
  """
198
  )
199
  return
200
 
201
 
202
- @app.cell
203
- def __(mo):
204
- mo.md("""**🌊 Some UI elements.** Try interacting with the below elements.""")
 
 
 
 
 
 
205
  return
206
 
207
 
208
  @app.cell
209
- def __(mo):
210
- icon = mo.ui.dropdown(["🍃", "🌊", "✨"], value="🍃")
211
- return (icon,)
212
 
 
 
213
 
214
- @app.cell
215
- def __(icon, mo):
216
- repetitions = mo.ui.slider(1, 16, label=f"number of {icon.value}: ")
217
- return (repetitions,)
218
 
 
 
 
 
 
219
 
220
- @app.cell
221
- def __(icon, repetitions):
222
- icon, repetitions
223
  return
224
 
225
 
226
  @app.cell
227
- def __(icon, mo, repetitions):
228
- mo.md("# " + icon.value * repetitions.value)
229
- return
 
 
 
 
230
 
231
 
232
  @app.cell(hide_code=True)
233
- def __(mo):
234
- mo.md(
235
- """
236
- ## 3. marimo is just Python
237
 
238
- marimo cells parse Python (and only Python), and marimo notebooks are
239
- stored as pure Python files — outputs are _not_ included. There's no
240
- magical syntax.
241
 
242
- The Python files generated by marimo are:
 
 
 
 
 
 
 
243
 
244
- - easily versioned with git, yielding minimal diffs
245
- - legible for both humans and machines
246
- - formattable using your tool of choice,
247
- - usable as Python scripts, with UI elements taking their default
248
- values, and
249
- - importable by other modules (more on that in the future).
250
- """
251
  )
252
- return
 
 
 
253
 
254
 
255
  @app.cell(hide_code=True)
256
- def __(mo):
257
  mo.md(
258
- """
259
- ## 4. Running notebooks as apps
260
-
261
- marimo notebooks can double as apps. Click the app window icon in the
262
- bottom-right to see this notebook in "app view."
263
 
264
- Serve a notebook as an app with `marimo run` at the command-line.
265
- Of course, you can use marimo just to level-up your
266
- notebooking, without ever making apps.
267
  """
268
  )
269
  return
270
 
271
 
272
  @app.cell(hide_code=True)
273
- def __(mo):
274
  mo.md(
275
- """
276
- ## 5. The `marimo` command-line tool
277
-
278
- **Creating and editing notebooks.** Use
279
-
280
- ```
281
- marimo edit
282
- ```
283
-
284
- in a terminal to start the marimo notebook server. From here
285
- you can create a new notebook or edit existing ones.
286
-
287
-
288
- **Running as apps.** Use
289
 
290
- ```
291
- marimo run notebook.py
292
- ```
293
-
294
- to start a webserver that serves your notebook as an app in read-only mode,
295
- with code cells hidden.
296
-
297
- **Convert a Jupyter notebook.** Convert a Jupyter notebook to a marimo
298
- notebook using `marimo convert`:
299
-
300
- ```
301
- marimo convert your_notebook.ipynb > your_app.py
302
- ```
303
-
304
- **Tutorials.** marimo comes packaged with tutorials:
305
 
306
- - `dataflow`: more on marimo's automatic execution
307
- - `ui`: how to use UI elements
308
- - `markdown`: how to write markdown, with interpolated values and
309
- LaTeX
310
- - `plots`: how plotting works in marimo
311
- - `sql`: how to use SQL
312
- - `layout`: layout elements in marimo
313
- - `fileformat`: how marimo's file format works
314
- - `markdown-format`: for using `.md` files in marimo
315
- - `for-jupyter-users`: if you are coming from Jupyter
316
 
317
- Start a tutorial with `marimo tutorial`; for example,
 
 
 
 
 
 
318
 
319
- ```
320
- marimo tutorial dataflow
321
- ```
322
 
323
- In addition to tutorials, we have examples in
324
- [our GitHub repo](https://www.github.com/marimo-team/marimo/tree/main/examples).
325
- """
 
 
326
  )
 
 
327
  return
328
 
329
 
330
  @app.cell(hide_code=True)
331
- def __(mo):
332
  mo.md(
333
- """
334
- ## 6. The marimo editor
335
-
336
- Here are some tips to help you get started with the marimo editor.
337
  """
338
  )
339
  return
340
 
341
 
342
  @app.cell
343
- def __(mo, tips):
344
- mo.accordion(tips)
345
- return
 
 
 
 
 
 
 
 
 
 
346
 
347
 
348
  @app.cell(hide_code=True)
349
- def __(mo):
350
- mo.md("""## Finally, a fun fact""")
351
- return
 
352
 
353
 
354
  @app.cell(hide_code=True)
355
- def __(mo):
356
- mo.md(
357
- """
358
- The name "marimo" is a reference to a type of algae that, under
359
- the right conditions, clumps together to form a small sphere
360
- called a "marimo moss ball". Made of just strands of algae, these
361
- beloved assemblages are greater than the sum of their parts.
362
- """
363
  )
364
  return
365
 
366
 
367
  @app.cell(hide_code=True)
368
- def __():
369
- tips = {
370
- "Saving": (
371
- """
372
- **Saving**
373
-
374
- - _Name_ your app using the box at the top of the screen, or
375
- with `Ctrl/Cmd+s`. You can also create a named app at the
376
- command line, e.g., `marimo edit app_name.py`.
377
-
378
- - _Save_ by clicking the save icon on the bottom right, or by
379
- inputting `Ctrl/Cmd+s`. By default marimo is configured
380
- to autosave.
381
- """
382
- ),
383
- "Running": (
384
- """
385
- 1. _Run a cell_ by clicking the play ( ▷ ) button on the top
386
- right of a cell, or by inputting `Ctrl/Cmd+Enter`.
387
-
388
- 2. _Run a stale cell_ by clicking the yellow run button on the
389
- right of the cell, or by inputting `Ctrl/Cmd+Enter`. A cell is
390
- stale when its code has been modified but not run.
391
-
392
- 3. _Run all stale cells_ by clicking the play ( ▷ ) button on
393
- the bottom right of the screen, or input `Ctrl/Cmd+Shift+r`.
394
- """
395
- ),
396
- "Console Output": (
397
- """
398
- Console output (e.g., `print()` statements) is shown below a
399
- cell.
400
- """
401
- ),
402
- "Creating, Moving, and Deleting Cells": (
403
- """
404
- 1. _Create_ a new cell above or below a given one by clicking
405
- the plus button to the left of the cell, which appears on
406
- mouse hover.
407
-
408
- 2. _Move_ a cell up or down by dragging on the handle to the
409
- right of the cell, which appears on mouse hover.
410
-
411
- 3. _Delete_ a cell by clicking the trash bin icon. Bring it
412
- back by clicking the undo button on the bottom right of the
413
- screen, or with `Ctrl/Cmd+Shift+z`.
414
- """
415
- ),
416
- "Disabling Automatic Execution": (
417
- """
418
- Via the notebook settings (gear icon) or footer panel, you
419
- can disable automatic execution. This is helpful when
420
- working with expensive notebooks or notebooks that have
421
- side-effects like database transactions.
422
- """
423
- ),
424
- "Disabling Cells": (
425
- """
426
- You can disable a cell via the cell context menu.
427
- marimo will never run a disabled cell or any cells that depend on it.
428
- This can help prevent accidental execution of expensive computations
429
- when editing a notebook.
430
- """
431
- ),
432
- "Code Folding": (
433
- """
434
- You can collapse or fold the code in a cell by clicking the arrow
435
- icons in the line number column to the left, or by using keyboard
436
- shortcuts.
437
-
438
- Use the command palette (`Ctrl/Cmd+k`) or a keyboard shortcut to
439
- quickly fold or unfold all cells.
440
- """
441
- ),
442
- "Code Formatting": (
443
- """
444
- If you have [ruff](https://github.com/astral-sh/ruff) installed,
445
- you can format a cell with the keyboard shortcut `Ctrl/Cmd+b`.
446
- """
447
- ),
448
- "Command Palette": (
449
- """
450
- Use `Ctrl/Cmd+k` to open the command palette.
451
- """
452
- ),
453
- "Keyboard Shortcuts": (
454
- """
455
- Open the notebook menu (top-right) or input `Ctrl/Cmd+Shift+h` to
456
- view a list of all keyboard shortcuts.
457
- """
458
- ),
459
- "Configuration": (
460
- """
461
- Configure the editor by clicking the gears icon near the top-right
462
- of the screen.
463
- """
464
- ),
465
- }
466
- return (tips,)
467
 
468
 
469
  if __name__ == "__main__":
470
- app.run()
 
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "chromadb==1.0.4",
5
+ # "datasets==3.5.0",
6
+ # "marimo",
7
+ # "matplotlib==3.10.1",
8
+ # "numpy==2.2.4",
9
+ # "open-clip-torch==2.32.0",
10
+ # "pillow==11.1.0",
11
+ # ]
12
+ # ///
13
+
14
  import marimo
15
 
16
+ __generated_with = "0.12.8"
17
+ app = marimo.App(width="medium")
18
 
19
 
20
  @app.cell
21
+ def _():
22
  import marimo as mo
 
 
23
  return (mo,)
24
 
25
 
26
+ @app.cell(hide_code=True)
27
+ def _(mo):
 
 
 
 
 
 
28
  mo.md(
29
+ r"""
30
+ # Multimodal Retrieval
31
 
32
+ Chroma supports multimodal collections, i.e. collections which contain, and can be queried by, multiple modalities of data.
 
 
33
 
34
+ This notebook shows an example of how to create and query a collection with both text and images, using Chroma's built-in features.
35
  """
36
  )
37
  return
38
 
39
 
40
  @app.cell(hide_code=True)
41
+ def _(mo):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  mo.md(
43
+ r"""
44
+ ## Dataset
 
 
 
 
45
 
46
+ We use a small subset of the [coco object detection dataset](https://huggingface.co/datasets/detection-datasets/coco), hosted on HuggingFace.
47
 
48
+ We download a small fraction of all the images in the dataset locally, and use it to create a multimodal collection.
 
 
49
  """
50
+ )
51
+ return
52
 
 
 
53
 
54
+ @app.cell
55
+ def _():
56
+ import os
57
 
58
+ from datasets import load_dataset
59
+ from matplotlib import pyplot as plt
60
+ return load_dataset, os
 
 
 
61
 
62
 
63
+ @app.cell
64
+ def _(load_dataset, mo):
65
+ with mo.status.spinner(title="Loading dataset"):
66
+ dataset = load_dataset(
67
+ path="detection-datasets/coco",
68
+ name="default",
69
+ split="train",
70
+ streaming=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  )
72
+
73
+ N_IMAGES = 20
74
+ return N_IMAGES, dataset
75
 
76
 
77
  @app.cell
78
+ def _(N_IMAGES, dataset, mo, os):
79
+ # Write the images to a folder
80
+ IMAGE_FOLDER = "images"
81
+ os.makedirs(IMAGE_FOLDER, exist_ok=True)
82
+ i = 0
83
+ all_images = []
84
+ with mo.status.spinner(title="Loading images"):
85
+ for row in dataset.take(N_IMAGES):
86
+ image = row["image"]
87
+ all_images.append(image)
88
+ image.save(f"images/{i}.jpg")
89
+ i += 1
90
+ return IMAGE_FOLDER, all_images
91
 
92
 
93
  @app.cell(hide_code=True)
94
+ def _(mo):
95
+ img_width = mo.ui.slider(
96
+ label="Image width", start=100, stop=300, step=10, debounce=True
 
 
 
 
 
 
 
 
 
 
97
  )
98
+ img_width
99
+ return (img_width,)
100
 
101
 
102
  @app.cell(hide_code=True)
103
+ def _(all_images, img_width, mo):
104
+ import io
 
 
 
 
 
 
 
105
 
106
 
107
+ def as_image(src):
108
+ img_byte_arr = io.BytesIO()
109
+ src.save(img_byte_arr, format=src.format or "PNG")
110
+ img_byte_arr.seek(0)
111
+ return mo.image(img_byte_arr, width=img_width.value)
 
 
 
 
 
 
 
 
112
 
113
 
114
+ mo.hstack(
115
+ [as_image(_img) for _img in all_images[10:]],
116
+ wrap=True,
 
 
 
 
 
 
 
 
117
  )
118
  return
119
 
120
 
121
  @app.cell(hide_code=True)
122
+ def _(mo):
123
  mo.md(
124
+ r"""
125
+ ## Ingesting multimodal data
126
 
127
+ Chroma supports multimodal collections by referencing external URIs for data types other than text.
128
+ All you have to do is specify a data loader when creating the collection, and then provide the URI for each entry.
 
 
129
 
130
+ For this example, we are only adding images, though you can also add text.
 
131
  """
132
  )
133
  return
134
 
135
 
136
+ @app.cell(hide_code=True)
137
+ def _(mo):
138
+ mo.md(
139
+ r"""
140
+ ### Creating a multi-modal collection
141
+
142
+ First we create the default Chroma client.
143
+ """
144
+ )
145
  return
146
 
147
 
148
  @app.cell
149
+ def _():
150
+ import chromadb
 
151
 
152
+ client = chromadb.Client()
153
+ return (client,)
154
 
 
 
 
 
155
 
156
+ @app.cell(hide_code=True)
157
+ def _(mo):
158
+ mo.md(
159
+ r"""
160
+ Next we specify an embedding function and a data loader.
161
 
162
+ The built-in `OpenCLIPEmbeddingFunction` works with both text and image data. The `ImageLoader` is a simple data loader that loads images from a local directory.
163
+ """
164
+ )
165
  return
166
 
167
 
168
  @app.cell
169
+ def _():
170
+ from chromadb.utils.data_loaders import ImageLoader
171
+ from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction
172
+
173
+ embedding_function = OpenCLIPEmbeddingFunction()
174
+ image_loader = ImageLoader()
175
+ return embedding_function, image_loader
176
 
177
 
178
  @app.cell(hide_code=True)
179
+ def _(mo):
180
+ mo.md(r"""We create a collection with the embedding function and data loader.""")
181
+ return
 
182
 
 
 
 
183
 
184
+ @app.cell
185
+ def _(IMAGE_FOLDER, client, embedding_function, image_loader, os):
186
+ collection = client.create_collection(
187
+ name="multimodal_collection",
188
+ embedding_function=embedding_function,
189
+ data_loader=image_loader,
190
+ get_or_create=True,
191
+ )
192
 
193
+ # Get the uris to the images
194
+ image_uris = sorted(
195
+ [
196
+ os.path.join(IMAGE_FOLDER, image_name)
197
+ for image_name in os.listdir(IMAGE_FOLDER)
198
+ ]
 
199
  )
200
+ ids = [str(i) for i in range(len(image_uris))]
201
+
202
+ collection.add(ids=ids, uris=image_uris)
203
+ return (collection,)
204
 
205
 
206
  @app.cell(hide_code=True)
207
+ def _(mo):
208
  mo.md(
209
+ r"""
210
+ ### Adding multi-modal data
 
 
 
211
 
212
+ We add image data to the collection using the image URIs. The data loader and embedding functions we specified earlier will ingest data from the provided URIs automatically.
 
 
213
  """
214
  )
215
  return
216
 
217
 
218
  @app.cell(hide_code=True)
219
+ def _(mo):
220
  mo.md(
221
+ r"""
222
+ ## Querying a multi-modal collection
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
+ We can query the collection using text as normal, since the `OpenCLIPEmbeddingFunction` works with both text and images.
225
+ """
226
+ )
227
+ return
 
 
 
 
 
 
 
 
 
 
 
228
 
 
 
 
 
 
 
 
 
 
 
229
 
230
+ @app.cell(hide_code=True)
231
+ def _(mo):
232
+ query = mo.ui.text_area(label="Query with text", full_width=True).form(
233
+ bordered=False
234
+ )
235
+ mo.vstack([query, mo.md("Try: *animal* or *vehicle*")])
236
+ return (query,)
237
 
 
 
 
238
 
239
+ @app.cell
240
+ def _(collection, mo, query):
241
+ mo.stop(not query.value)
242
+ _retrieved = collection.query(
243
+ query_texts=[query.value], include=["data"], n_results=3
244
  )
245
+
246
+ [mo.image(img, height=200) for img in _retrieved["data"][0]]
247
  return
248
 
249
 
250
  @app.cell(hide_code=True)
251
+ def _(mo):
252
  mo.md(
253
+ r"""
254
+ /// admonition | One more thing!
255
+ We can also query by images directly, by using the `query_images` field in the `collection.query` method.
256
+ ///
257
  """
258
  )
259
  return
260
 
261
 
262
  @app.cell
263
+ def _(collection, mo, selected_image):
264
+ mo.stop(not selected_image.value)
265
+ import numpy as np
266
+ from PIL import Image
267
+
268
+ query_image = np.array(Image.open(selected_image.path()))
269
+ selected = mo.as_html(mo.image(query_image))
270
+
271
+ _retrieved = collection.query(
272
+ query_images=[query_image], include=["data"], n_results=5
273
+ )
274
+ results = [mo.image(_img) for _img in _retrieved["data"][0][1:]]
275
+ return results, selected
276
 
277
 
278
  @app.cell(hide_code=True)
279
+ def _(IMAGE_FOLDER, mo):
280
+ selected_image = mo.ui.file_browser(IMAGE_FOLDER, multiple=False)
281
+ selected_image
282
+ return (selected_image,)
283
 
284
 
285
  @app.cell(hide_code=True)
286
+ def _(mo, results, selected):
287
+ mo.hstack(
288
+ [
289
+ mo.vstack([mo.md("## Selected"), selected]),
290
+ mo.vstack([mo.md("## Similar"), *results]),
291
+ ],
292
+ widths="equal",
293
+ gap=4,
294
  )
295
  return
296
 
297
 
298
  @app.cell(hide_code=True)
299
+ def _(mo):
300
+ mo.md(r"""This example was adapted from [multimodal_retrieval.ipynb](https://github.com/chroma-core/chroma/blob/main/examples/multimodal/multimodal_retrieval.ipynb), using `marimo convert`.""")
301
+ return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
 
303
 
304
  if __name__ == "__main__":
305
+ app.run()