hysts (HF Staff) committed
Commit d06d36f · 1 Parent(s): 53d72f4
Files changed (12)
  1. .pre-commit-config.yaml +33 -0
  2. .python-version +1 -0
  3. .vscode/extensions.json +8 -0
  4. .vscode/settings.json +17 -0
  5. README.md +6 -3
  6. app.py +208 -0
  7. app_pr.py +403 -0
  8. pyproject.toml +54 -0
  9. requirements.txt +225 -0
  10. style.css +4 -0
  11. table.py +116 -0
  12. uv.lock +0 -0
.pre-commit-config.yaml ADDED
@@ -0,0 +1,33 @@
+ repos:
+   - repo: https://github.com/pre-commit/pre-commit-hooks
+     rev: v5.0.0
+     hooks:
+       - id: check-executables-have-shebangs
+       - id: check-json
+       - id: check-merge-conflict
+       - id: check-shebang-scripts-are-executable
+       - id: check-toml
+       - id: check-yaml
+       - id: end-of-file-fixer
+       - id: mixed-line-ending
+         args: ["--fix=lf"]
+       - id: requirements-txt-fixer
+       - id: trailing-whitespace
+   - repo: https://github.com/astral-sh/ruff-pre-commit
+     rev: v0.11.2
+     hooks:
+       - id: ruff
+         args: ["--fix"]
+       - id: ruff-format
+   - repo: https://github.com/pre-commit/mirrors-mypy
+     rev: v1.15.0
+     hooks:
+       - id: mypy
+         args: ["--ignore-missing-imports"]
+         additional_dependencies:
+           [
+             "types-python-slugify",
+             "types-pytz",
+             "types-PyYAML",
+             "types-requests",
+           ]
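Once installed, these hooks run automatically on every `git commit`. A minimal sketch of wiring them up from Python (the `pre-commit` CLI is the usual entry point; the subprocess calls below are just for illustration and assume `pre-commit` is on PATH):

```python
import subprocess

# Equivalent to `pre-commit install` followed by `pre-commit run --all-files`:
# registers the git hook, then runs every configured hook against the whole repo.
subprocess.run(["pre-commit", "install"], check=True)
subprocess.run(["pre-commit", "run", "--all-files"], check=True)
```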
.python-version ADDED
@@ -0,0 +1 @@
+ 3.10
.vscode/extensions.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "recommendations": [
+     "ms-python.python",
+     "charliermarsh.ruff",
+     "streetsidesoftware.code-spell-checker",
+     "tamasfe.even-better-toml"
+   ]
+ }
.vscode/settings.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "editor.formatOnSave": true,
+   "files.insertFinalNewline": false,
+   "[python]": {
+     "editor.defaultFormatter": "charliermarsh.ruff",
+     "editor.formatOnType": true,
+     "editor.codeActionsOnSave": {
+       "source.fixAll.ruff": "explicit",
+       "source.organizeImports": "explicit"
+     }
+   },
+   "[jupyter]": {
+     "files.insertFinalNewline": false
+   },
+   "notebook.output.scrolling": true,
+   "notebook.formatOnSave.enabled": true
+ }
README.md CHANGED
@@ -1,12 +1,15 @@
  ---
  title: ICLR2025
- emoji: 📚
+ emoji:
- colorFrom: gray
+ colorFrom: red
  colorTo: purple
  sdk: gradio
- sdk_version: 5.23.1
+ sdk_version: 5.25.2
  app_file: app.py
  pinned: false
+ hf_oauth: true
+ hf_oauth_scopes:
+   - write-discussions
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
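The new `hf_oauth` metadata enables Sign in with Hugging Face for the Space; `app_pr.py` below consumes it through Gradio's OAuth hooks. A minimal standalone sketch of that pattern (illustrative only, not part of this commit):

```python
import gradio as gr

def show_login_state(profile: gr.OAuthProfile | None) -> str:
    # Gradio injects the OAuth profile based on the parameter's type hint;
    # it is None until the visitor signs in via the LoginButton.
    return "Not logged in." if profile is None else f"Logged in as {profile.username}"

with gr.Blocks() as demo:
    gr.LoginButton()
    status = gr.Markdown()
    demo.load(fn=show_login_state, outputs=status)

if __name__ == "__main__":
    demo.launch()
```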
app.py ADDED
@@ -0,0 +1,208 @@
+ #!/usr/bin/env python
+
+ import gradio as gr
+ import polars as pl
+
+ from app_pr import demo as demo_pr
+ from table import df_orig
+
+ DESCRIPTION = "# ICLR 2025"
+
+ TUTORIAL = """\
+ #### Claiming Authorship for Papers on arXiv
+
+ If your ICLR 2025 paper is available on arXiv and listed in the table below, you can claim authorship by following these steps:
+
+ 1. Find your paper in the table.
+ 2. Click the link to the paper page in the table.
+ 3. On that page, click your name.
+ 4. Click **"Claim authorship"**.
+    - You'll be redirected to the *Papers* section of your Settings.
+ 5. Confirm the request on the redirected page.
+
+ The admin team will review your request shortly.
+ Once confirmed, your paper page will be marked as verified, and you'll be able to add a project page and a GitHub repository.
+
+ If you need further help, check out the [guide here](https://huggingface.co/docs/hub/paper-pages).
+
+
+ #### Updating Missing or Incorrect Information in the Table
+
+ If you notice any missing or incorrect information in the table, feel free to submit a PR via the "Open PR" page, which you can find at the top right of this page.
+ """
+
+ # TODO: remove this once https://github.com/gradio-app/gradio/issues/10916 https://github.com/gradio-app/gradio/issues/11001 https://github.com/gradio-app/gradio/issues/11002 are fixed  # noqa: TD002, FIX002
+ NOTE = """\
+ Note: Sorting by upvotes or comments may not work correctly due to a known bug in Gradio.
+ """
+
+
+ df_main = df_orig.select(
+     "title",
+     "authors_str",
+     "openreview_md",
+     "type",
+     "paper_page_md",
+     "upvotes",
+     "num_comments",
+     "project_page_md",
+     "github_md",
+     "Spaces",
+     "Models",
+     "Datasets",
+     "claimed",
+ )
+
+ df_main = df_main.rename(
+     {
+         "title": "Title",
+         "authors_str": "Authors",
+         "openreview_md": "OpenReview",
+         "type": "Type",
+         "paper_page_md": "Paper page",
+         "upvotes": "👍",
+         "num_comments": "💬",
+         "project_page_md": "Project page",
+         "github_md": "GitHub",
+     }
+ )
+
+ # Maps each display column to its Gradio datatype and an optional fixed width.
+ COLUMN_INFO = {
+     "Title": ("str", "40%"),
+     "Authors": ("str", "20%"),
+     "Type": ("str", None),
+     "Paper page": ("markdown", "135px"),
+     "👍": ("number", "50px"),
+     "💬": ("number", "50px"),
+     "OpenReview": ("markdown", None),
+     "Project page": ("markdown", None),
+     "GitHub": ("markdown", None),
+     "Spaces": ("markdown", None),
+     "Models": ("markdown", None),
+     "Datasets": ("markdown", None),
+     "claimed": ("markdown", None),
+ }
+
+
+ DEFAULT_COLUMNS = [
+     "Title",
+     "Type",
+     "Paper page",
+     "👍",
+     "💬",
+     "OpenReview",
+     "Project page",
+     "GitHub",
+     "Spaces",
+     "Models",
+ ]
+
+
+ def update_num_papers(df: pl.DataFrame) -> str:
+     if "claimed" in df.columns:
+         return f"{len(df)} / {len(df_main)} ({df.select(pl.col('claimed').str.contains('✅').sum()).item()} claimed)"
+     return f"{len(df)} / {len(df_main)}"
+
+
+ def update_df(
+     title_search_query: str,
+     presentation_type: str,
+     column_names: list[str],
+     case_insensitive: bool = True,
+ ) -> gr.Dataframe:
+     df = df_main.clone()
+     column_names = ["Title", *column_names]
+
+     if title_search_query:
+         if case_insensitive:
+             title_search_query = f"(?i){title_search_query}"
+         try:
+             df = df.filter(pl.col("Title").str.contains(title_search_query))
+         except pl.exceptions.ComputeError as e:
+             raise gr.Error(str(e)) from e
+     if presentation_type != "(ALL)":
+         df = df.filter(pl.col("Type").str.contains(presentation_type))
+
+     sorted_column_names = [col for col in COLUMN_INFO if col in column_names]
+     df = df.select(sorted_column_names)
+     return gr.Dataframe(
+         value=df,
+         datatype=[COLUMN_INFO[col][0] for col in sorted_column_names],
+         column_widths=[COLUMN_INFO[col][1] for col in sorted_column_names],
+     )
+
+
+ with gr.Blocks(css_paths="style.css") as demo:
+     gr.Markdown(DESCRIPTION)
+     with gr.Accordion(label="Tutorial", open=True):
+         gr.Markdown(TUTORIAL)
+     with gr.Group():
+         search_title = gr.Textbox(label="Search title")
+         presentation_type = gr.Radio(
+             label="Presentation Type",
+             choices=["(ALL)", "Oral", "Spotlight", "Poster"],
+             value="(ALL)",
+         )
+         column_names = gr.CheckboxGroup(
+             label="Columns",
+             choices=[col for col in COLUMN_INFO if col != "Title"],
+             value=[col for col in DEFAULT_COLUMNS if col != "Title"],
+         )
+
+     num_papers = gr.Textbox(label="Number of papers", value=update_num_papers(df_orig), interactive=False)
+
+     gr.Markdown(NOTE)
+     df = gr.Dataframe(
+         value=df_main,
+         datatype=[dtype for dtype, _ in COLUMN_INFO.values()],  # datatype takes the dtype strings, not the (dtype, width) tuples
+         type="polars",
+         row_count=(0, "dynamic"),
+         show_row_numbers=True,
+         interactive=False,
+         max_height=1000,
+         elem_id="table",
+         column_widths=[COLUMN_INFO[col][1] for col in COLUMN_INFO],
+     )
+
+     inputs = [
+         search_title,
+         presentation_type,
+         column_names,
+     ]
+     gr.on(
+         triggers=[
+             search_title.submit,
+             presentation_type.input,
+             column_names.input,
+         ],
+         fn=update_df,
+         inputs=inputs,
+         outputs=df,
+         api_name=False,
+     ).then(
+         fn=update_num_papers,
+         inputs=df,
+         outputs=num_papers,
+         queue=False,
+         api_name=False,
+     )
+     demo.load(
+         fn=update_df,
+         inputs=inputs,
+         outputs=df,
+         api_name=False,
+     ).then(
+         fn=update_num_papers,
+         inputs=df,
+         outputs=num_papers,
+         queue=False,
+         api_name=False,
+     )
+
+
+ with demo.route("Open PR"):
+     demo_pr.render()
+
+
+ if __name__ == "__main__":
+     demo.queue(api_open=False).launch(show_api=False)
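As context for `update_df` above: polars treats `str.contains` patterns as regular expressions, so prepending the inline flag `(?i)` is what makes the title search case-insensitive. A self-contained sketch with toy data:

```python
import polars as pl

df = pl.DataFrame({"Title": ["Scaling Laws", "Diffusion Models", "scaling up RL"]})

# "(?i)" is an inline regex flag that makes the whole pattern case-insensitive.
print(df.filter(pl.col("Title").str.contains("(?i)scaling")))
# Matches "Scaling Laws" and "scaling up RL". An invalid pattern such as "("
# raises pl.exceptions.ComputeError, which update_df surfaces as a gr.Error.
```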
app_pr.py ADDED
@@ -0,0 +1,403 @@
+ import datetime
+ import difflib
+ import json
+ import re
+ import tempfile
+
+ import gradio as gr
+ import polars as pl
+ from gradio_modal import Modal
+ from huggingface_hub import CommitOperationAdd, HfApi
+
+ from table import PATCH_REPO_ID, PATCH_REPO_PR_BRANCH, df_orig
+
+ # TODO: remove this once https://github.com/gradio-app/gradio/issues/11022 is fixed  # noqa: FIX002, TD002
+ NOTE = """\
+ #### ⚠️ Note
+ You may encounter an issue when selecting table data after using the search bar.
+ This is due to a known bug in Gradio.
+
+ The issue typically occurs when multiple rows remain after filtering.
+ If only one row remains, the selection should work as expected.
+ """
+
+ api = HfApi()
+
+ PR_VIEW_COLUMNS = [
+     "title",
+     "authors_str",
+     "openreview_md",
+     "arxiv_id",
+     "github_md",
+     "Spaces",
+     "Models",
+     "Datasets",
+     "paper_id",
+ ]
+ PR_RAW_COLUMNS = [
+     "paper_id",
+     "title",
+     "authors",
+     "arxiv_id",
+     "project_page",
+     "github",
+     "space_ids",
+     "model_ids",
+     "dataset_ids",
+ ]
+
+ df_pr_view = df_orig.with_columns(pl.lit("📝").alias("Fix")).select(["Fix", *PR_VIEW_COLUMNS])
+ df_pr_view = df_pr_view.with_columns(pl.col("arxiv_id").fill_null(""))
+ df_pr_raw = df_orig.select(PR_RAW_COLUMNS)
+
+
+ def df_pr_row_selected(
+     evt: gr.SelectData,
+ ) -> tuple[
+     Modal,
+     gr.Textbox,  # title
+     gr.Textbox,  # authors
+     gr.Textbox,  # arxiv_id
+     gr.Textbox,  # project_page
+     gr.Textbox,  # github
+     gr.Textbox,  # space_ids
+     gr.Textbox,  # model_ids
+     gr.Textbox,  # dataset_ids
+     dict | None,  # original_data
+ ]:
+     if evt.value != "📝":
+         return (
+             Modal(),
+             gr.Textbox(),  # title
+             gr.Textbox(),  # authors
+             gr.Textbox(),  # arxiv_id
+             gr.Textbox(),  # project_page
+             gr.Textbox(),  # github
+             gr.Textbox(),  # space_ids
+             gr.Textbox(),  # model_ids
+             gr.Textbox(),  # dataset_ids
+             None,  # original_data
+         )
+
+     paper_id = evt.row_value[-1]
+     row = df_pr_raw.filter(pl.col("paper_id") == paper_id)
+     original_data = row.to_dicts()[0]
+     authors = original_data["authors"]
+     space_ids = original_data["space_ids"]
+     model_ids = original_data["model_ids"]
+     dataset_ids = original_data["dataset_ids"]
+     return (
+         Modal(visible=True),
+         gr.Textbox(value=row["title"].item()),  # title
+         gr.Textbox(value="\n".join(authors)),  # authors
+         gr.Textbox(value=row["arxiv_id"].item()),  # arxiv_id
+         gr.Textbox(value=row["project_page"].item()),  # project_page
+         gr.Textbox(value=row["github"].item()),  # github
+         gr.Textbox(value="\n".join(space_ids)),  # space_ids
+         gr.Textbox(value="\n".join(model_ids)),  # model_ids
+         gr.Textbox(value="\n".join(dataset_ids)),  # dataset_ids
+         original_data,  # original_data
+     )
+
+
+ URL_PATTERN = re.compile(r"^(https?://)?([a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}(:\d+)?(/.*)?$")
+ GITHUB_PATTERN = re.compile(r"^https://github\.com/[^/\s]+/[^/\s]+(/tree/[^/\s]+/[^/\s].*)?$")
+ REPO_ID_PATTERN = re.compile(r"^[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+$")
+ ARXIV_ID_PATTERN = re.compile(r"^\d{4}\.\d{4,5}$")
+
+
+ def is_valid_url(url: str) -> bool:
+     return URL_PATTERN.match(url) is not None
+
+
+ def is_valid_github_url(url: str) -> bool:
+     return GITHUB_PATTERN.match(url) is not None
+
+
+ def is_valid_repo_id(repo_id: str) -> bool:
+     return REPO_ID_PATTERN.match(repo_id) is not None
+
+
+ def is_valid_arxiv_id(arxiv_id: str) -> bool:
+     return ARXIV_ID_PATTERN.match(arxiv_id) is not None
+
+
+ def validate_pr_data(
+     title_pr: str,
+     authors_pr: str,
+     arxiv_id_pr: str,
+     project_page_pr: str,
+     github_pr: str,
+     space_ids: list[str],
+     model_ids: list[str],
+     dataset_ids: list[str],
+ ) -> None:
+     if not title_pr:
+         raise gr.Error("Title cannot be empty", print_exception=False)
+     if not authors_pr:
+         raise gr.Error("Authors cannot be empty", print_exception=False)
+
+     if arxiv_id_pr and not is_valid_arxiv_id(arxiv_id_pr):
+         raise gr.Error(
+             "Invalid arXiv ID format. Expected format: 'YYMM.NNNNN' (e.g., '2501.01234')", print_exception=False
+         )
+     if project_page_pr and not is_valid_url(project_page_pr):
+         raise gr.Error("Project page must be a valid URL", print_exception=False)
+     if github_pr and not is_valid_github_url(github_pr):
+         raise gr.Error("GitHub must be a valid GitHub URL", print_exception=False)
+
+     for repo_id in space_ids + model_ids + dataset_ids:
+         if not is_valid_repo_id(repo_id):
+             error_msg = f"Space/Model/Dataset ID must be in the format 'org_name/repo_name'. Got: {repo_id}"
+             raise gr.Error(error_msg, print_exception=False)
+
+
+ def format_submitted_data(
+     title_pr: str,
+     authors_pr: str,
+     arxiv_id_pr: str,
+     project_page_pr: str,
+     github_pr: str,
+     space_ids_pr: str,
+     model_ids_pr: str,
+     dataset_ids_pr: str,
+ ) -> dict:
+     space_ids = [repo_id for repo_id in space_ids_pr.split("\n") if repo_id.strip()]
+     model_ids = [repo_id for repo_id in model_ids_pr.split("\n") if repo_id.strip()]
+     dataset_ids = [repo_id for repo_id in dataset_ids_pr.split("\n") if repo_id.strip()]
+
+     validate_pr_data(title_pr, authors_pr, arxiv_id_pr, project_page_pr, github_pr, space_ids, model_ids, dataset_ids)
+
+     return {
+         "title": title_pr,
+         "authors": [a for a in authors_pr.split("\n") if a.strip()],
+         "arxiv_id": arxiv_id_pr if arxiv_id_pr else None,
+         "project_page": project_page_pr if project_page_pr else None,
+         "github": github_pr if github_pr else None,
+         "space_ids": space_ids,
+         "model_ids": model_ids,
+         "dataset_ids": dataset_ids,
+     }
+
+
+ def preview_diff(
+     title_pr: str,
+     authors_pr: str,
+     arxiv_id_pr: str,
+     project_page_pr: str,
+     github_pr: str,
+     space_ids_pr: str,
+     model_ids_pr: str,
+     dataset_ids_pr: str,
+     original_data: dict,
+ ) -> tuple[gr.Markdown, gr.Button]:
+     submitted_data = format_submitted_data(
+         title_pr,
+         authors_pr,
+         arxiv_id_pr,
+         project_page_pr,
+         github_pr,
+         space_ids_pr,
+         model_ids_pr,
+         dataset_ids_pr,
+     )
+     submitted_data = {"paper_id": original_data["paper_id"], **submitted_data}
+
+     original_json = json.dumps(original_data, indent=2)
+     submitted_json = json.dumps(submitted_data, indent=2)
+     diff = difflib.unified_diff(
+         original_json.splitlines(),
+         submitted_json.splitlines(),
+         fromfile="before",
+         tofile="after",
+         lineterm="",
+     )
+     diff_str = "\n".join(diff)
+     return gr.Markdown(value=f"```diff\n{diff_str}\n```"), gr.Button(visible=True)
+
+
+ def open_pr(
+     title_pr: str,
+     authors_pr: str,
+     arxiv_id_pr: str,
+     project_page_pr: str,
+     github_pr: str,
+     space_ids_pr: str,
+     model_ids_pr: str,
+     dataset_ids_pr: str,
+     original_data: dict,
+     oauth_token: gr.OAuthToken | None,
+ ) -> gr.Markdown:
+     submitted_data = format_submitted_data(
+         title_pr,
+         authors_pr,
+         arxiv_id_pr,
+         project_page_pr,
+         github_pr,
+         space_ids_pr,
+         model_ids_pr,
+         dataset_ids_pr,
+     )
+
+     diff_dict = {key: submitted_data[key] for key in submitted_data if submitted_data[key] != original_data[key]}
+
+     if not diff_dict:
+         gr.Info("No data to submit")
+         return ""
+
+     paper_id = original_data["paper_id"]
+     diff_dict["paper_id"] = paper_id
+
+     original_json = json.dumps(original_data, indent=2)
+     submitted_json = json.dumps(submitted_data, indent=2)
+     diff = "\n".join(
+         difflib.unified_diff(
+             original_json.splitlines(),
+             submitted_json.splitlines(),
+             fromfile="before",
+             tofile="after",
+             lineterm="",
+         )
+     )
+     diff_dict["diff"] = diff
+     timestamp = datetime.datetime.now(datetime.timezone.utc)
+     diff_dict["timestamp"] = timestamp.isoformat()
+
+     with tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False) as f:
+         json.dump(diff_dict, f, indent=2)
+         f.flush()
+
+         commit = CommitOperationAdd(f"data/{paper_id}--{timestamp.strftime('%Y-%m-%d-%H-%M-%S')}.json", f.name)
+         res = api.create_commit(
+             repo_id=PATCH_REPO_ID,
+             operations=[commit],
+             commit_message=f"Update {paper_id}",
+             repo_type="dataset",
+             revision=PATCH_REPO_PR_BRANCH,
+             create_pr=True,
+             token=oauth_token.token if oauth_token else None,
+         )
+     return gr.Markdown(value=res.pr_url, visible=True)
+
+
+ def render_open_pr_page(profile: gr.OAuthProfile | None) -> gr.Column:
+     # Only show the PR form to signed-in users.
+     return gr.Column(visible=profile is not None)
+
+
+ with gr.Blocks() as demo:
+     gr.LoginButton()
+     with gr.Column(visible=False) as open_pr_col:
+         gr.Markdown(NOTE)
+         df_pr = gr.Dataframe(
+             value=df_pr_view,
+             datatype=[
+                 "str",  # Fix
+                 "str",  # Title
+                 "str",  # Authors
+                 "markdown",  # openreview
+                 "str",  # arxiv_id
+                 "markdown",  # github
+                 "markdown",  # spaces
+                 "markdown",  # models
+                 "markdown",  # datasets
+                 "str",  # paper id
+             ],
+             column_widths=[
+                 "50px",  # Fix
+                 "40%",  # Title
+                 "20%",  # Authors
+                 None,  # openreview
+                 "100px",  # arxiv_id
+                 None,  # github
+                 None,  # spaces
+                 None,  # models
+                 None,  # datasets
+                 None,  # paper id
+             ],
+             type="polars",
+             row_count=(0, "dynamic"),
+             interactive=False,
+             max_height=1000,
+             show_search="search",
+         )
+     with Modal(visible=False) as pr_modal:
+         with gr.Group():
+             title_pr = gr.Textbox(label="Title")
+             authors_pr = gr.Textbox(label="Authors")
+             arxiv_id_pr = gr.Textbox(label="arXiv ID")
+             project_page_pr = gr.Textbox(label="Project page")
+             github_pr = gr.Textbox(label="GitHub")
+             spaces_pr = gr.Textbox(
+                 label="Spaces",
+                 info="Enter one space ID (e.g., 'org_name/space_name') per line.",
+             )
+             models_pr = gr.Textbox(
+                 label="Models",
+                 info="Enter one model ID (e.g., 'org_name/model_name') per line.",
+             )
+             datasets_pr = gr.Textbox(
+                 label="Datasets",
+                 info="Enter one dataset ID (e.g., 'org_name/dataset_name') per line.",
+             )
+         original_data = gr.State()
+         preview_diff_button = gr.Button("Preview diff")
+         diff_view = gr.Markdown()
+         open_pr_button = gr.Button("Open PR", visible=False)
+         pr_url = gr.Markdown(visible=False)
+
+     pr_modal.blur(
+         fn=lambda: (None, gr.Button(visible=False), gr.Markdown(visible=False)),
+         outputs=[diff_view, open_pr_button, pr_url],
+     )
+
+     df_pr.select(
+         fn=df_pr_row_selected,
+         outputs=[
+             pr_modal,
+             title_pr,
+             authors_pr,
+             arxiv_id_pr,
+             project_page_pr,
+             github_pr,
+             spaces_pr,
+             models_pr,
+             datasets_pr,
+             original_data,
+         ],
+     )
+     preview_diff_button.click(
+         fn=preview_diff,
+         inputs=[
+             title_pr,
+             authors_pr,
+             arxiv_id_pr,
+             project_page_pr,
+             github_pr,
+             spaces_pr,
+             models_pr,
+             datasets_pr,
+             original_data,
+         ],
+         outputs=[diff_view, open_pr_button],
+     )
+     open_pr_button.click(
+         fn=open_pr,
+         inputs=[
+             title_pr,
+             authors_pr,
+             arxiv_id_pr,
+             project_page_pr,
+             github_pr,
+             spaces_pr,
+             models_pr,
+             datasets_pr,
+             original_data,
+         ],
+         outputs=pr_url,
+     )
+
+     demo.load(fn=render_open_pr_page, outputs=open_pr_col)
+
+
+ if __name__ == "__main__":
+     demo.queue(api_open=False).launch(show_api=False)
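A quick standalone illustration of the regex validators above, with the patterns copied out of `app_pr.py` and made-up sample values:

```python
import re

# Same patterns as app_pr.py.
ARXIV_ID_PATTERN = re.compile(r"^\d{4}\.\d{4,5}$")
REPO_ID_PATTERN = re.compile(r"^[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+$")

assert ARXIV_ID_PATTERN.match("2501.01234")             # YYMM.NNNNN form passes
assert not ARXIV_ID_PATTERN.match("arXiv:2501.01234")   # prefixes are rejected
assert REPO_ID_PATTERN.match("org_name/repo_name")      # exactly one slash
assert not REPO_ID_PATTERN.match("org/name/extra")      # nested paths fail
```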
pyproject.toml ADDED
@@ -0,0 +1,54 @@
+ [project]
+ name = "iclr2025"
+ version = "0.1.0"
+ description = ""
+ readme = "README.md"
+ requires-python = ">=3.10"
+ dependencies = [
+     "datasets>=3.5.0",
+     "gradio[oauth]>=5.25.2",
+     "gradio-modal>=0.0.4",
+     "hf-transfer>=0.1.9",
+     "polars>=1.27.1",
+ ]
+
+ [tool.ruff]
+ line-length = 119
+
+ [tool.ruff.lint]
+ select = ["ALL"]
+ ignore = [
+     "COM812",  # missing-trailing-comma
+     "D203",  # one-blank-line-before-class
+     "D213",  # multi-line-summary-second-line
+     "E501",  # line-too-long
+     "SIM117",  # multiple-with-statements
+     #
+     "D100",  # undocumented-public-module
+     "D101",  # undocumented-public-class
+     "D102",  # undocumented-public-method
+     "D103",  # undocumented-public-function
+     "D104",  # undocumented-public-package
+     "D105",  # undocumented-magic-method
+     "D107",  # undocumented-public-init
+     "EM101",  # raw-string-in-exception
+     "FBT001",  # boolean-type-hint-positional-argument
+     "FBT002",  # boolean-default-value-positional-argument
+     "PD901",  # pandas-df-variable-name
+     "PGH003",  # blanket-type-ignore
+     "PLR0913",  # too-many-arguments
+     "PLR0915",  # too-many-statements
+     "TRY003",  # raise-vanilla-args
+ ]
+ unfixable = [
+     "F401",  # unused-import
+ ]
+
+ [tool.ruff.lint.pydocstyle]
+ convention = "google"
+
+ [tool.ruff.lint.per-file-ignores]
+ "*.ipynb" = ["T201", "T203"]
+
+ [tool.ruff.format]
+ docstring-code-format = true
requirements.txt ADDED
@@ -0,0 +1,225 @@
+ # This file was autogenerated by uv via the following command:
+ #    uv pip compile pyproject.toml -o requirements.txt
+ aiofiles==24.1.0
+     # via gradio
+ aiohappyeyeballs==2.6.1
+     # via aiohttp
+ aiohttp==3.11.16
+     # via
+     #   datasets
+     #   fsspec
+ aiosignal==1.3.2
+     # via aiohttp
+ annotated-types==0.7.0
+     # via pydantic
+ anyio==4.9.0
+     # via
+     #   gradio
+     #   httpx
+     #   starlette
+ async-timeout==5.0.1
+     # via aiohttp
+ attrs==25.3.0
+     # via aiohttp
+ authlib==1.5.2
+     # via gradio
+ certifi==2025.1.31
+     # via
+     #   httpcore
+     #   httpx
+     #   requests
+ cffi==1.17.1
+     # via cryptography
+ charset-normalizer==3.4.1
+     # via requests
+ click==8.1.8
+     # via
+     #   typer
+     #   uvicorn
+ cryptography==44.0.2
+     # via authlib
+ datasets==3.5.0
+     # via iclr2025 (pyproject.toml)
+ dill==0.3.8
+     # via
+     #   datasets
+     #   multiprocess
+ exceptiongroup==1.2.2
+     # via anyio
+ fastapi==0.115.12
+     # via gradio
+ ffmpy==0.5.0
+     # via gradio
+ filelock==3.18.0
+     # via
+     #   datasets
+     #   huggingface-hub
+ frozenlist==1.5.0
+     # via
+     #   aiohttp
+     #   aiosignal
+ fsspec==2024.12.0
+     # via
+     #   datasets
+     #   gradio-client
+     #   huggingface-hub
+ gradio==5.25.2
+     # via
+     #   iclr2025 (pyproject.toml)
+     #   gradio-modal
+ gradio-client==1.8.0
+     # via gradio
+ gradio-modal==0.0.4
+     # via iclr2025 (pyproject.toml)
+ groovy==0.1.2
+     # via gradio
+ h11==0.14.0
+     # via
+     #   httpcore
+     #   uvicorn
+ hf-transfer==0.1.9
+     # via iclr2025 (pyproject.toml)
+ httpcore==1.0.8
+     # via httpx
+ httpx==0.28.1
+     # via
+     #   gradio
+     #   gradio-client
+     #   safehttpx
+ huggingface-hub==0.30.2
+     # via
+     #   datasets
+     #   gradio
+     #   gradio-client
+ idna==3.10
+     # via
+     #   anyio
+     #   httpx
+     #   requests
+     #   yarl
+ itsdangerous==2.2.0
+     # via gradio
+ jinja2==3.1.6
+     # via gradio
+ markdown-it-py==3.0.0
+     # via rich
+ markupsafe==3.0.2
+     # via
+     #   gradio
+     #   jinja2
+ mdurl==0.1.2
+     # via markdown-it-py
+ multidict==6.4.3
+     # via
+     #   aiohttp
+     #   yarl
+ multiprocess==0.70.16
+     # via datasets
+ numpy==2.2.4
+     # via
+     #   datasets
+     #   gradio
+     #   pandas
+ orjson==3.10.16
+     # via gradio
+ packaging==24.2
+     # via
+     #   datasets
+     #   gradio
+     #   gradio-client
+     #   huggingface-hub
+ pandas==2.2.3
+     # via
+     #   datasets
+     #   gradio
+ pillow==11.2.1
+     # via gradio
+ polars==1.27.1
+     # via iclr2025 (pyproject.toml)
+ propcache==0.3.1
+     # via
+     #   aiohttp
+     #   yarl
+ pyarrow==19.0.1
+     # via datasets
+ pycparser==2.22
+     # via cffi
+ pydantic==2.11.3
+     # via
+     #   fastapi
+     #   gradio
+ pydantic-core==2.33.1
+     # via pydantic
+ pydub==0.25.1
+     # via gradio
+ pygments==2.19.1
+     # via rich
+ python-dateutil==2.9.0.post0
+     # via pandas
+ python-multipart==0.0.20
+     # via gradio
+ pytz==2025.2
+     # via pandas
+ pyyaml==6.0.2
+     # via
+     #   datasets
+     #   gradio
+     #   huggingface-hub
+ requests==2.32.3
+     # via
+     #   datasets
+     #   huggingface-hub
+ rich==14.0.0
+     # via typer
+ ruff==0.11.5
+     # via gradio
+ safehttpx==0.1.6
+     # via gradio
+ semantic-version==2.10.0
+     # via gradio
+ shellingham==1.5.4
+     # via typer
+ six==1.17.0
+     # via python-dateutil
+ sniffio==1.3.1
+     # via anyio
+ starlette==0.46.2
+     # via
+     #   fastapi
+     #   gradio
+ tomlkit==0.13.2
+     # via gradio
+ tqdm==4.67.1
+     # via
+     #   datasets
+     #   huggingface-hub
+ typer==0.15.2
+     # via gradio
+ typing-extensions==4.13.2
+     # via
+     #   anyio
+     #   fastapi
+     #   gradio
+     #   gradio-client
+     #   huggingface-hub
+     #   multidict
+     #   pydantic
+     #   pydantic-core
+     #   rich
+     #   typer
+     #   typing-inspection
+     #   uvicorn
+ typing-inspection==0.4.0
+     # via pydantic
+ tzdata==2025.2
+     # via pandas
+ urllib3==2.4.0
+     # via requests
+ uvicorn==0.34.1
+     # via gradio
+ websockets==15.0.1
+     # via gradio-client
+ xxhash==3.5.0
+     # via datasets
+ yarl==1.19.0
+     # via aiohttp
style.css ADDED
@@ -0,0 +1,4 @@
+ h1 {
+   text-align: center;
+   display: block;
+ }
table.py ADDED
@@ -0,0 +1,116 @@
+ import datasets
+ import polars as pl
+
+ BASE_REPO_ID = "ai-conferences/ICLR2025"
+ PATCH_REPO_ID = "ai-conferences/ICLR2025-patches"
+ PATCH_REPO_PR_BRANCH = "raw-jsons"
+ PAPER_PAGE_REPO_ID = "hysts-bot-data/paper-pages-slim"
+
+
+ def get_patch_latest_values(
+     df: pl.DataFrame, all_columns: list[str], id_col: str, timestamp_col: str = "timestamp"
+ ) -> pl.DataFrame:
+     df = df.sort(timestamp_col)
+     update_columns = [col for col in df.columns if col not in (id_col, timestamp_col)]
+
+     # Long format: one row per (timestamp, id, column, value); drop cells a patch left untouched.
+     melted = df.unpivot(on=update_columns, index=[timestamp_col, id_col])
+     melted = melted.drop_nulls()
+
+     # Keep the most recent value per (id, column), then widen back to one row per id.
+     latest_rows = (
+         melted.sort(timestamp_col)
+         .group_by([id_col, "variable"])
+         .agg(pl.col("value").last())
+         .pivot("variable", index=id_col, values="value")
+     )
+
+     for col in all_columns:
+         if col != id_col and col not in latest_rows.columns:
+             latest_rows = latest_rows.with_columns(pl.lit(None).alias(col))
+
+     return latest_rows.select([id_col] + [col for col in all_columns if col != id_col])
+
+
+ def format_author_claim_ratio(row: dict) -> str:
+     n_linked_authors = row["n_linked_authors"]
+     n_authors = row["n_authors"]
+
+     if n_linked_authors is None or n_authors is None:
+         return ""
+
+     author_linked = "✅" if n_linked_authors > 0 else ""
+     return f"{n_linked_authors}/{n_authors} {author_linked}".strip()
+
+
+ df_orig = (
+     datasets.load_dataset(BASE_REPO_ID, split="train")
+     .to_polars()
+     .rename({"paper_url": "openreview", "submission_number": "paper_id"})
+     .with_columns(
+         pl.lit([], dtype=pl.List(pl.Utf8)).alias(col_name) for col_name in ["space_ids", "model_ids", "dataset_ids"]
+     )
+ )
+ df_paper_page = (
+     datasets.load_dataset(PAPER_PAGE_REPO_ID, split="train")
+     .to_polars()
+     .drop(["summary", "author_names", "ai_keywords"])
+ )
+ df_orig = df_orig.join(df_paper_page, on="arxiv_id", how="left")
+
+ df_patches = datasets.load_dataset(PATCH_REPO_ID, revision="main", split="train").to_polars().drop("diff")
+ df_patches = get_patch_latest_values(df_patches, df_orig.columns, id_col="paper_id", timestamp_col="timestamp")
+ df_orig = (
+     df_orig.join(df_patches, on="paper_id", how="left")
+     .with_columns(
+         [pl.coalesce([pl.col(col + "_right"), pl.col(col)]).alias(col) for col in df_orig.columns if col != "paper_id"]
+     )
+     .select(df_orig.columns)
+ )
+
+ # format authors
+ df_orig = df_orig.with_columns(pl.col("authors").list.join(", ").alias("authors_str"))
+ # format links
+ df_orig = df_orig.with_columns(
+     [
+         pl.format("[link]({})", pl.col(col)).fill_null("").alias(f"{col}_md")
+         for col in ["openreview", "project_page", "github"]
+     ]
+ )
+ # format paper page link
+ df_orig = df_orig.with_columns(
+     (pl.lit("https://huggingface.co/papers/") + pl.col("arxiv_id")).alias("paper_page")
+ ).with_columns(pl.format("[{}]({})", pl.col("arxiv_id"), pl.col("paper_page")).fill_null("").alias("paper_page_md"))
+
+ # count authors
+ df_orig = df_orig.with_columns(pl.col("authors").list.len().alias("n_authors"))
+ df_orig = df_orig.with_columns(
+     pl.col("author_usernames")
+     .map_elements(lambda lst: sum(x is not None for x in lst) if lst is not None else None, return_dtype=pl.Int64)
+     .alias("n_linked_authors")
+ )
+ df_orig = df_orig.with_columns(
+     pl.struct(["n_linked_authors", "n_authors"])
+     .map_elements(format_author_claim_ratio, return_dtype=pl.Utf8)
+     .alias("claimed")
+ )
+
+ # TODO: Fix this once https://github.com/gradio-app/gradio/issues/10916 is fixed  # noqa: FIX002, TD002
+ # format numbers as strings
+ df_orig = df_orig.with_columns(
+     [pl.col(col).cast(pl.Utf8).fill_null("").alias(col) for col in ["upvotes", "num_comments"]]
+ )
+
+ # format spaces, models, datasets
+ for repo_id_col, markdown_col, base_url in [
+     ("space_ids", "Spaces", "https://huggingface.co/spaces/"),
+     ("model_ids", "Models", "https://huggingface.co/"),
+     ("dataset_ids", "Datasets", "https://huggingface.co/datasets/"),
+ ]:
+     df_orig = df_orig.with_columns(
+         pl.col(repo_id_col)
+         .map_elements(
+             lambda lst: "\n".join([f"[link]({base_url}{x})" for x in lst]) if lst is not None else None,  # noqa: B023
+             return_dtype=pl.Utf8,
+         )
+         .fill_null("")
+         .alias(markdown_col)
+     )
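To make the "latest patch wins" merge in `get_patch_latest_values` concrete, here is a toy run (it assumes the function has been copied out of `table.py`, since importing the module itself triggers the dataset downloads above):

```python
import polars as pl

# Two patches touch the same paper; a null means "field not changed by this patch".
patches = pl.DataFrame(
    {
        "paper_id": ["42", "42"],
        "timestamp": ["2025-01-01T00:00:00", "2025-02-01T00:00:00"],
        "github": ["https://github.com/old/repo", None],
        "project_page": [None, "https://example.com"],
    }
)
latest = get_patch_latest_values(patches, ["paper_id", "github", "project_page"], id_col="paper_id")
print(latest)
# One row for paper 42: github keeps the January value (the February patch
# left it null), while project_page takes the newer February value.
```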
uv.lock ADDED
The diff for this file is too large to render. See raw diff