Spaces:
Running
on
Zero
Running
on
Zero
Update
Browse files
- app_pr.py +1 -2
- pyproject.toml +1 -0
- requirements.txt +2 -0
- table.py +21 -9
- uv.lock +24 -0
app_pr.py
CHANGED
@@ -9,7 +9,7 @@ import polars as pl
|
|
9 |
from gradio_modal import Modal
|
10 |
from huggingface_hub import CommitOperationAdd, HfApi
|
11 |
|
12 |
-
from table import PATCH_REPO_ID, PATCH_REPO_PR_BRANCH, df_orig
|
13 |
|
14 |
# TODO: remove this once https://github.com/gradio-app/gradio/issues/11022 is fixed # noqa: FIX002, TD002
|
15 |
NOTE = """\
|
@@ -273,7 +273,6 @@ def open_pr(
|
|
273 |
operations=[commit],
|
274 |
commit_message=f"Update {paper_id}",
|
275 |
repo_type="dataset",
|
276 |
-
revision=PATCH_REPO_PR_BRANCH,
|
277 |
create_pr=True,
|
278 |
token=oauth_token.token if oauth_token else None,
|
279 |
)
|
|
|
9 |
from gradio_modal import Modal
|
10 |
from huggingface_hub import CommitOperationAdd, HfApi
|
11 |
|
12 |
+
from table import PATCH_REPO_ID, df_orig
|
13 |
|
14 |
# TODO: remove this once https://github.com/gradio-app/gradio/issues/11022 is fixed # noqa: FIX002, TD002
|
15 |
NOTE = """\
|
|
|
273 |
operations=[commit],
|
274 |
commit_message=f"Update {paper_id}",
|
275 |
repo_type="dataset",
|
|
|
276 |
create_pr=True,
|
277 |
token=oauth_token.token if oauth_token else None,
|
278 |
)
|
pyproject.toml
CHANGED
@@ -10,6 +10,7 @@ dependencies = [
|
|
10 |
"gradio-modal>=0.0.4",
|
11 |
"hf-transfer>=0.1.9",
|
12 |
"polars>=1.27.1",
|
|
|
13 |
]
|
14 |
|
15 |
[tool.ruff]
|
|
|
10 |
"gradio-modal>=0.0.4",
|
11 |
"hf-transfer>=0.1.9",
|
12 |
"polars>=1.27.1",
|
13 |
+
"loguru>=0.7.3",
|
14 |
]
|
15 |
|
16 |
[tool.ruff]
|
requirements.txt
CHANGED
@@ -101,6 +101,8 @@ itsdangerous==2.2.0
|
|
101 |
# via gradio
|
102 |
jinja2==3.1.6
|
103 |
# via gradio
|
|
|
|
|
104 |
markdown-it-py==3.0.0
|
105 |
# via rich
|
106 |
markupsafe==3.0.2
|
|
|
101 |
# via gradio
|
102 |
jinja2==3.1.6
|
103 |
# via gradio
|
104 |
+
loguru==0.7.3
|
105 |
+
# via iclr2025 (pyproject.toml)
|
106 |
markdown-it-py==3.0.0
|
107 |
# via rich
|
108 |
markupsafe==3.0.2
|
table.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import datasets
|
2 |
import polars as pl
|
|
|
3 |
|
4 |
BASE_REPO_ID = "ai-conferences/ICLR2025"
|
5 |
PATCH_REPO_ID = "ai-conferences/ICLR2025-patches"
|
6 |
-
PATCH_REPO_PR_BRANCH = "raw-jsons"
|
7 |
PAPER_PAGE_REPO_ID = "hysts-bot-data/paper-pages-slim"
|
8 |
|
9 |
|
@@ -56,15 +56,27 @@ df_paper_page = (
|
|
56 |
)
|
57 |
df_orig = df_orig.join(df_paper_page, on="arxiv_id", how="left")
|
58 |
|
59 |
-
|
60 |
-
df_patches =
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
)
|
66 |
-
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
# format authors
|
70 |
df_orig = df_orig.with_columns(pl.col("authors").list.join(", ").alias("authors_str"))
|
|
|
1 |
import datasets
|
2 |
import polars as pl
|
3 |
+
from loguru import logger
|
4 |
|
5 |
BASE_REPO_ID = "ai-conferences/ICLR2025"
|
6 |
PATCH_REPO_ID = "ai-conferences/ICLR2025-patches"
|
|
|
7 |
PAPER_PAGE_REPO_ID = "hysts-bot-data/paper-pages-slim"
|
8 |
|
9 |
|
|
|
56 |
)
|
57 |
df_orig = df_orig.join(df_paper_page, on="arxiv_id", how="left")
|
58 |
|
59 |
+
try:
|
60 |
+
df_patches = (
|
61 |
+
datasets.load_dataset(PATCH_REPO_ID, split="train")
|
62 |
+
.to_polars()
|
63 |
+
.drop("diff")
|
64 |
+
.with_columns(pl.col("timestamp").str.strptime(pl.Datetime, "%+"))
|
65 |
)
|
66 |
+
df_patches = get_patch_latest_values(df_patches, df_orig.columns, id_col="paper_id", timestamp_col="timestamp")
|
67 |
+
df_orig = (
|
68 |
+
df_orig.join(df_patches, on="paper_id", how="left")
|
69 |
+
.with_columns(
|
70 |
+
[
|
71 |
+
pl.coalesce([pl.col(col + "_right"), pl.col(col)]).alias(col)
|
72 |
+
for col in df_orig.columns
|
73 |
+
if col != "paper_id"
|
74 |
+
]
|
75 |
+
)
|
76 |
+
.select(df_orig.columns)
|
77 |
+
)
|
78 |
+
except Exception as e: # noqa: BLE001
|
79 |
+
logger.warning(e)
|
80 |
|
81 |
# format authors
|
82 |
df_orig = df_orig.with_columns(pl.col("authors").list.join(", ").alias("authors_str"))
|
uv.lock
CHANGED
@@ -745,6 +745,7 @@ dependencies = [
|
|
745 |
{ name = "gradio", extra = ["oauth"] },
|
746 |
{ name = "gradio-modal" },
|
747 |
{ name = "hf-transfer" },
|
|
|
748 |
{ name = "polars" },
|
749 |
]
|
750 |
|
@@ -754,6 +755,7 @@ requires-dist = [
|
|
754 |
{ name = "gradio", extras = ["oauth"], specifier = ">=5.25.2" },
|
755 |
{ name = "gradio-modal", specifier = ">=0.0.4" },
|
756 |
{ name = "hf-transfer", specifier = ">=0.1.9" },
|
|
|
757 |
{ name = "polars", specifier = ">=1.27.1" },
|
758 |
]
|
759 |
|
@@ -787,6 +789,19 @@ wheels = [
|
|
787 |
{ url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 },
|
788 |
]
|
789 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
790 |
[[package]]
|
791 |
name = "markdown-it-py"
|
792 |
version = "3.0.0"
|
@@ -1849,6 +1864,15 @@ wheels = [
|
|
1849 |
{ url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743 },
|
1850 |
]
|
1851 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1852 |
[[package]]
|
1853 |
name = "xxhash"
|
1854 |
version = "3.5.0"
|
|
|
745 |
{ name = "gradio", extra = ["oauth"] },
|
746 |
{ name = "gradio-modal" },
|
747 |
{ name = "hf-transfer" },
|
748 |
+
{ name = "loguru" },
|
749 |
{ name = "polars" },
|
750 |
]
|
751 |
|
|
|
755 |
{ name = "gradio", extras = ["oauth"], specifier = ">=5.25.2" },
|
756 |
{ name = "gradio-modal", specifier = ">=0.0.4" },
|
757 |
{ name = "hf-transfer", specifier = ">=0.1.9" },
|
758 |
+
{ name = "loguru", specifier = ">=0.7.3" },
|
759 |
{ name = "polars", specifier = ">=1.27.1" },
|
760 |
]
|
761 |
|
|
|
789 |
{ url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 },
|
790 |
]
|
791 |
|
792 |
+
[[package]]
|
793 |
+
name = "loguru"
|
794 |
+
version = "0.7.3"
|
795 |
+
source = { registry = "https://pypi.org/simple" }
|
796 |
+
dependencies = [
|
797 |
+
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
798 |
+
{ name = "win32-setctime", marker = "sys_platform == 'win32'" },
|
799 |
+
]
|
800 |
+
sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559 }
|
801 |
+
wheels = [
|
802 |
+
{ url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595 },
|
803 |
+
]
|
804 |
+
|
805 |
[[package]]
|
806 |
name = "markdown-it-py"
|
807 |
version = "3.0.0"
|
|
|
1864 |
{ url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743 },
|
1865 |
]
|
1866 |
|
1867 |
+
[[package]]
|
1868 |
+
name = "win32-setctime"
|
1869 |
+
version = "1.2.0"
|
1870 |
+
source = { registry = "https://pypi.org/simple" }
|
1871 |
+
sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867 }
|
1872 |
+
wheels = [
|
1873 |
+
{ url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083 },
|
1874 |
+
]
|
1875 |
+
|
1876 |
[[package]]
|
1877 |
name = "xxhash"
|
1878 |
version = "3.5.0"
|