hysts HF Staff committed on
Commit
1c00c70
·
1 Parent(s): d06d36f
Files changed (5) hide show
  1. app_pr.py +1 -2
  2. pyproject.toml +1 -0
  3. requirements.txt +2 -0
  4. table.py +21 -9
  5. uv.lock +24 -0
app_pr.py CHANGED
@@ -9,7 +9,7 @@ import polars as pl
9
  from gradio_modal import Modal
10
  from huggingface_hub import CommitOperationAdd, HfApi
11
 
12
- from table import PATCH_REPO_ID, PATCH_REPO_PR_BRANCH, df_orig
13
 
14
  # TODO: remove this once https://github.com/gradio-app/gradio/issues/11022 is fixed # noqa: FIX002, TD002
15
  NOTE = """\
@@ -273,7 +273,6 @@ def open_pr(
273
  operations=[commit],
274
  commit_message=f"Update {paper_id}",
275
  repo_type="dataset",
276
- revision=PATCH_REPO_PR_BRANCH,
277
  create_pr=True,
278
  token=oauth_token.token if oauth_token else None,
279
  )
 
9
  from gradio_modal import Modal
10
  from huggingface_hub import CommitOperationAdd, HfApi
11
 
12
+ from table import PATCH_REPO_ID, df_orig
13
 
14
  # TODO: remove this once https://github.com/gradio-app/gradio/issues/11022 is fixed # noqa: FIX002, TD002
15
  NOTE = """\
 
273
  operations=[commit],
274
  commit_message=f"Update {paper_id}",
275
  repo_type="dataset",
 
276
  create_pr=True,
277
  token=oauth_token.token if oauth_token else None,
278
  )
pyproject.toml CHANGED
@@ -10,6 +10,7 @@ dependencies = [
10
  "gradio-modal>=0.0.4",
11
  "hf-transfer>=0.1.9",
12
  "polars>=1.27.1",
 
13
  ]
14
 
15
  [tool.ruff]
 
10
  "gradio-modal>=0.0.4",
11
  "hf-transfer>=0.1.9",
12
  "polars>=1.27.1",
13
+ "loguru>=0.7.3",
14
  ]
15
 
16
  [tool.ruff]
requirements.txt CHANGED
@@ -101,6 +101,8 @@ itsdangerous==2.2.0
101
  # via gradio
102
  jinja2==3.1.6
103
  # via gradio
 
 
104
  markdown-it-py==3.0.0
105
  # via rich
106
  markupsafe==3.0.2
 
101
  # via gradio
102
  jinja2==3.1.6
103
  # via gradio
104
+ loguru==0.7.3
105
+ # via iclr2025 (pyproject.toml)
106
  markdown-it-py==3.0.0
107
  # via rich
108
  markupsafe==3.0.2
table.py CHANGED
@@ -1,9 +1,9 @@
1
  import datasets
2
  import polars as pl
 
3
 
4
  BASE_REPO_ID = "ai-conferences/ICLR2025"
5
  PATCH_REPO_ID = "ai-conferences/ICLR2025-patches"
6
- PATCH_REPO_PR_BRANCH = "raw-jsons"
7
  PAPER_PAGE_REPO_ID = "hysts-bot-data/paper-pages-slim"
8
 
9
 
@@ -56,15 +56,27 @@ df_paper_page = (
56
  )
57
  df_orig = df_orig.join(df_paper_page, on="arxiv_id", how="left")
58
 
59
- df_patches = datasets.load_dataset(PATCH_REPO_ID, revision="main", split="train").to_polars().drop("diff")
60
- df_patches = get_patch_latest_values(df_patches, df_orig.columns, id_col="paper_id", timestamp_col="timestamp")
61
- df_orig = (
62
- df_orig.join(df_patches, on="paper_id", how="left")
63
- .with_columns(
64
- [pl.coalesce([pl.col(col + "_right"), pl.col(col)]).alias(col) for col in df_orig.columns if col != "paper_id"]
65
  )
66
- .select(df_orig.columns)
67
- )
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  # format authors
70
  df_orig = df_orig.with_columns(pl.col("authors").list.join(", ").alias("authors_str"))
 
1
  import datasets
2
  import polars as pl
3
+ from loguru import logger
4
 
5
  BASE_REPO_ID = "ai-conferences/ICLR2025"
6
  PATCH_REPO_ID = "ai-conferences/ICLR2025-patches"
 
7
  PAPER_PAGE_REPO_ID = "hysts-bot-data/paper-pages-slim"
8
 
9
 
 
56
  )
57
  df_orig = df_orig.join(df_paper_page, on="arxiv_id", how="left")
58
 
59
+ try:
60
+ df_patches = (
61
+ datasets.load_dataset(PATCH_REPO_ID, split="train")
62
+ .to_polars()
63
+ .drop("diff")
64
+ .with_columns(pl.col("timestamp").str.strptime(pl.Datetime, "%+"))
65
  )
66
+ df_patches = get_patch_latest_values(df_patches, df_orig.columns, id_col="paper_id", timestamp_col="timestamp")
67
+ df_orig = (
68
+ df_orig.join(df_patches, on="paper_id", how="left")
69
+ .with_columns(
70
+ [
71
+ pl.coalesce([pl.col(col + "_right"), pl.col(col)]).alias(col)
72
+ for col in df_orig.columns
73
+ if col != "paper_id"
74
+ ]
75
+ )
76
+ .select(df_orig.columns)
77
+ )
78
+ except Exception as e: # noqa: BLE001
79
+ logger.warning(e)
80
 
81
  # format authors
82
  df_orig = df_orig.with_columns(pl.col("authors").list.join(", ").alias("authors_str"))
uv.lock CHANGED
@@ -745,6 +745,7 @@ dependencies = [
745
  { name = "gradio", extra = ["oauth"] },
746
  { name = "gradio-modal" },
747
  { name = "hf-transfer" },
 
748
  { name = "polars" },
749
  ]
750
 
@@ -754,6 +755,7 @@ requires-dist = [
754
  { name = "gradio", extras = ["oauth"], specifier = ">=5.25.2" },
755
  { name = "gradio-modal", specifier = ">=0.0.4" },
756
  { name = "hf-transfer", specifier = ">=0.1.9" },
 
757
  { name = "polars", specifier = ">=1.27.1" },
758
  ]
759
 
@@ -787,6 +789,19 @@ wheels = [
787
  { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 },
788
  ]
789
 
 
 
 
 
 
 
 
 
 
 
 
 
 
790
  [[package]]
791
  name = "markdown-it-py"
792
  version = "3.0.0"
@@ -1849,6 +1864,15 @@ wheels = [
1849
  { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743 },
1850
  ]
1851
 
 
 
 
 
 
 
 
 
 
1852
  [[package]]
1853
  name = "xxhash"
1854
  version = "3.5.0"
 
745
  { name = "gradio", extra = ["oauth"] },
746
  { name = "gradio-modal" },
747
  { name = "hf-transfer" },
748
+ { name = "loguru" },
749
  { name = "polars" },
750
  ]
751
 
 
755
  { name = "gradio", extras = ["oauth"], specifier = ">=5.25.2" },
756
  { name = "gradio-modal", specifier = ">=0.0.4" },
757
  { name = "hf-transfer", specifier = ">=0.1.9" },
758
+ { name = "loguru", specifier = ">=0.7.3" },
759
  { name = "polars", specifier = ">=1.27.1" },
760
  ]
761
 
 
789
  { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 },
790
  ]
791
 
792
+ [[package]]
793
+ name = "loguru"
794
+ version = "0.7.3"
795
+ source = { registry = "https://pypi.org/simple" }
796
+ dependencies = [
797
+ { name = "colorama", marker = "sys_platform == 'win32'" },
798
+ { name = "win32-setctime", marker = "sys_platform == 'win32'" },
799
+ ]
800
+ sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559 }
801
+ wheels = [
802
+ { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595 },
803
+ ]
804
+
805
  [[package]]
806
  name = "markdown-it-py"
807
  version = "3.0.0"
 
1864
  { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743 },
1865
  ]
1866
 
1867
+ [[package]]
1868
+ name = "win32-setctime"
1869
+ version = "1.2.0"
1870
+ source = { registry = "https://pypi.org/simple" }
1871
+ sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867 }
1872
+ wheels = [
1873
+ { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083 },
1874
+ ]
1875
+
1876
  [[package]]
1877
  name = "xxhash"
1878
  version = "3.5.0"