cfahlgren1 HF Staff committed on
Commit
1de78b4
·
1 Parent(s): a425fdf
Files changed (1) hide show
  1. pages/index.md +62 -34
pages/index.md CHANGED
@@ -5,25 +5,29 @@ title: 🤗 Hub Stats
5
  _Note: The charts are updated daily via the <a class="font-bold text-blue" href="https://huggingface.co/datasets/cfahlgren1/hub-stats" target="_blank" rel="noopener noreferrer">hub-stats dataset</a>. It may take a little while for the data to load as the queries are running entirely in the browser._
6
 
7
  ```sql hub_growth
 
8
  WITH all_data AS (
9
  SELECT
10
- DATE_TRUNC('month', createdAt::TIMESTAMP) AS month,
11
- 'model' AS repo
12
- FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/models/train/0000.parquet?download=true')
13
-
 
14
  UNION ALL
15
-
16
  SELECT
17
- DATE_TRUNC('month', createdAt::TIMESTAMP) AS month,
18
  'dataset' AS repo
19
- FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/datasets/train/0000.parquet?download=true')
20
-
 
21
  UNION ALL
22
-
23
  SELECT
24
- DATE_TRUNC('month', createdAt::TIMESTAMP) AS month,
25
- 'space' AS repo
26
- FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/spaces/train/0000.parquet?download=true')
 
27
  )
28
  SELECT
29
  month,
@@ -37,6 +41,7 @@ ORDER BY month, repo
37
  ## Cumulative Hub Growth
38
 
39
  ```sql cumulative_growth_by_repo
 
40
  WITH ordered_data AS (
41
  SELECT
42
  month,
@@ -75,9 +80,14 @@ ORDER BY month, repo
75
  ## Models Created Each Month
76
 
77
  ```sql model_creations_by_month
78
- SELECT month, repo, creations
 
 
 
 
79
  FROM ${hub_growth}
80
  WHERE repo = 'model'
 
81
  ```
82
 
83
  <AreaChart
@@ -91,9 +101,14 @@ WHERE repo = 'model'
91
  ## Datasets Created Each Month
92
 
93
  ```sql dataset_creations_by_month
94
- SELECT month, repo, creations
 
 
 
 
95
  FROM ${hub_growth}
96
  WHERE repo = 'dataset'
 
97
  ```
98
  <AreaChart
99
  data={dataset_creations_by_month}
@@ -106,9 +121,14 @@ WHERE repo = 'dataset'
106
  ## Spaces Created Each Month
107
 
108
  ```sql space_creations_by_month
109
- SELECT month, repo, creations
 
 
 
 
110
  FROM ${hub_growth}
111
  WHERE repo = 'space'
 
112
  ```
113
 
114
  <AreaChart
@@ -123,10 +143,11 @@ WHERE repo = 'space'
123
 
124
  ```sql model_pipeline_downloads
125
  SELECT
126
- pipeline_tag AS name, SUM(downloads) AS value
127
- FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/models/train/0000.parquet?download=true')
 
128
  GROUP BY pipeline_tag
129
- ORDER BY value DESC;
130
  ```
131
 
132
  <DataTable data={model_pipeline_downloads} search=true>
@@ -159,10 +180,11 @@ ORDER BY value DESC;
159
 
160
  ```sql model_library_downloads
161
  SELECT
162
- library_name AS name, SUM(downloads) AS value
163
- FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/models/train/0000.parquet?download=true')
 
164
  GROUP BY library_name
165
- ORDER BY value DESC;
166
  ```
167
 
168
  <DataTable data={model_library_downloads} search=true>
@@ -194,11 +216,13 @@ ORDER BY value DESC;
194
  # Model Licenses
195
 
196
  ```sql model_license_ratio
197
- SELECT SUBSTRING(name, 9) AS name, COUNT(*) as value
198
- FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/models/train/0000.parquet?download=true'),
199
- UNNEST(tags) AS t(name)
200
- WHERE name LIKE 'license:%'
201
- GROUP BY name;
 
 
202
  ```
203
 
204
  <ECharts config={
@@ -220,11 +244,13 @@ ORDER BY value DESC;
220
  # Dataset Licenses
221
 
222
  ```sql dataset_license_ratio
223
- SELECT SUBSTRING(name, 9) AS name, COUNT(*) as value
224
- FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/datasets/train/0000.parquet?download=true'),
225
- UNNEST(tags) AS t(name)
226
- WHERE name LIKE 'license:%'
227
- GROUP BY name;
 
 
228
  ```
229
 
230
  <ECharts config={
@@ -246,9 +272,11 @@ ORDER BY value DESC;
246
  # Space SDKs
247
 
248
  ```sql space_sdk_ratio
249
- SELECT sdk as name, COUNT(*) as value
250
- FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/spaces/train/0000.parquet?download=true')
251
- GROUP BY name;
 
 
252
  ```
253
 
254
 
 
5
  _Note: The charts are updated daily via the <a class="font-bold text-blue" href="https://huggingface.co/datasets/cfahlgren1/hub-stats" target="_blank" rel="noopener noreferrer">hub-stats dataset</a>. It may take a little while for the data to load as the queries are running entirely in the browser._
6
 
7
  ```sql hub_growth
8
+ -- Hub Growth by Month (using DATE_TRUNC for proper date ordering)
9
  WITH all_data AS (
10
  SELECT
11
+ CAST(DATE_TRUNC('month', createdAt) AS DATE) AS month,
12
+ 'model' AS repo
13
+ FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/models.parquet?download=true')
14
+ WHERE createdAt IS NOT NULL
15
+
16
  UNION ALL
17
+
18
  SELECT
19
+ CAST(DATE_TRUNC('month', createdAt) AS DATE) AS month,
20
  'dataset' AS repo
21
+ FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/datasets.parquet')
22
+ WHERE createdAt IS NOT NULL
23
+
24
  UNION ALL
25
+
26
  SELECT
27
+ CAST(DATE_TRUNC('month', createdAt) AS DATE) AS month,
28
+ 'space' AS repo
29
+ FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/spaces.parquet?download=true')
30
+ WHERE createdAt IS NOT NULL
31
  )
32
  SELECT
33
  month,
 
41
  ## Cumulative Hub Growth
42
 
43
  ```sql cumulative_growth_by_repo
44
+ -- Cumulative Hub Growth
45
  WITH ordered_data AS (
46
  SELECT
47
  month,
 
80
  ## Models Created Each Month
81
 
82
  ```sql model_creations_by_month
83
+ -- Models Created Each Month
84
+ SELECT
85
+ month,
86
+ repo,
87
+ creations
88
  FROM ${hub_growth}
89
  WHERE repo = 'model'
90
+ ORDER BY month;
91
  ```
92
 
93
  <AreaChart
 
101
  ## Datasets Created Each Month
102
 
103
  ```sql dataset_creations_by_month
104
+ -- Datasets Created Each Month
105
+ SELECT
106
+ month,
107
+ repo,
108
+ creations
109
  FROM ${hub_growth}
110
  WHERE repo = 'dataset'
111
+ ORDER BY month;
112
  ```
113
  <AreaChart
114
  data={dataset_creations_by_month}
 
121
  ## Spaces Created Each Month
122
 
123
  ```sql space_creations_by_month
124
+ -- Spaces Created Each Month
125
+ SELECT
126
+ month,
127
+ repo,
128
+ creations
129
  FROM ${hub_growth}
130
  WHERE repo = 'space'
131
+ ORDER BY month;
132
  ```
133
 
134
  <AreaChart
 
143
 
144
  ```sql model_pipeline_downloads
145
  SELECT
146
+ pipeline_tag AS name,
147
+ SUM(downloads) AS value
148
+ FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/models.parquet?download=true')
149
  GROUP BY pipeline_tag
150
+ ORDER BY value DESC
151
  ```
152
 
153
  <DataTable data={model_pipeline_downloads} search=true>
 
180
 
181
  ```sql model_library_downloads
182
  SELECT
183
+ library_name AS name,
184
+ SUM(downloads) AS value
185
+ FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/models.parquet?download=true')
186
  GROUP BY library_name
187
+ ORDER BY value DESC
188
  ```
189
 
190
  <DataTable data={model_library_downloads} search=true>
 
216
  # Model Licenses
217
 
218
  ```sql model_license_ratio
219
+ SELECT
220
+ SUBSTRING(name, 9) AS name,
221
+ COUNT(*) as value
222
+ FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/models.parquet?download=true'),
223
+ UNNEST(tags) AS t(name)
224
+ WHERE name LIKE 'license:%'
225
+ GROUP BY name
226
  ```
227
 
228
  <ECharts config={
 
244
  # Dataset Licenses
245
 
246
  ```sql dataset_license_ratio
247
+ SELECT
248
+ SUBSTRING(name, 9) AS name,
249
+ COUNT(*) as value
250
+ FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/datasets.parquet?download=true'),
251
+ UNNEST(tags) AS t(name)
252
+ WHERE name LIKE 'license:%'
253
+ GROUP BY name
254
  ```
255
 
256
  <ECharts config={
 
272
  # Space SDKs
273
 
274
  ```sql space_sdk_ratio
275
+ SELECT
276
+ sdk as name,
277
+ COUNT(*) as value
278
+ FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/spaces.parquet?download=true')
279
+ GROUP BY name
280
  ```
281
 
282