cfahlgren1's picture
cfahlgren1 HF Staff
improve titles
6cfe5ea
|
raw
history blame
5.72 kB
metadata
title: 🤗 Stats
-- Number of models, datasets and spaces created in each calendar month.
-- Output columns: month (first day of the month), repo, creations.
WITH repo_creations AS (
  -- One row per repo, tagged with the kind of repo it came from.
  SELECT CAST(createdAt AS DATE) AS created_on, 'model' AS repo
  FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/models/train/0000.parquet?download=true')

  UNION ALL

  SELECT CAST(createdAt AS DATE) AS created_on, 'dataset' AS repo
  FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/datasets/train/0000.parquet?download=true')

  UNION ALL

  SELECT CAST(createdAt AS DATE) AS created_on, 'space' AS repo
  FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/spaces/train/0000.parquet?download=true')
),
monthly AS (
  -- Bucket every creation date into its month.
  SELECT
    DATE_TRUNC('month', created_on) AS month,
    repo
  FROM repo_creations
)
SELECT
  month,
  repo,
  COUNT(*) AS creations
FROM monthly
GROUP BY
  month,
  repo
ORDER BY
  month,
  repo

Models, datasets, and Spaces created per month.

<BarChart data={hub_growth} x=month y=creations colorPalette={[ '#cf0d06', '#eb5752', '#e88a87', '#fcdad9', ]} series=repo />

Models Created Each Month

-- Monthly creation counts for models only, taken from the hub_growth query.
-- The ordering of a subquery is not guaranteed to survive an outer SELECT,
-- so the chronological order is requested explicitly here.
SELECT
  month,
  repo,
  creations
FROM ${hub_growth}
WHERE repo = 'model'
ORDER BY month

Datasets Created Each Month

-- Monthly creation counts for datasets only, taken from the hub_growth query.
-- The ordering of a subquery is not guaranteed to survive an outer SELECT,
-- so the chronological order is requested explicitly here.
SELECT
  month,
  repo,
  creations
FROM ${hub_growth}
WHERE repo = 'dataset'
ORDER BY month

Spaces Created Each Month

-- Monthly creation counts for spaces only, taken from the hub_growth query.
-- The ordering of a subquery is not guaranteed to survive an outer SELECT,
-- so the chronological order is requested explicitly here.
SELECT
  month,
  repo,
  creations
FROM ${hub_growth}
WHERE repo = 'space'
ORDER BY month

Model Downloads by Pipeline Tag (Last 30 days)

-- Total downloads per pipeline tag, largest total first.
-- Columns are named name/value because the pie chart consumes that shape.
WITH models AS (
  SELECT
    pipeline_tag,
    downloads
  FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/models/train/0000.parquet?download=true')
)
SELECT
  pipeline_tag AS name,
  SUM(downloads) AS value
FROM models
GROUP BY pipeline_tag
ORDER BY value DESC;

<ECharts config={ { tooltip: { formatter: '{b}: {c} ({d}%)' }, series: [ { type: 'pie', radius: ['40%', '70%'], data: [...model_pipeline_downloads], label: { show: true, formatter: '{b}: {c}' } } ] } } />

Model Downloads by Library (Last 30 days)

-- Total downloads per library, largest total first.
-- Columns are named name/value because the pie chart consumes that shape.
WITH models AS (
  SELECT
    library_name,
    downloads
  FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/models/train/0000.parquet?download=true')
)
SELECT
  library_name AS name,
  SUM(downloads) AS value
FROM models
GROUP BY library_name
ORDER BY value DESC;

<ECharts config={ { tooltip: { formatter: '{b}: {c} ({d}%)' }, series: [ { type: 'pie', radius: ['40%', '70%'], data: [...model_library_downloads], label: { show: true, formatter: '{b}: {c}' } } ] } } />

Model Licenses

  -- Number of models per license, derived from tags of the form 'license:<id>'.
  -- A model carrying several license tags is counted once for each of them.
  WITH model_tags AS (
    -- Expand the tags list to one row per (model, tag); models whose list is
    -- empty or NULL contribute no rows.
    SELECT UNNEST(tags) AS tag
    FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/models/train/0000.parquet?download=true')
  )
  SELECT
    -- 'license:' is 8 characters long, so the identifier starts at position 9.
    SUBSTRING(tag, 9) AS name,
    COUNT(*) AS value
  FROM model_tags
  WHERE tag LIKE 'license:%'
  -- Group on the source column so the output alias "name" is never ambiguous.
  GROUP BY tag
  -- Fixed ordering keeps the pie slices stable between runs.
  ORDER BY value DESC, name;

<ECharts config={ { tooltip: { formatter: '{b}: {c} ({d}%)' }, series: [ { type: 'pie', radius: ['40%', '70%'], data: [...model_license_ratio], } ] } } />

Dataset Licenses

  -- Number of datasets per license, derived from tags of the form 'license:<id>'.
  -- A dataset carrying several license tags is counted once for each of them.
  WITH dataset_tags AS (
    -- Expand the tags list to one row per (dataset, tag); datasets whose list is
    -- empty or NULL contribute no rows.
    SELECT UNNEST(tags) AS tag
    FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/datasets/train/0000.parquet?download=true')
  )
  SELECT
    -- 'license:' is 8 characters long, so the identifier starts at position 9.
    SUBSTRING(tag, 9) AS name,
    COUNT(*) AS value
  FROM dataset_tags
  WHERE tag LIKE 'license:%'
  -- Group on the source column so the output alias "name" is never ambiguous.
  GROUP BY tag
  -- Fixed ordering keeps the pie slices stable between runs.
  ORDER BY value DESC, name;

<ECharts config={ { tooltip: { formatter: '{b}: {c} ({d}%)' }, series: [ { type: 'pie', radius: ['40%', '70%'], data: [...dataset_license_ratio], } ] } } />

Space SDKs

  -- Number of spaces per SDK, shaped as name/value rows for the pie chart.
  SELECT
    sdk AS name,
    COUNT(*) AS value
  FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/spaces/train/0000.parquet?download=true')
  -- Group on the source column rather than relying on alias resolution.
  GROUP BY sdk
  -- Fixed ordering keeps the pie slices stable between runs.
  ORDER BY value DESC, name;

<ECharts config={ { tooltip: { formatter: '{b}: {c} ({d}%)' }, series: [ { type: 'pie', radius: ['40%', '70%'], data: [...space_sdk_ratio], } ] } } />

Hub Stats Dataset