cfahlgren1's picture
cfahlgren1 HF Staff
fix chart
1de78b4
metadata
title: 🤗 Hub Stats

Note: The charts are updated daily from the hub-stats dataset. The data may take a little while to load because the queries run entirely in the browser.

-- Hub Growth by Month
-- Counts how many repositories were created in each calendar month, split by
-- repo type ('model', 'dataset', 'space').
-- DATE_TRUNC('month', ...) buckets every createdAt into its month, and the
-- cast to DATE keeps the bucket a real date so months sort chronologically.
-- Rows with no createdAt are excluded from the counts.
WITH repo_months AS (
    -- One row per model, tagged with its creation month.
    SELECT
        'model'   AS repo,
        CAST(DATE_TRUNC('month', createdAt) AS DATE) AS month
    FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/models.parquet?download=true')
    WHERE createdAt IS NOT NULL

    UNION ALL

    -- One row per dataset, tagged with its creation month.
    SELECT
        'dataset' AS repo,
        CAST(DATE_TRUNC('month', createdAt) AS DATE) AS month
    FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/datasets.parquet')
    WHERE createdAt IS NOT NULL

    UNION ALL

    -- One row per space, tagged with its creation month.
    SELECT
        'space'   AS repo,
        CAST(DATE_TRUNC('month', createdAt) AS DATE) AS month
    FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/spaces.parquet?download=true')
    WHERE createdAt IS NOT NULL
)
SELECT
    month,
    repo,
    COUNT(*) AS creations
FROM repo_months
GROUP BY repo, month
ORDER BY month ASC, repo ASC

Cumulative Hub Growth

-- Cumulative Hub Growth
-- Running total of repositories created, per repo type, month by month.
-- Input:  ${hub_growth} (columns month, repo, creations).
-- Output: month, repo, cumulative_creations.
SELECT
  month,
  repo,
  -- Explicit ROWS frame: sum every row of this repo up to and including the
  -- current month, rather than relying on the implicit default frame.
  SUM(creations) OVER (
    PARTITION BY repo
    ORDER BY month
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS cumulative_creations
FROM ${hub_growth}
ORDER BY month, repo

Models, datasets, and Spaces created per month.

Models Created Each Month

-- Monthly creation counts restricted to rows whose repo is 'model'.
-- Reads month, repo and creations from the ${hub_growth} result and returns
-- them in chronological order.
SELECT
    month,
    repo,
    creations
FROM ${hub_growth}
WHERE repo = 'model'
ORDER BY month ASC;

Datasets Created Each Month

-- Monthly creation counts restricted to rows whose repo is 'dataset'.
-- Reads month, repo and creations from the ${hub_growth} result and returns
-- them in chronological order.
SELECT
    month,
    repo,
    creations
FROM ${hub_growth}
WHERE repo = 'dataset'
ORDER BY month ASC;

Spaces Created Each Month

-- Monthly creation counts restricted to rows whose repo is 'space'.
-- Reads month, repo and creations from the ${hub_growth} result and returns
-- them in chronological order.
SELECT
    month,
    repo,
    creations
FROM ${hub_growth}
WHERE repo = 'space'
ORDER BY month ASC;

Model Downloads by Pipeline Tag (Last 30 days)

-- Total model downloads per pipeline tag, largest total first.
-- Output columns are aliased to name / value.
-- NOTE(review): the page heading says "Last 30 days" but no date filter is
-- applied here; presumably `downloads` is already a 30-day figure - confirm.
-- NOTE(review): models with a NULL pipeline_tag are summed into a single row
-- whose name is NULL - confirm the chart is meant to show that slice.
WITH models AS (
    -- Project only the two columns the aggregation needs.
    SELECT
        pipeline_tag,
        downloads
    FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/models.parquet?download=true')
)
SELECT
    pipeline_tag   AS name,
    SUM(downloads) AS value
FROM models
GROUP BY pipeline_tag
ORDER BY value DESC

<ECharts config={ { tooltip: { formatter: '{b}: {c} ({d}%)' }, series: [ { type: 'pie', radius: ['40%', '70%'], data: [...model_pipeline_downloads], label: { show: true, formatter: '{b}: {c}' } } ] } } />

Model Downloads by Library (Last 30 days)

-- Total model downloads per library, largest total first.
-- Output columns are aliased to name / value.
-- NOTE(review): the page heading says "Last 30 days" but no date filter is
-- applied here; presumably `downloads` is already a 30-day figure - confirm.
-- NOTE(review): models with a NULL library_name are summed into a single row
-- whose name is NULL - confirm the chart is meant to show that slice.
WITH models AS (
    -- Project only the two columns the aggregation needs.
    SELECT
        library_name,
        downloads
    FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/models.parquet?download=true')
)
SELECT
    library_name   AS name,
    SUM(downloads) AS value
FROM models
GROUP BY library_name
ORDER BY value DESC

<ECharts config={ { tooltip: { formatter: '{b}: {c} ({d}%)' }, series: [ { type: 'pie', radius: ['40%', '70%'], data: [...model_library_downloads], label: { show: true, formatter: '{b}: {c}' } } ] } } />

Model Licenses

-- Model Licenses
-- Counts how often each license tag occurs across models.
-- Each model's `tags` list is unnested to one row per tag; only tags of the
-- form 'license:<id>' are kept.
-- Output: name (the tag with its 'license:' prefix removed), value (count),
-- ordered by count descending, then name, so the slice order is stable between
-- loads.
SELECT
    SUBSTRING(t.license_tag, 9) AS name,  -- 'license:' is 8 characters; keep from the 9th on
    COUNT(*) AS value
FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/models.parquet?download=true'),
    UNNEST(tags) AS t(license_tag)  -- distinct alias so it is not shadowed by the output column `name`
WHERE t.license_tag LIKE 'license:%'
GROUP BY t.license_tag
ORDER BY value DESC, name

<ECharts config={ { tooltip: { formatter: '{b}: {c} ({d}%)' }, series: [ { type: 'pie', radius: ['40%', '70%'], data: [...model_license_ratio], } ] } } />

Dataset Licenses

-- Dataset Licenses
-- Counts how often each license tag occurs across datasets.
-- Each dataset's `tags` list is unnested to one row per tag; only tags of the
-- form 'license:<id>' are kept.
-- Output: name (the tag with its 'license:' prefix removed), value (count),
-- ordered by count descending, then name, so the slice order is stable between
-- loads.
SELECT
    SUBSTRING(t.license_tag, 9) AS name,  -- 'license:' is 8 characters; keep from the 9th on
    COUNT(*) AS value
FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/datasets.parquet?download=true'),
    UNNEST(tags) AS t(license_tag)  -- distinct alias so it is not shadowed by the output column `name`
WHERE t.license_tag LIKE 'license:%'
GROUP BY t.license_tag
ORDER BY value DESC, name

<ECharts config={ { tooltip: { formatter: '{b}: {c} ({d}%)' }, series: [ { type: 'pie', radius: ['40%', '70%'], data: [...dataset_license_ratio], } ] } } />

Space SDKs

-- Space SDKs
-- Counts Spaces per SDK.
-- Output: name (the sdk value), value (number of Spaces), ordered by count
-- descending, then name, so the slice order is stable between loads.
-- Grouping uses the source column `sdk` directly rather than the output alias.
SELECT
    sdk AS name,
    COUNT(*) AS value
FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/spaces.parquet?download=true')
GROUP BY sdk
ORDER BY value DESC, name

<ECharts config={ { tooltip: { formatter: '{b}: {c} ({d}%)' }, series: [ { type: 'pie', radius: ['40%', '70%'], data: [...space_sdk_ratio], } ] } } />

Hub Stats Dataset