fdaudens's picture
fdaudens HF Staff
csv
7847f35
import tempfile
import csv
import pandas as pd
import gradio as gr
from huggingface_hub import HfApi
from pathlib import Path
import os
def _safe_filename_part(term):
    """Return *term* reduced to characters that are safe inside a file name."""
    cleaned = "".join(
        ch if ch.isalnum() or ch in "-_." else "_" for ch in term
    )
    # Cap the length so a very long query cannot exceed file-name limits,
    # and never return an empty stem.
    return cleaned[:100] or "search"


def get_model_stats(search_term: str):
    """Collect download statistics for Hub models matching *search_term*.

    Every matching model is listed with its 30-day and all-time download
    counts. The rows are written to a CSV file in a fresh temporary
    directory and also returned as a DataFrame.

    Args:
        search_term: Text passed to the Hub model search. Surrounding
            whitespace is ignored.

    Returns:
        A ``(df, status_message, csv_path)`` tuple:

        * ``df`` - DataFrame with the columns "Model ID",
          "Downloads (30 days)" and "Downloads (All Time)".
        * ``status_message`` - multi-line summary with the model count and
          the download totals.
        * ``csv_path`` - path of the written CSV as a string, or ``None``
          when no search was run because the term was empty.
    """
    columns = ["Model ID", "Downloads (30 days)", "Downloads (All Time)"]

    search_term = (search_term or "").strip()
    if not search_term:
        # Without a term the request would not be narrowed to any subset of
        # the Hub, so refuse instead of starting an unbounded listing.
        return pd.DataFrame(columns=columns), "Please enter a search term.", None

    # Initialize the API
    api = HfApi()

    # Create a temporary file for the CSV. The term is sanitised first:
    # a query such as "google/gemma" would otherwise point into a
    # sub-directory that does not exist and make open() fail.
    temp_dir = tempfile.mkdtemp()
    output_file = (
        Path(temp_dir) / f"{_safe_filename_part(search_term)}_models_alltime.csv"
    )

    # Get the generator of models with the working sort parameter
    print(f"Fetching {search_term} models with download statistics...")
    models_generator = api.list_models(
        search=search_term,
        expand=["downloads", "downloadsAllTime"],  # Get both 30-day and all-time downloads
        sort="_id",  # Sort by ID to avoid timeout issues
    )

    rows = []
    total_30day_downloads = 0
    total_alltime_downloads = 0
    for model in models_generator:
        # The attribute can exist with a value of None (the fields are
        # optional in the Hub's model info), in which case getattr's default
        # is not used; `or 0` keeps the totals numeric either way.
        downloads_30day = getattr(model, "downloads", None) or 0
        downloads_alltime = getattr(model, "downloads_all_time", None) or 0

        total_30day_downloads += downloads_30day
        total_alltime_downloads += downloads_alltime

        rows.append(
            [getattr(model, "id", "Unknown"), downloads_30day, downloads_alltime]
        )

    # Write the CSV offered for download
    with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(columns)
        csv_writer.writerows(rows)

    # Build the table from the collected rows instead of re-parsing the CSV:
    # this avoids a second pass over the file and keeps model IDs as the
    # exact strings that were fetched.
    df = pd.DataFrame(rows, columns=columns)
    model_count = len(rows)

    # Create status message with total downloads
    status_message = (
        f"Found {model_count} models for search term '{search_term}'\n"
        f"Total 30-day downloads: {total_30day_downloads:,}\n"
        f"Total all-time downloads: {total_alltime_downloads:,}"
    )

    # Return the DataFrame, the status message, and the CSV file path
    return df, status_message, str(output_file)
# Create the Gradio interface: a title and description, a search box with a
# "Search" button, a results table, a status box, and a "Download CSV" button
# with a file output.
# NOTE(review): indentation is missing in this copy of the file, so which
# components sit inside each gr.Row() cannot be determined from here --
# confirm the nesting against the original layout.
with gr.Blocks(title="Hugging Face Model Statistics") as demo:
gr.Markdown("# Hugging Face Model Statistics")
gr.Markdown("Enter a search term to find model statistics from Hugging Face Hub")
with gr.Row():
# Search box, pre-filled with "gemma"
search_input = gr.Textbox(
label="Search Term",
placeholder="Enter a model name or keyword (e.g., 'gemma', 'llama')",
value="gemma"
)
search_button = gr.Button("Search")
with gr.Row():
# The first column is typed "html" because process_results replaces the
# model IDs with <a> markup; the two download columns are numeric.
output_table = gr.Dataframe(
headers=["Model ID", "Downloads (30 days)", "Downloads (All Time)"],
datatype=["html", "number", "number"],
label="Model Statistics",
wrap=True
)
# Shows the summary string returned by get_model_stats
status_message = gr.Textbox(label="Status", lines=3)
with gr.Row():
download_button = gr.Button("Download CSV")
csv_file = gr.File(label="CSV File", visible=True)
# Store the CSV file path in a state: the search handler writes it and the
# download handler reads it back.
csv_path = gr.State()
def process_results(df, status, csv_path):
    """Turn the "Model ID" column into clickable Hugging Face links.

    Args:
        df: DataFrame with a "Model ID" column.
        status: Status text, passed through unchanged.
        csv_path: CSV path, passed through unchanged.

    Returns:
        ``(linked_df, status, csv_path)`` where ``linked_df`` is a copy of
        *df* whose "Model ID" values are ``<a>`` elements pointing at
        ``https://huggingface.co/<model id>``. The input frame is not
        modified.

    Raises:
        KeyError: If *df* has no "Model ID" column.
    """
    # Local import: the file does not import `html` at module level.
    from html import escape

    def to_link(model_id):
        text = str(model_id)
        # Cells that already hold a link are left alone so running this a
        # second time on the same table cannot nest one anchor in another.
        if text.startswith("<a "):
            return text
        # Escape the ID before placing it in markup; for ordinary model IDs
        # this leaves the text unchanged.
        safe_id = escape(text, quote=True)
        return f'<a href="https://huggingface.co/{safe_id}" target="_blank">{safe_id}</a>'

    linked = df.copy()
    linked["Model ID"] = linked["Model ID"].map(to_link)
    return linked, status, csv_path
def get_csv_file(csv_path):
    """Return *csv_path* when it names an existing path, otherwise ``None``.

    An empty or missing value yields ``None`` without touching the disk.
    """
    if not csv_path:
        return None
    return csv_path if os.path.exists(csv_path) else None
# Search: fetch the statistics, then convert the model IDs to links.
# The follow-up step is chained with .success() so it only runs when
# get_model_stats finished without raising. With .then() it would also run
# after a failed search and re-process the table left over from the previous
# search, whose IDs are already wrapped in <a> tags.
search_button.click(
    fn=get_model_stats,
    inputs=search_input,
    outputs=[output_table, status_message, csv_path],
).success(
    fn=process_results,
    inputs=[output_table, status_message, csv_path],
    outputs=[output_table, status_message, csv_path],
)

# Download: hand the stored CSV path to the file component.
download_button.click(
    fn=get_csv_file,
    inputs=csv_path,
    outputs=csv_file,
)
# Start the Gradio app only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
demo.launch()