Spaces:

fdaudens
/

model-stats-search-keywords

Sleeping

App Files Files Community

model-stats-search-keywords / app.py

fdaudens HF Staff

csv

7847f35 13 days ago

raw

history blame contribute delete

4.14 kB

	import tempfile
	import csv
	import pandas as pd
	import gradio as gr
	from huggingface_hub import HfApi
	from pathlib import Path
	import os

	def get_model_stats(search_term):
	    """Collect download statistics for Hub models matching ``search_term``.

	    Queries the Hugging Face Hub via ``HfApi.list_models``, writes one CSV
	    row per returned model into a newly created temporary directory, and
	    accumulates the 30-day and all-time download totals.

	    Args:
	        search_term: Text passed as the ``search`` argument of
	            ``list_models``. A filename-safe variant of it is also used
	            in the CSV file name.

	    Returns:
	        A tuple ``(df, status_message, csv_path)`` where ``df`` is a pandas
	        DataFrame with the columns "Model ID", "Downloads (30 days)" and
	        "Downloads (All Time)", ``status_message`` is a human-readable
	        summary, and ``csv_path`` is the path of the written CSV as a string.

	    Note:
	        The temporary directory is not removed by this function, because
	        the returned path must stay valid for a later download.
	        NOTE(review): an empty ``search_term`` is forwarded unchanged;
	        confirm whether that should be rejected before querying the Hub.
	    """
	    columns = ["Model ID", "Downloads (30 days)", "Downloads (All Time)"]

	    # Initialize the API
	    api = HfApi()

	    # The search term may contain path separators (e.g. "org/model") or
	    # other characters that are invalid in a file name; replace them so the
	    # CSV is always created directly inside the temporary directory.
	    safe_term = "".join(
	        ch if ch.isalnum() or ch in "-_." else "_" for ch in search_term
	    )

	    # Create a temporary file for the CSV
	    temp_dir = tempfile.mkdtemp()
	    output_file = Path(temp_dir) / f"{safe_term}_models_alltime.csv"

	    # Get the generator of models with the working sort parameter
	    print(f"Fetching {search_term} models with download statistics...")
	    models_generator = api.list_models(
	        search=search_term,
	        expand=["downloads", "downloadsAllTime"],  # Get both 30-day and all-time downloads
	        sort="_id",  # Sort by ID to avoid timeout issues
	    )

	    # Running totals across all returned models
	    total_30day_downloads = 0
	    total_alltime_downloads = 0

	    # Rows are kept in memory as well as written to disk, so the DataFrame
	    # can be built without re-parsing the CSV (re-parsing would let pandas
	    # reinterpret IDs such as "NA" or "null" as missing values).
	    rows = []
	    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
	        csv_writer = csv.writer(csvfile)
	        csv_writer.writerow(columns)

	        for model in models_generator:
	            # The attribute can be present but None; `or 0` keeps the
	            # totals numeric instead of raising TypeError on `+=`.
	            downloads_30day = getattr(model, 'downloads', 0) or 0
	            downloads_alltime = getattr(model, 'downloads_all_time', 0) or 0

	            total_30day_downloads += downloads_30day
	            total_alltime_downloads += downloads_alltime

	            row = [
	                getattr(model, 'id', "Unknown"),
	                downloads_30day,
	                downloads_alltime,
	            ]
	            csv_writer.writerow(row)
	            rows.append(row)

	    model_count = len(rows)
	    df = pd.DataFrame(rows, columns=columns)

	    # Create status message with total downloads
	    status_message = (
	        f"Found {model_count} models for search term '{search_term}'\n"
	        f"Total 30-day downloads: {total_30day_downloads:,}\n"
	        f"Total all-time downloads: {total_alltime_downloads:,}"
	    )

	    # Return the DataFrame, status message, and the CSV file path
	    return df, status_message, str(output_file)

	# Create the Gradio interface: a search box, a results table with a status
	# box, and a button that exposes the generated CSV for download.
	with gr.Blocks(title="Hugging Face Model Statistics") as demo:
	    gr.Markdown("# Hugging Face Model Statistics")
	    gr.Markdown("Enter a search term to find model statistics from Hugging Face Hub")

	    with gr.Row():
	        search_input = gr.Textbox(
	            label="Search Term",
	            placeholder="Enter a model name or keyword (e.g., 'gemma', 'llama')",
	            value="gemma"
	        )
	        search_button = gr.Button("Search")

	    with gr.Row():
	        output_table = gr.Dataframe(
	            headers=["Model ID", "Downloads (30 days)", "Downloads (All Time)"],
	            datatype=["html", "number", "number"],
	            label="Model Statistics",
	            wrap=True
	        )
	        status_message = gr.Textbox(label="Status", lines=3)

	    with gr.Row():
	        download_button = gr.Button("Download CSV")
	        csv_file = gr.File(label="CSV File", visible=True)

	    # Store the CSV file path in a state
	    csv_path = gr.State()

	    def process_results(df, status, csv_path):
	        """Turn the "Model ID" column into links to the model pages.

	        Args:
	            df: DataFrame holding a "Model ID" column; modified in place.
	            status: Status text, passed through unchanged.
	            csv_path: CSV file path, passed through unchanged.

	        Returns:
	            The tuple ``(df, status, csv_path)``.
	        """
	        from html import escape

	        def as_link(model_id):
	            # Escape before interpolating so an unexpected character in an
	            # ID cannot break out of the attribute or inject markup.
	            safe_id = escape(str(model_id))
	            return f'<a href="https://huggingface.co/{safe_id}" target="_blank">{safe_id}</a>'

	        df['Model ID'] = df['Model ID'].apply(as_link)
	        return df, status, csv_path

	    def get_csv_file(csv_path):
	        """Return ``csv_path`` if it names an existing file, else ``None``."""
	        if csv_path and os.path.exists(csv_path):
	            return csv_path
	        return None

	    # Linkify only after a successful search. Chaining with `.then` would
	    # also run after a failed search, when the table still holds the
	    # previous (already linkified) IDs, and would wrap them a second time.
	    search_button.click(
	        fn=get_model_stats,
	        inputs=search_input,
	        outputs=[output_table, status_message, csv_path]
	    ).success(
	        fn=process_results,
	        inputs=[output_table, status_message, csv_path],
	        outputs=[output_table, status_message, csv_path]
	    )

	    download_button.click(
	        fn=get_csv_file,
	        inputs=csv_path,
	        outputs=csv_file
	    )

	# Start the web UI only when this file is executed as a script, so that
	# importing the module does not launch a server.
	if __name__ == "__main__":
	    demo.launch()