Spaces:

fdaudens
/

model-stats-search-keywords

Sleeping

File size: 4,143 Bytes

0da2f8d
d4fbda3
 
0da2f8d
 
d4fbda3
7847f35
d4fbda3
 
 
 
 
 
 
 
 
 
0da2f8d
d4fbda3
 
0da2f8d
d4fbda3
 
 
bd3fe94
 
 
 
d4fbda3
 
 
 
0da2f8d
d4fbda3
 
 
 
bd3fe94
 
 
 
 
 
 
 
d4fbda3
 
 
bd3fe94
 
d4fbda3
 
 
 
 
 
bd3fe94
 
 
 
 
 
 
d4fbda3
bd3fe94
d4fbda3
 
 
 
bd3fe94
d4fbda3
 
 
 
 
 
 
 
 
 
c1c6f57
 
 
 
 
 
 
d4fbda3
 
 
7847f35
d4fbda3
 
 
 
8c6fcfb
c1c6f57
 
 
 
8c6fcfb
 
7847f35
 
 
 
 
d4fbda3
 
 
 
8c6fcfb
 
 
 
d4fbda3
 
 
7847f35
d4fbda3
 
 
 
 
0da2f8d

import tempfile
import csv
import pandas as pd
import gradio as gr
from huggingface_hub import HfApi
from pathlib import Path
import os

def get_model_stats(search_term):
    """Collect download statistics for Hub models matching ``search_term``.

    Queries the Hugging Face Hub via ``HfApi.list_models``, writes one CSV row
    per returned model into a freshly created temporary directory, and sums
    the 30-day and all-time download counts.

    Args:
        search_term: Keyword forwarded to the Hub search. It is also used
            (after sanitising) as the prefix of the CSV file name.

    Returns:
        A 3-tuple ``(df, status_message, csv_path)``:
        ``df`` is a ``pandas.DataFrame`` with the columns "Model ID",
        "Downloads (30 days)" and "Downloads (All Time)";
        ``status_message`` is a three-line human-readable summary;
        ``csv_path`` is the path of the written CSV file as a string.

    Note:
        The temporary directory is created with ``tempfile.mkdtemp`` and is
        not removed here, because the returned path is handed to the caller.
    """
    api = HfApi()

    # The search term is free text typed by a user. Keep only filename-safe
    # characters so a term such as "meta/llama" or "../x" cannot point the
    # output path at a non-existent sub-directory or outside the temp dir.
    safe_term = "".join(
        ch if ch.isalnum() or ch in "-_." else "_" for ch in str(search_term)
    )
    temp_dir = tempfile.mkdtemp()
    output_file = Path(temp_dir) / f"{safe_term}_models_alltime.csv"

    print(f"Fetching {search_term} models with download statistics...")
    models_generator = api.list_models(
        search=search_term,
        expand=["downloads", "downloadsAllTime"],  # 30-day and all-time counts
        sort="_id",  # per the original author: sorting by ID avoids timeouts
    )

    columns = ["Model ID", "Downloads (30 days)", "Downloads (All Time)"]
    rows = []
    total_30day_downloads = 0
    total_alltime_downloads = 0

    for model in models_generator:
        # The attribute can exist but be None, which a getattr default does
        # not cover; `or 0` normalises both "missing" and "None" to zero so
        # the running totals never hit `int + None`.
        downloads_30day = getattr(model, 'downloads', None) or 0
        downloads_alltime = getattr(model, 'downloads_all_time', None) or 0

        total_30day_downloads += downloads_30day
        total_alltime_downloads += downloads_alltime

        rows.append([
            getattr(model, 'id', "Unknown"),
            downloads_30day,
            downloads_alltime,
        ])

    model_count = len(rows)

    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(columns)
        csv_writer.writerows(rows)

    # Build the table from the in-memory rows rather than re-parsing the CSV:
    # this skips a disk round trip and avoids read_csv turning ids such as
    # "NA" or "null" into NaN.
    df = pd.DataFrame(rows, columns=columns)

    status_message = (
        f"Found {model_count} models for search term '{search_term}'\n"
        f"Total 30-day downloads: {total_30day_downloads:,}\n"
        f"Total all-time downloads: {total_alltime_downloads:,}"
    )

    return df, status_message, str(output_file)

# Create the Gradio interface.
# Everything assigned inside this `with` block is a module-level name; `demo`
# is the Blocks app object that the script entry point launches.
with gr.Blocks(title="Hugging Face Model Statistics") as demo:
    gr.Markdown("# Hugging Face Model Statistics")
    gr.Markdown("Enter a search term to find model statistics from Hugging Face Hub")
    
    # Row 1: search box (pre-filled with "gemma") and the button that triggers the search.
    with gr.Row():
        search_input = gr.Textbox(
            label="Search Term",
            placeholder="Enter a model name or keyword (e.g., 'gemma', 'llama')",
            value="gemma"
        )
        search_button = gr.Button("Search")
    
    # Row 2: results table and a three-line status box.
    with gr.Row():
        # The first column is declared as "html"; process_results below fills it
        # with <a> tags (presumably so the ids render as clickable links).
        output_table = gr.Dataframe(
            headers=["Model ID", "Downloads (30 days)", "Downloads (All Time)"],
            datatype=["html", "number", "number"],
            label="Model Statistics",
            wrap=True
        )
        status_message = gr.Textbox(label="Status", lines=3)
    
    # Row 3: button that publishes the CSV into the file component.
    with gr.Row():
        download_button = gr.Button("Download CSV")
        csv_file = gr.File(label="CSV File", visible=True)
    
    # Store the CSV file path in a state.
    # It receives the third return value of get_model_stats via the click wiring below.
    csv_path = gr.State()
    
    def process_results(df, status, csv_path):
        """Rewrite the "Model ID" column as HTML anchors to huggingface.co.

        The frame is modified in place; ``status`` and ``csv_path`` are passed
        through unchanged. Note that the ``csv_path`` parameter shadows the
        module-level ``csv_path`` state component inside this function.
        """
        # Convert model IDs to clickable links
        df['Model ID'] = df['Model ID'].apply(
            lambda x: f'<a href="https://huggingface.co/{x}" target="_blank">{x}</a>'
        )
        return df, status, csv_path
    
    def get_csv_file(csv_path):
        """Return ``csv_path`` if it is set and the file exists on disk, else ``None``."""
        if csv_path and os.path.exists(csv_path):
            return csv_path
        return None
    
    # Two-step chain: first run the search and fill table, status and path
    # state; then feed those same three components back through
    # process_results, which turns the ids in the table into links.
    search_button.click(
        fn=get_model_stats,
        inputs=search_input,
        outputs=[output_table, status_message, csv_path]
    ).then(
        fn=process_results,
        inputs=[output_table, status_message, csv_path],
        outputs=[output_table, status_message, csv_path]
    )
    
    # Hand the stored CSV path to the file component (or None when there is no file yet).
    download_button.click(
        fn=get_csv_file,
        inputs=csv_path,
        outputs=csv_file
    )

# Launch the app only when this file is executed as a script, so importing the
# module (e.g. to reuse get_model_stats or `demo`) does not call launch().
if __name__ == "__main__":
    demo.launch()