# A simple script that loops over all public models and gets their library_name
import gradio as gr
import pandas as pd
import numpy as np

from collections import Counter

from huggingface_hub import HfApi
from datasets import load_dataset

api = HfApi()

# Label shown for models whose card metadata has no `library_name`.
NO_LIBRARY_LABEL = "No library_name"


def fetch_dataset_and_init():
    """Load the model-card metadata dataset and count models per library.

    Returns:
        A ``(df, df_log)`` tuple of DataFrames with the columns
        ``library_name`` and ``count``, ordered by descending count.
        ``df`` holds the raw counts; ``df_log`` holds their natural log.
    """
    dataset = load_dataset("librarian-bots/model_cards_with_metadata", split="train")
    library_counts = Counter(dataset["library_name"])

    # `most_common()` already yields (name, count) pairs in descending order.
    rows = [
        (NO_LIBRARY_LABEL if name is None else name, count)
        for name, count in library_counts.most_common()
    ]
    df = pd.DataFrame(rows, columns=["library_name", "count"])

    df_log = df.copy()
    df_log["count"] = np.log(df_log["count"])

    return df, df_log


df, df_log = fetch_dataset_and_init()


def get_current_nb_models():
    """Return, as a string, the number of listed models missing from ``df``.

    The value is the number of models returned by ``api.list_models()`` minus
    the sum of the ``count`` column of ``df``.
    """
    # `list_models` returns a generator, so it has to be counted by iterating.
    # It is requested on every call: a generator created once at module level
    # would be exhausted after the first click and every later call would
    # count zero models.
    total_models = sum(1 for _ in api.list_models())
    diff_models = total_models - int(df["count"].sum())
    return str(diff_models)


plot_height = 512
plot_width = 1512


def bar_plot_fn(display, top_k, select_box=None):
    """Build the bar plot for the current UI state.

    Args:
        display: ``"simple"`` to plot raw counts, ``"log"`` to plot log counts.
        top_k: Number of leading rows to show when no specific library is
            selected.
        select_box: Selected library names. ``None``, an empty selection, or
            exactly ``["all"]`` means "no filter", in which case ``top_k``
            applies; any other selection shows only the selected libraries
            and ``top_k`` is ignored.

    Returns:
        A ``gr.BarPlot`` built from the chosen rows.

    Raises:
        ValueError: If ``display`` is neither ``"simple"`` nor ``"log"``.
    """
    if display == "simple":
        frame = df
    elif display == "log":
        frame = df_log
    else:
        raise ValueError(f"Unknown display mode: {display!r}")

    if select_box and list(select_box) != ["all"]:
        current_df = frame[frame["library_name"].isin(select_box)]
    else:
        # Cast defensively so a float slider value cannot break the slice.
        current_df = frame.iloc[: int(top_k)]

    return gr.BarPlot(
        current_df,
        x="library_name",
        y="count",
        tooltip=["library_name", "count"],
        height=plot_height,
        width=plot_width,
    )


with gr.Blocks() as bar_plot:
    with gr.Column():
        with gr.Column():
            display = gr.Dropdown(
                choices=[
                    "simple",
                    "log",
                ],
                value="simple",
                label="Type of Bar Plot",
            )
            top_k = gr.Slider(
                label="Select top-K most used library_name (This leads to a no-op if you selected something else than 'all' in the columns below)",
                value=len(df),
                minimum=1,
                maximum=len(df),
                step=1,
            )
        with gr.Column():
            plot = gr.BarPlot()
        with gr.Row():
            fetch_button = gr.Button(
                value="Fetch current number of models without model cards (takes up to 1min to fetch everything)"
            )
            text_box = gr.Textbox(value="", label="Number of models without model cards")
        with gr.Column():
            select_box = gr.Dropdown(
                ["all"] + df["library_name"].tolist(),
                value=["all"],
                multiselect=True,
                label="Libraries to inspect",
                info="Select specific libraries to inspect",
            )

        # Any change to one of the three controls redraws the plot.
        plot_inputs = [display, top_k, select_box]
        for control in (top_k, display, select_box):
            control.change(bar_plot_fn, inputs=plot_inputs, outputs=plot)

        fetch_button.click(get_current_nb_models, outputs=[text_box])

    # Draw the initial plot with the same three inputs as the change events.
    bar_plot.load(fn=bar_plot_fn, inputs=[display, top_k, select_box], outputs=plot)

bar_plot.launch()