File size: 6,878 Bytes
1c9c07a
099bd02
 
 
8f93924
 
099bd02
8f93924
1c9c07a
 
099bd02
8f93924
1c9c07a
 
 
14b802d
 
1c9c07a
 
 
099bd02
1c9c07a
099bd02
 
1c9c07a
 
a65206d
1c9c07a
 
099bd02
1c9c07a
 
099bd02
1c9c07a
a65206d
1c9c07a
 
 
 
8f93924
1c9c07a
099bd02
37936f0
a65206d
 
099bd02
 
 
 
 
 
 
1c9c07a
099bd02
 
 
 
 
 
 
 
 
 
 
 
8f93924
099bd02
 
8f93924
099bd02
1c9c07a
 
 
 
 
a65206d
975aae7
1c9c07a
0c350fd
1c9c07a
 
 
975aae7
1c9c07a
975aae7
1c9c07a
 
975aae7
1c9c07a
 
0c350fd
a65206d
 
1c9c07a
 
 
a65206d
 
 
 
 
83c9fd8
1c9c07a
 
099bd02
1c9c07a
 
 
 
 
a65206d
8f93924
975aae7
 
 
1c9c07a
 
 
 
 
 
 
 
 
975aae7
099bd02
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import json
import math
from pathlib import Path

import gradio as gr
import pandas as pd
from gradio_leaderboard import ColumnFilter, Leaderboard

from assets import custom_css

abs_path = Path(__file__).parent

# Load the JSONL results file: one JSON object per line.
# Parsing each line independently is robust to records whose string values
# happen to contain "}\n{" — the previous approach (joining the whole file
# and rewriting "}\n{" into "},\n{") would corrupt such records.
# encoding="utf-8" is explicit so parsing does not depend on the locale.
with open(abs_path / "results.jsonl", "r", encoding="utf-8") as file:
    json_data = [json.loads(line) for line in file if line.strip()]
df = pd.DataFrame(json_data)

def _linkify(row):
    """Wrap the row's model name in an anchor tag pointing at its URL."""
    return f'<a target="_blank" href="{row["URL"]}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{row["Model"]}</a>'


df["Model"] = df.apply(_linkify, axis=1)

# Pin the headline columns first; every other column follows in its
# original order, and the raw URL column is dropped from display.
_leading = ["Model", "Median Inference Time", "Price per Image"]
_hidden = set(_leading) | {"URL"}
_trailing = [col for col in df.columns.tolist() if col not in _hidden]
df = df[_leading + _trailing]

# Rank rows by GenEval score, best first.
df = df.sort_values(by="GenEval", ascending=False)

# Build the Gradio UI: a header banner, a leaderboard tab with slider
# filters, an "About" tab, and two footer accordions (community + citation).
with gr.Blocks("ParityError/Interstellar", css=custom_css) as demo:
    # Header banner: logo and titles rendered as raw HTML.
    gr.HTML(
        """
            <div style="text-align: center;">
                <img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/inferbench/logo2-cropped.png" style="width: 200px; height: auto; max-width: 100%; margin: 0 auto;">
                <h1>๐Ÿ‹๏ธ InferBench ๐Ÿ‹๏ธ</h1>
                <h2>A cost/quality/speed Leaderboard for Inference Providers!</h2>
            </div>
            """
    )

    with gr.Tabs():
        with gr.TabItem("FLUX.1 [dev] Leaderboard"):
            # Integer slider bounds derived from the data.
            # NOTE(review): floor/ceil to whole units is coarse if
            # "Price per Image" is below $1 (bounds collapse to [0, 1]) —
            # confirm the intended slider granularity.
            median_inference_time_min = math.floor(float(df["Median Inference Time"].min()))
            median_inference_time_max = math.ceil(float(df["Median Inference Time"].max()))
            price_per_image_min = math.floor(float(df["Price per Image"].min()))
            price_per_image_max = math.ceil(float(df["Price per Image"].max()))
            # Main leaderboard widget: searchable by model, filterable by
            # inference time and price; markdown datatype so the anchor
            # tags in the "Model" column render as links.
            Leaderboard(
                value=df,
                search_columns=["Model"],
                filter_columns=[
                    ColumnFilter(
                        column="Median Inference Time",
                        type="slider",
                        default=[median_inference_time_min, median_inference_time_max],
                        min=median_inference_time_min,
                        max=median_inference_time_max,
                    ),
                    ColumnFilter(
                        column="Price per Image",
                        type="slider",
                        default=[price_per_image_min, price_per_image_max],
                        min=price_per_image_min,
                        max=price_per_image_max,
                    ),
                ],
                select_columns=df.columns.tolist(),
                datatype="markdown",
            )
        # "About" tab: static project description beside an embedded page.
        with gr.TabItem("About"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(
                        """
                        # ๐Ÿ’œ About Pruna AI
                        We are [Pruna AI, an open source AI optimisation engine](https://github.com/PrunaAI/pruna) and we simply make your models cheaper, faster, smaller, greener!

                        # ๐Ÿ“Š About InferBench
                        InferBench is a leaderboard for inference providers, focusing on cost, quality, and speed.
                        Over the past few years, weโ€™ve observed outstanding progress in image generation models fueled by ever-larger architectures.
                        Due to their size, state-of-the-art models such as FLUX take more than 6 seconds to generate a single image on a high-end H100 GPU.
                        While compression techniques can reduce inference time, their impact on quality often remains unclear.

                        To bring more transparency around the quality of compressed models:

                        - We release โ€œjuicedโ€ endpoints for popular image generation models on Replicate, making it easy to play around with our compressed models.
                        - We assess the quality of compressed FLUX-APIs from Replicate, fal, Fireworks AI and Together AI according to different benchmarks.

                        FLUX-juiced was obtained using a combination of compilation and caching algorithms and we are proud to say that it consistently outperforms alternatives, while delivering performance on par with the original model.
                        This combination is available in our Pruna Pro package and can be applied to almost every image generation model.

                        - A full blogpost on the methodology can be found [here](https://pruna.ai/blog/flux-juiced).
                        - A website that compares the outputs of the different models can be found [here](https://www.notion.so/FLUX-juiced-1d270a039e5f80c6a2a3c00fc0d75ef0?pvs=4).
                        """
                    )
                with gr.Column(scale=1):
                    # NOTE(review): Notion pages often refuse to render inside
                    # iframes (X-Frame-Options) — verify this embed displays.
                    gr.HTML(
                        """
                        <iframe src="https://www.notion.so/FLUX-juiced-1d270a039e5f80c6a2a3c00fc0d75ef0?pvs=4" width="100%" height="100%" frameborder="0"></iframe>
                        """
                    )

        # Footer: social badges, collapsed by default.
        with gr.Accordion("๐ŸŒ Join the Pruna AI community!", open=False):
            gr.HTML(
                """
                    <a rel="nofollow" href="https://twitter.com/PrunaAI"><img alt="Twitter" src="https://img.shields.io/twitter/follow/PrunaAI?style=social"></a>
                    <a rel="nofollow" href="https://github.com/PrunaAI/pruna"><img alt="GitHub" src="https://img.shields.io/github/stars/prunaai/pruna"></a>
                    <a rel="nofollow" href="https://www.linkedin.com/company/93832878/admin/feed/posts/?feedType=following"><img alt="LinkedIn" src="https://img.shields.io/badge/LinkedIn-Connect-blue"></a>
                    <a rel="nofollow" href="https://discord.com/invite/rskEr4BZJx"><img alt="Discord" src="https://img.shields.io/badge/Discord-Join%20Us-blue?style=social&amp;logo=discord"></a>
                    <a rel="nofollow" href="https://www.reddit.com/r/PrunaAI/"><img alt="Reddit" src="https://img.shields.io/reddit/subreddit-subscribers/PrunaAI?style=social"></a>
                """
            )
        # BibTeX citation, expanded by default. The howpublished URL uses an
        # escaped backslash so the rendered BibTeX contains a literal \url{}.
        with gr.Accordion("Citation", open=True):
            gr.Markdown(
                """
                ```bibtex
                @article{InferBench,
                    title={InferBench: A Leaderboard for Inference Providers},
                    author={PrunaAI},
                    year={2025},
                    howpublished={\\url{https://huggingface.co/spaces/PrunaAI/InferBench}}
                }
                ```
                """
            )
# Start the Gradio server only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()