File size: 4,933 Bytes
157b914
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import pandas as pd


def add_model_size_groups(df, group_size=0.5, max_size=15):
    """
    Add a 'Size Group' column categorizing model file sizes into fixed-width bins.

    Args:
        df (pandas.DataFrame): DataFrame containing model benchmark data.
            Must contain either a 'Model Size GB' column or a
            'Model File Size' column (bytes).
        group_size (float): Width of each bin in GB (default: 0.5).
        max_size (int): Sizes at or above this value (GB) all fall into a
            single '>{max_size} GB' bucket (default: 15).

    Returns:
        pandas.DataFrame: Copy of the input with an additional 'Size Group'
        column; the input itself, unchanged, when it is None or empty.

    Raises:
        ValueError: If neither 'Model Size GB' nor 'Model File Size' exists.
    """
    if df is None or df.empty:
        return df

    result_df = df.copy()

    if "Model Size GB" not in result_df.columns:
        # Derive GB from the raw byte count if possible.
        if "Model File Size" not in result_df.columns:
            raise ValueError("DataFrame must contain 'Model File Size' column")
        result_df["Model Size GB"] = result_df["Model File Size"] / 1024**3

    def assign_size_group(size):
        """Map a size in GB to its bin label, e.g. '0.5-1.0 GB'."""
        if size is None or pd.isna(size):
            return "Unknown"

        if size >= max_size:
            return f">{max_size} GB"

        # Float floor-division replaces math.floor (the original imported
        # math inside this per-row helper).
        group_index = int(size // group_size)
        lower_bound = group_index * group_size
        upper_bound = lower_bound + group_size

        # Round to 2 decimals to suppress float noise. The original rounded
        # to 1 decimal, which mangled labels for bin widths like 0.25
        # (0.25 -> '0.2'); 2 decimals keeps the default 0.5 GB labels
        # identical while supporting finer bins.
        lower_bound = round(lower_bound, 2)
        upper_bound = round(upper_bound, 2)

        return f"{lower_bound}-{upper_bound} GB"

    result_df["Size Group"] = result_df["Model Size GB"].apply(assign_size_group)

    return result_df


def detect_anomalies(df, z_threshold=6.0, min_samples=5):
    """
    Detect per-size-group outliers in benchmark throughput metrics.

    For each metric ('Prompt Processing', 'Token Generation') and each
    'Size Group', rows whose absolute z-score exceeds ``z_threshold`` are
    flagged as anomalies.

    Args:
        df (pd.DataFrame): DataFrame containing benchmark data. Must have a
            'Size Group' column or the columns needed by
            ``add_model_size_groups`` to derive one.
        z_threshold (float): Z-score threshold for anomaly detection
            (default: 6.0).
        min_samples (int): Minimum number of samples needed for a group to
            calculate statistics (default: 5).

    Returns:
        pd.DataFrame: One row per detected anomaly (model, device, metric,
        value, group statistics, z-score, ...), sorted by descending
        'Z-Score'. Empty DataFrame when the input is None/empty or nothing
        is flagged.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    # Ensure we have a Size Group column to group by.
    if "Size Group" not in df.columns:
        df = add_model_size_groups(df)

    anomalies = []

    # Group once: the grouping is identical for every metric (the original
    # recomputed it inside the metric loop).
    size_groups = df.groupby("Size Group")

    for metric in ["Prompt Processing", "Token Generation"]:
        # Skip metrics absent from this DataFrame instead of raising KeyError.
        if metric not in df.columns:
            continue

        for size_group, group_df in size_groups:
            # Only process groups with enough samples for stable statistics.
            if len(group_df) < min_samples:
                continue

            mean_value = group_df[metric].mean()
            std_value = group_df[metric].std()

            # Skip degenerate groups: all-NaN metric (std is NaN) or a
            # (near-)constant metric. The all-NaN case could never flag a
            # row anyway (NaN comparisons are False); skipping is explicit.
            if pd.isna(std_value) or std_value < 0.001:
                continue

            # Compute a z-score per row and flag threshold violations.
            for _, row in group_df.iterrows():
                value = row[metric]
                if pd.isna(value):
                    continue

                z_score = abs((value - mean_value) / std_value)

                if z_score > z_threshold:
                    anomalies.append(
                        {
                            "Size Group": size_group,
                            "Model": row["Model"],
                            "Device": row["Device"],
                            "Device ID": row["Device ID"],
                            "Platform": row["Platform"],
                            "Metric": metric,
                            "Value": value,
                            "Mean": mean_value,
                            "Std": std_value,
                            "Z-Score": z_score,
                            "Times Faster/Slower": value / mean_value,
                            "Benchmark": row["Benchmark"],
                            "Submission ID": row["Submission ID"],
                        }
                    )

    anomaly_df = pd.DataFrame(anomalies)
    if not anomaly_df.empty:
        # Most extreme outliers first.
        anomaly_df = anomaly_df.sort_values(by="Z-Score", ascending=False)

    return anomaly_df


def filter_anomalies(df, z_threshold=9.0, min_samples=5):
    """
    Split a benchmark DataFrame into clean rows and detected anomalies.

    Args:
        df (pd.DataFrame): DataFrame containing benchmark data.
        z_threshold (float): Z-score threshold passed to anomaly detection
            (default: 9.0).
        min_samples (int): Minimum group size required to compute statistics.

    Returns:
        tuple: (filtered_df, anomalies_df) — the rows whose submissions were
        not flagged, and the anomalies that were detected.
    """
    if df is None or df.empty:
        return df, pd.DataFrame()

    detected = detect_anomalies(df, z_threshold, min_samples)
    if detected.empty:
        return df, detected

    # Drop every row belonging to a submission that produced an anomaly.
    flagged_ids = set(detected["Submission ID"].dropna().unique())
    keep_mask = ~df["Submission ID"].isin(flagged_ids)
    return df[keep_mask], detected