import math

import pandas as pd


def add_model_size_groups(df, group_size=0.5, max_size=15):
    """
    Add a column to the DataFrame categorizing model file sizes into size groups.

    Args:
        df (pandas.DataFrame): DataFrame containing model benchmark data
        group_size (float): Size of each group in GB (default: 0.5)
        max_size (int): Maximum size in GB to consider (default: 15)

    Returns:
        pandas.DataFrame: Copy of the DataFrame with an additional 'Size Group'
        column ("Unknown" for missing sizes, ">{max_size} GB" for oversized
        models, otherwise "{lower}-{upper} GB" buckets of width group_size).

    Raises:
        ValueError: If neither 'Model Size GB' nor 'Model File Size' is present.
    """
    if df is None or df.empty:
        return df

    result_df = df.copy()

    if "Model Size GB" not in result_df.columns:
        # Derive the GB column from the raw file size when it is absent.
        if "Model File Size" not in result_df.columns:
            raise ValueError("DataFrame must contain 'Model File Size' column")
        # NOTE(review): 'Model File Size' is presumably in bytes (divided by
        # 1024**3 to get GiB) — confirm against the data producer.
        result_df["Model Size GB"] = result_df["Model File Size"] / 1024**3

    def assign_size_group(size):
        """Map a single size in GB to its bucket label."""
        if size is None or pd.isna(size):
            return "Unknown"
        if size >= max_size:
            return f">{max_size} GB"
        group_index = math.floor(size / group_size)
        lower_bound = group_index * group_size
        upper_bound = lower_bound + group_size
        # Round to 2 decimal places to avoid floating point noise in labels;
        # 2 decimals (not 1) so group sizes like 0.25 produce correct bounds.
        lower_bound = round(lower_bound, 2)
        upper_bound = round(upper_bound, 2)
        return f"{lower_bound}-{upper_bound} GB"

    result_df["Size Group"] = result_df["Model Size GB"].apply(assign_size_group)
    return result_df


def detect_anomalies(df, z_threshold=6.0, min_samples=5):
    """
    Detect anomalies in benchmark data.

    Entries are grouped by 'Size Group'; within each group the mean and sample
    standard deviation of each metric is computed, and any entry whose absolute
    z-score exceeds z_threshold is flagged.

    Args:
        df (pd.DataFrame): DataFrame containing benchmark data
        z_threshold (float): Z-score threshold for anomaly detection (default: 6.0)
        min_samples (int): Minimum number of samples needed for a group to
            calculate statistics

    Returns:
        pd.DataFrame: DataFrame of detected anomalies (size group, model/device
        identifiers, metric, value, group stats, z-score, submission info),
        sorted by 'Z-Score' descending. Empty DataFrame when nothing is flagged.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    # Ensure we have the Size Group column (add_model_size_groups returns a
    # copy, so the caller's DataFrame is never mutated).
    if "Size Group" not in df.columns:
        df = add_model_size_groups(df)

    anomalies = []

    for metric in ["Prompt Processing", "Token Generation"]:
        for size_group, group_df in df.groupby("Size Group"):
            # Only process groups with enough samples for stable statistics.
            if len(group_df) < min_samples:
                continue

            mean_value = group_df[metric].mean()
            std_value = group_df[metric].std()

            # Skip if standard deviation is zero or very small — z-scores
            # would be meaningless (or divide by zero).
            if std_value < 0.001:
                continue

            for _, row in group_df.iterrows():
                value = row[metric]
                if pd.isna(value):
                    continue

                z_score = abs((value - mean_value) / std_value)

                if z_score > z_threshold:
                    anomalies.append(
                        {
                            "Size Group": size_group,
                            "Model": row["Model"],
                            "Device": row["Device"],
                            "Device ID": row["Device ID"],
                            "Platform": row["Platform"],
                            "Metric": metric,
                            "Value": value,
                            "Mean": mean_value,
                            "Std": std_value,
                            "Z-Score": z_score,
                            "Times Faster/Slower": value / mean_value,
                            "Benchmark": row["Benchmark"],
                            "Submission ID": row["Submission ID"],
                        }
                    )

    anomaly_df = pd.DataFrame(anomalies)
    if not anomaly_df.empty:
        anomaly_df = anomaly_df.sort_values(by="Z-Score", ascending=False)

    return anomaly_df


def filter_anomalies(df, z_threshold=9.0, min_samples=5):
    """
    Filter out anomalies from a DataFrame.

    Any row whose 'Submission ID' appears in the detected anomalies (for either
    metric) is removed from the result.

    Args:
        df (pd.DataFrame): DataFrame containing benchmark data
        z_threshold (float): Z-score threshold for anomaly detection (default: 9.0)
        min_samples (int): Minimum number of samples needed for a group to
            calculate statistics

    Returns:
        tuple: (filtered_df, anomalies_df) - the filtered DataFrame without
        anomalies and the anomalies DataFrame
    """
    if df is None or df.empty:
        return df, pd.DataFrame()

    anomalies = detect_anomalies(df, z_threshold, min_samples)
    if anomalies.empty:
        return df, anomalies

    # Drop every submission flagged for any metric.
    anomaly_ids = set(anomalies["Submission ID"].dropna().unique())
    filtered_df = df[~df["Submission ID"].isin(anomaly_ids)]

    return filtered_df, anomalies