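"""Service layer for fetching and formatting LLM security leaderboard data
from the Hugging Face Hub."""
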
import logging
from typing import Any, Dict, List

import datasets

from app.config.base import HF_ORGANIZATION
from app.core.cache import cache_config
from app.core.formatting import LogFormatter

logger = logging.getLogger(__name__)

class LeaderboardService:
    def __init__(self):
        pass

    async def fetch_raw_data(self) -> List[Dict[str, Any]]:
        """Fetch raw leaderboard data from the HuggingFace dataset."""
        try:
            logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
            logger.info(
                LogFormatter.info(
                    f"Loading dataset from {HF_ORGANIZATION}/llm-security-leaderboard-contents"
                )
            )

            dataset = datasets.load_dataset(
                f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
                cache_dir=cache_config.get_cache_path("datasets"),
            )["train"]

            df = dataset.to_pandas()
            # Replace NaN with None before converting to dicts so the records
            # serialize cleanly
            df = df.replace({float("nan"): None})
            data = df.to_dict("records")

            stats = {
                "Total_Entries": len(data),
                "Dataset_Size": f"{df.memory_usage(deep=True).sum() / 1024 / 1024:.1f}MB",
            }
            for line in LogFormatter.stats(stats, "Dataset Statistics"):
                logger.info(line)

            return data

        except Exception as e:
            logger.error(LogFormatter.error("Failed to fetch leaderboard data", e))
            # Return an empty list as fallback when no results are available yet
            return []

    async def get_formatted_data(self) -> List[Dict[str, Any]]:
        """Get formatted leaderboard data, deduplicated by entry ID."""
        try:
            logger.info(LogFormatter.section("FORMATTING LEADERBOARD DATA"))

            raw_data = await self.fetch_raw_data()
            formatted_data = {}
            type_counts = {}
            error_count = 0

            # Initialize progress tracking
            total_items = len(raw_data)
            logger.info(LogFormatter.info(f"Processing {total_items:,} entries..."))

            for i, item in enumerate(raw_data, 1):
                try:
                    formatted_item = await self.transform_data(item)
                    unique_id = formatted_item["id"]

                    # If the entry already exists, keep the one with the
                    # latest submission date
                    if unique_id in formatted_data:
                        existing_date = formatted_data[unique_id]["metadata"].get(
                            "submission_date"
                        )
                        new_date = formatted_item["metadata"].get("submission_date")
                        # Replace if the new item is newer, or if the existing
                        # one has no date at all
                        if not existing_date or (new_date and new_date > existing_date):
                            formatted_data[unique_id] = formatted_item
                    else:
                        formatted_data[unique_id] = formatted_item

                except Exception as e:
                    error_count += 1
                    logger.error(
                        LogFormatter.error(
                            f"Failed to format entry {i}/{total_items}", e
                        )
                    )
                    continue

                # Log progress every 10%
                if i % max(1, total_items // 10) == 0:
                    logger.info(
                        LogFormatter.info(
                            f"Progress: {LogFormatter.progress_bar(i, total_items)}"
                        )
                    )

            # Count model types over the deduplicated entries so that replaced
            # duplicates are not counted twice
            for entry in formatted_data.values():
                model_type = entry["model"]["type"]
                type_counts[model_type] = type_counts.get(model_type, 0) + 1

            # Log final statistics ("Successful" counts unique entries after
            # deduplication)
            stats = {
                "Total_Processed": total_items,
                "Successful": len(formatted_data),
                "Failed": error_count,
            }
            logger.info(LogFormatter.section("PROCESSING SUMMARY"))
            for line in LogFormatter.stats(stats, "Processing Statistics"):
                logger.info(line)

            # Log model type distribution
            type_stats = {f"Type_{k}": v for k, v in type_counts.items()}
            logger.info(LogFormatter.subsection("MODEL TYPE DISTRIBUTION"))
            for line in LogFormatter.stats(type_stats):
                logger.info(line)

            return list(formatted_data.values())

        except Exception as e:
            logger.error(LogFormatter.error("Failed to format leaderboard data", e))
            # Return an empty list as fallback when no results are available yet
            return []

    async def transform_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Transform raw data into the format expected by the frontend."""
        try:
            # Extract model name for logging
            model_name = data.get("fullname", "Unknown")
            logger.debug(
                LogFormatter.info(f"Transforming data for model: {model_name}")
            )
            # The model's full name serves as the unique entry ID
            unique_id = data.get("fullname", "Unknown")
            evaluations = {
                "safetensors": {
                    "name": "SafeTensors",
                    "value": data.get("safetensors", 0),
                    "normalized_score": data.get("safetensors", 0),
                },
                "secure_coding": {
                    "name": "Secure Coding",
                    "value": data.get("secure_coding", 0),
                    "normalized_score": data.get("secure_coding", 0),
                },
                "cve_knowledge": {
                    "name": "CVE Knowledge",
                    "value": data.get("cve_knowledge", 0),
                    "normalized_score": data.get("cve_knowledge", 0),
                },
                "insecure_recognition": {
                    "name": "Insecure Code Detection",
                    "value": data.get("insecure_recognition", 0),
                    "normalized_score": data.get("insecure_recognition", 0),
                },
            }
            features = {
                # Note: the key reads "not available" but mirrors the
                # "Available on the hub" column directly; kept as-is since the
                # frontend expects this key
                "is_not_available_on_hub": data.get("Available on the hub", False),
                "is_merged": data.get("Merged", False),
                "is_moe": data.get("MoE", False),
                "is_flagged": data.get("Flagged", False),
                "is_official_provider": data.get("Official Providers", False),
            }
            metadata = {
                "upload_date": data.get("Upload To Hub Date"),
                "submission_date": data.get("Submission Date"),
                "generation": data.get("Generation"),
                "base_model": data.get("Base Model"),
                "hub_license": data.get("Hub License"),
                "hub_hearts": data.get("Hub ❤️"),
                "params_billions": data.get("#Params (B)"),
                "co2_cost": data.get("CO₂ cost (kg)", 0),
            }
            # Clean the model type: lowercase, drop any parenthetical note,
            # then strip status emojis
            original_type = data.get("Type", "")
            model_type = original_type.lower().strip()
            if "(" in model_type:
                model_type = model_type.split("(")[0].strip()
            # Keep spaces so that multi-word types like "fine tuned" still
            # match the mapping below
            model_type = "".join(c for c in model_type if c not in "🔶🟢🟩💬🤝🌸").strip()

            # Map legacy model type spellings to the canonical value
            model_type_mapping = {
                "fine-tuned": "fined-tuned-on-domain-specific-dataset",
                "fine tuned": "fined-tuned-on-domain-specific-dataset",
                "finetuned": "fined-tuned-on-domain-specific-dataset",
                "fine_tuned": "fined-tuned-on-domain-specific-dataset",
                "ft": "fined-tuned-on-domain-specific-dataset",
                "finetuning": "fined-tuned-on-domain-specific-dataset",
                "fine tuning": "fined-tuned-on-domain-specific-dataset",
                "fine-tuning": "fined-tuned-on-domain-specific-dataset",
            }

            mapped_type = model_type_mapping.get(model_type, model_type)
            if mapped_type != model_type:
                logger.debug(
                    LogFormatter.info(
                        f"Model type mapped: {original_type} -> {mapped_type}"
                    )
                )
            transformed_data = {
                "id": unique_id,
                "model": {
                    "name": data.get("fullname"),
                    "sha": data.get("Model sha"),
                    "precision": data.get("Precision"),
                    "type": mapped_type,
                    "weight_type": data.get("Weight type"),
                    "architecture": data.get("Architecture"),
                    "average_score": data.get("Average ⬆️"),
                    "has_chat_template": data.get("Chat Template", False),
                },
                "evaluations": evaluations,
                "features": features,
                "metadata": metadata,
            }

            logger.debug(
                LogFormatter.success(f"Successfully transformed data for {model_name}")
            )
            return transformed_data

        except Exception as e:
            logger.error(
                LogFormatter.error(
                    f"Failed to transform data for {data.get('fullname', 'Unknown')}", e
                )
            )
            raise
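
# ---------------------------------------------------------------------------
# Usage sketch: a minimal way to exercise the service from a script, assuming
# the app package is importable and HF_ORGANIZATION points at a reachable
# Hugging Face organization. Illustrative only, not part of the service API.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        service = LeaderboardService()
        entries = await service.get_formatted_data()
        # Each entry carries "id", "model", "evaluations", "features", "metadata"
        print(f"Fetched {len(entries)} leaderboard entries")
        if entries:
            first = entries[0]
            print(first["model"]["name"], first["model"]["type"])

    asyncio.run(_demo())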