""" Handle submissions to the GuardBench leaderboard. """ import json import os import tempfile from datetime import datetime from typing import Dict, List, Tuple import shutil from huggingface_hub import HfApi from datasets import load_dataset import subprocess from src.display.formatting import styled_error, styled_message from src.envs import RESULTS_DATASET_ID, TOKEN from src.leaderboard.processor import process_jsonl_submission from guardbench.evaluator import Evaluator from guardbench.context import GuardbenchContext from guardbench.models_config import ModelType def validate_submission(file_path: str) -> Tuple[bool, str]: """ Validate a submission file. """ try: entries, message = process_jsonl_submission(file_path) if not entries: return False, message return True, "Submission is valid" except Exception as e: return False, f"Error validating submission: {e}" def submit_entry_to_hub(entry: Dict, model_name: str, version="v0") -> Tuple[bool, str]: """ Submit a model's evaluation entry to the HuggingFace dataset. """ try: # Create safe model name for file path model_name_safe = model_name.replace("/", "_").replace(" ", "_") # Create entry path in entries folder entry_path = f"entries/entry_{model_name_safe}_{version}.json" # Save entry to temporary file with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file: json.dump(entry, temp_file, indent=2) temp_path = temp_file.name # Upload file api = HfApi(token=TOKEN) api.upload_file( path_or_fileobj=temp_path, path_in_repo=entry_path, repo_id=RESULTS_DATASET_ID, repo_type="dataset", commit_message=f"Add evaluation entry for {model_name} (version {version})" ) os.unlink(temp_path) return True, f"Successfully uploaded evaluation entry for {model_name}" except Exception as e: return False, f"Error submitting entry to dataset: {e}" def submit_leaderboard_to_hub(entries: List[Dict], version="v0") -> Tuple[bool, str]: """ Submit updated leaderboard to the HuggingFace dataset. """ try: # Create leaderboard data leaderboard_data = { "entries": entries, "last_updated": datetime.now().isoformat(), "version": version } # Save to temporary file with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file: json.dump(leaderboard_data, temp_file, indent=2) temp_path = temp_file.name # Upload file api = HfApi(token=TOKEN) api.upload_file( path_or_fileobj=temp_path, path_in_repo=f"leaderboards/leaderboard_{version}.json", repo_id=RESULTS_DATASET_ID, repo_type="dataset", commit_message=f"Update leaderboard for version {version}" ) os.unlink(temp_path) return True, "Leaderboard updated successfully" except Exception as e: return False, f"Error updating leaderboard: {e}" def process_submission(file_path: str, metadata: Dict, version="v0") -> str: """ Process a submission to the GuardBench leaderboard. """ try: # Validate submission is_valid, validation_message = validate_submission(file_path) if not is_valid: return styled_error(validation_message) # Get GuardBench results directory path guardbench_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "guard-bench-submodule") results_dir = os.path.join(guardbench_dir, "results") os.makedirs(results_dir, exist_ok=True) # Copy submission to GuardBench results directory model_name = metadata.get("model_name", "unknown") model_name_safe = model_name.replace("/", "_").replace(" ", "_") guard_model_type = metadata.get("guard_model_type", "unknown") target_file = os.path.join(results_dir + "/guardbench_dataset_1k_public", f"{model_name_safe}.jsonl") # Upload raw submission file api = HfApi(token=TOKEN) submission_path = f"submissions_{version}/{model_name_safe}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.jsonl" api.upload_file( path_or_fileobj=file_path, path_in_repo=submission_path, repo_id=RESULTS_DATASET_ID, repo_type="dataset", commit_message=f"Add raw submission for {model_name}" ) os.makedirs(results_dir + "/guardbench_dataset_1k_public", exist_ok=True) # (f"Submission path: {submission_path}") # print(f"Target file: {target_file}") # printprint(f"Results dir: {results_dir}") shutil.copy2(file_path, target_file) # print(f"Copied file to target file: {target_file}") # print(f" ls /home/user/app/guard-bench-submodule/results/guardbench_dataset_1k_public/: {subprocess.check_output('ls /home/user/app/guard-bench-submodule/results/guardbench_dataset_1k_public/', shell=True).decode('utf-8')}") try: # Initialize GuardBench context ctx = GuardbenchContext() # Set results directory ctx.results_dir = results_dir # Set bench name from the results directory ctx.bench_name = "guardbench_dataset_1k_public" # Load dataset ctx.load_dataset("whitecircle-ai/guardbench_dataset_1k_public") # Mark as initialized ctx.is_initialized = True evaluator = Evaluator(ctx, force=True, using_cached=True) # Run evaluation and get entry evaluator.evaluate_model(model_name_safe, str(guard_model_type).lower()) # Get the entry from results with open(os.path.join(results_dir + "/" + ctx.bench_name, "leaderboard.json"), 'r') as f: results_data = json.load(f) model_entry = next( (entry for entry in results_data.get("entries", []) if entry.get("model_name") == model_name_safe), None ) if not model_entry: return styled_error("No evaluation results found") # Add metadata to entry model_entry.update({ "model_name": metadata.get("model_name"), # Use original model name "model_type": metadata.get("model_type"), "guard_model_type": str(metadata.get("guard_model_type")).lower(), "base_model": metadata.get("base_model"), "revision": metadata.get("revision"), "precision": metadata.get("precision"), "weight_type": metadata.get("weight_type"), "version": version, "submission_date": datetime.now().isoformat() }) # Submit entry to entries folder success, message = submit_entry_to_hub(model_entry, model_name, version) if not success: return styled_error(message) # Get all entries from HF dataset api = HfApi(token=TOKEN) files = api.list_repo_files(repo_id=RESULTS_DATASET_ID, repo_type="dataset") entry_files = [f for f in files if f.startswith("entries/") and f.endswith(f"_{version}.json")] all_entries = [] for entry_file in entry_files: try: entry_path = api.hf_hub_download( repo_id=RESULTS_DATASET_ID, filename=entry_file, repo_type="dataset", ) with open(entry_path, 'r') as f: entry_data = json.load(f) all_entries.append(entry_data) except Exception as e: print(f"Error loading entry {entry_file}: {e}") # Update leaderboard with all entries success, message = submit_leaderboard_to_hub(all_entries, version) if not success: return styled_error(message) return styled_message(f"Submission successful! Model evaluated and leaderboard updated.") except Exception as eval_error: return styled_error(f"Error during evaluation: {eval_error}") except Exception as e: return styled_error(f"Error processing submission: {e}") finally: # Clean up temporary files try: if os.path.exists(file_path): os.remove(file_path) if os.path.exists(target_file): os.remove(target_file) except: pass