""" Handle submissions to the GuardBench leaderboard. """ import json import os import tempfile from datetime import datetime from typing import Dict, List, Tuple import shutil import threading import time from huggingface_hub import HfApi from datasets import load_dataset import subprocess from src.display.formatting import styled_error, styled_message from src.envs import RESULTS_DATASET_ID, TOKEN, REPO_ID from src.leaderboard.processor import process_jsonl_submission from circleguardbench.evaluator import Evaluator from circleguardbench.context import GuardbenchContext from circleguardbench.models_config import ModelType def validate_submission(file_path: str) -> Tuple[bool, str]: """ Validate a submission file. """ try: entries, message = process_jsonl_submission(file_path) if not entries: return False, message return True, "Submission is valid" except Exception as e: return False, f"Error validating submission: {e}" def submit_entry_to_hub(entry: Dict, model_name: str, mode: str, version="v0") -> Tuple[bool, str]: """ Submit a model's evaluation entry to the HuggingFace dataset. The entry is uniquely identified by model_name, mode, and version. """ try: # Create safe model name for file path model_name_safe = model_name.replace("/", "_").replace(" ", "_") mode_safe = str(mode).replace("/", "_").replace(" ", "_").lower() # Create entry path in entries folder entry_path = f"entries/entry_{model_name_safe}_{mode_safe}_{version}.json" # Save entry to temporary file with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file: json.dump(entry, temp_file, indent=2) temp_path = temp_file.name # Upload file api = HfApi(token=TOKEN) api.upload_file( path_or_fileobj=temp_path, path_in_repo=entry_path, repo_id=RESULTS_DATASET_ID, repo_type="dataset", commit_message=f"Add evaluation entry for {model_name} (mode {mode}, version {version})" ) os.unlink(temp_path) return True, f"Successfully uploaded evaluation entry for {model_name} (mode {mode})" except Exception as e: return False, f"Error submitting entry to dataset: {e}" def submit_leaderboard_to_hub(entries: List[Dict], version="v0") -> Tuple[bool, str]: """ Submit updated leaderboard to the HuggingFace dataset. """ try: # Create leaderboard data leaderboard_data = { "entries": entries, "last_updated": datetime.now().isoformat(), "version": version } # Save to temporary file with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file: json.dump(leaderboard_data, temp_file, indent=2) temp_path = temp_file.name # Upload file api = HfApi(token=TOKEN) api.upload_file( path_or_fileobj=temp_path, path_in_repo=f"leaderboards/leaderboard_{version}.json", repo_id=RESULTS_DATASET_ID, repo_type="dataset", commit_message=f"Update leaderboard for version {version}" ) os.unlink(temp_path) return True, "Leaderboard updated successfully" except Exception as e: return False, f"Error updating leaderboard: {e}" def restart_space_after_delay(delay_seconds: int = 2) -> None: """ Restart the Hugging Face Space after a delay. """ def _restart_space(): time.sleep(delay_seconds) try: api = HfApi(token=TOKEN) api.restart_space(repo_id=REPO_ID) except Exception as e: print(f"Error restarting space: {e}") # Start the restart in a separate thread thread = threading.Thread(target=_restart_space) thread.daemon = True thread.start() def process_submission(file_path: str, metadata: Dict, version="v0") -> str: """ Process a submission to the GuardBench leaderboard. 
""" try: # Validate submission is_valid, validation_message = validate_submission(file_path) if not is_valid: return styled_error(validation_message) # Get GuardBench results directory path guardbench_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "guard-bench-submodule") results_dir = os.path.join(guardbench_dir, "results") os.makedirs(results_dir, exist_ok=True) # Copy submission to GuardBench results directory model_name = metadata.get("model_name", "unknown") model_name_safe = model_name.replace("/", "_").replace(" ", "_") guard_model_type = metadata.get("guard_model_type", "unknown") target_file = os.path.join(results_dir + "/circleguardbench_public", f"{model_name_safe}.jsonl") # Upload raw submission file api = HfApi(token=TOKEN) submission_path = f"submissions_{version}/{model_name_safe}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.jsonl" api.upload_file( path_or_fileobj=file_path, path_in_repo=submission_path, repo_id=RESULTS_DATASET_ID, repo_type="dataset", commit_message=f"Add raw submission for {model_name}" ) os.makedirs(results_dir + "/circleguardbench_public", exist_ok=True) # (f"Submission path: {submission_path}") # print(f"Target file: {target_file}") # printprint(f"Results dir: {results_dir}") shutil.copy2(file_path, target_file) # print(f"Copied file to target file: {target_file}") # print(f" ls /home/user/app/guard-bench-submodule/results/guardbench_dataset_1k_public/: {subprocess.check_output('ls /home/user/app/guard-bench-submodule/results/guardbench_dataset_1k_public/', shell=True).decode('utf-8')}") try: # Initialize GuardBench context ctx = GuardbenchContext() # Set results directory ctx.results_dir = results_dir # Set bench name from the results directory ctx.bench_name = "circleguardbench_public" # Load dataset ctx.load_dataset("whitecircle-ai/circleguardbench_public") # Mark as initialized ctx.is_initialized = True evaluator = Evaluator(ctx, force=True, using_cached=True) # Run evaluation and get entry evaluator.evaluate_model(model_name_safe, str(guard_model_type).lower()) # Get the entry from results with open(os.path.join(results_dir + "/" + ctx.bench_name, "leaderboard.json"), 'r') as f: results_data = json.load(f) model_entry = next( (entry for entry in results_data.get("entries", []) if entry.get("model_name") == model_name_safe), None ) if not model_entry: return styled_error("No evaluation results found") # Add metadata to entry model_entry.update({ "model_name": metadata.get("model_name"), # Use original model name "model_type": metadata.get("model_type"), "guard_model_type": str(metadata.get("guard_model_type")).lower(), "mode": metadata.get("mode"), "base_model": metadata.get("base_model"), "revision": metadata.get("revision"), "precision": metadata.get("precision"), "weight_type": metadata.get("weight_type"), "version": version, "submission_date": datetime.now().isoformat() }) # Submit entry to entries folder success, message = submit_entry_to_hub(model_entry, model_name, metadata.get("mode"), version) if not success: return styled_error(message) # Get all entries from HF dataset api = HfApi(token=TOKEN) files = api.list_repo_files(repo_id=RESULTS_DATASET_ID, repo_type="dataset") entry_files = [f for f in files if f.startswith("entries/") and f.endswith(f"_{version}.json")] all_entries = [] for entry_file in entry_files: try: entry_path = api.hf_hub_download( repo_id=RESULTS_DATASET_ID, filename=entry_file, repo_type="dataset", ) with open(entry_path, 'r') as f: entry_data = json.load(f) all_entries.append(entry_data) 
                except Exception as e:
                    print(f"Error loading entry {entry_file}: {e}")

            # Update leaderboard with all entries
            success, message = submit_leaderboard_to_hub(all_entries, version)
            if not success:
                return styled_error(message)

            restart_space_after_delay(5)
            return styled_message("Submission successful! Model evaluated and leaderboard updated.")

        except Exception as eval_error:
            return styled_error(f"Error during evaluation: {eval_error}")

    except Exception as e:
        return styled_error(f"Error processing submission: {e}")
    finally:
        # Clean up temporary files
        try:
            if os.path.exists(file_path):
                os.remove(file_path)
            if target_file and os.path.exists(target_file):
                os.remove(target_file)
        except OSError:
            pass
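

# Usage sketch (illustrative only): a minimal example of how process_submission
# might be called from an upload handler. The metadata keys mirror the fields
# read above, but the example values and the local file path are hypothetical
# and not defined by this module.
if __name__ == "__main__":
    example_metadata = {
        "model_name": "example-org/example-guard-model",  # hypothetical model id
        "model_type": "open-source",                      # hypothetical value
        "guard_model_type": "classifier",                 # hypothetical value
        "mode": "default",                                # hypothetical value
        "base_model": "example-org/example-base",
        "revision": "main",
        "precision": "bfloat16",
        "weight_type": "original",
    }
    # Validates the JSONL, runs the GuardBench evaluation, and pushes the
    # resulting entry and leaderboard to RESULTS_DATASET_ID; returns styled HTML.
    print(process_submission("example_submission.jsonl", example_metadata, version="v0"))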