"""
Handle submissions to the GuardBench leaderboard.
"""

import json
import os
import tempfile
from datetime import datetime
from typing import Dict, List, Tuple
import shutil
import threading
import time

from huggingface_hub import HfApi
from datasets import load_dataset
import subprocess

from src.display.formatting import styled_error, styled_message
from src.envs import RESULTS_DATASET_ID, TOKEN, REPO_ID
from src.leaderboard.processor import process_jsonl_submission
from circleguardbench.evaluator import Evaluator
from circleguardbench.context import GuardbenchContext
from circleguardbench.models_config import ModelType


def validate_submission(file_path: str) -> Tuple[bool, str]:
    """
    Validate a submission file.
    """
    try:
        entries, message = process_jsonl_submission(file_path)
        if not entries:
            return False, message
        return True, "Submission is valid"
    except Exception as e:
        return False, f"Error validating submission: {e}"


def submit_entry_to_hub(entry: Dict, model_name: str, mode: str, version="v0") -> Tuple[bool, str]:
    """
    Submit a model's evaluation entry to the HuggingFace dataset. The entry is uniquely identified by model_name, mode, and version.
    """
    try:
        # Create safe model name for file path
        model_name_safe = model_name.replace("/", "_").replace(" ", "_")
        mode_safe = str(mode).replace("/", "_").replace(" ", "_").lower()

        # Create entry path in entries folder
        entry_path = f"entries/entry_{model_name_safe}_{mode_safe}_{version}.json"

        # Save entry to temporary file
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
            json.dump(entry, temp_file, indent=2)
            temp_path = temp_file.name

        # Upload file
        api = HfApi(token=TOKEN)
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=entry_path,
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Add evaluation entry for {model_name} (mode {mode}, version {version})"
        )

        os.unlink(temp_path)
        return True, f"Successfully uploaded evaluation entry for {model_name} (mode {mode})"
    except Exception as e:
        return False, f"Error submitting entry to dataset: {e}"


def submit_leaderboard_to_hub(entries: List[Dict], version="v0") -> Tuple[bool, str]:
    """
    Submit updated leaderboard to the HuggingFace dataset.
    """
    try:
        # Create leaderboard data
        leaderboard_data = {
            "entries": entries,
            "last_updated": datetime.now().isoformat(),
            "version": version
        }

        # Save to temporary file
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
            json.dump(leaderboard_data, temp_file, indent=2)
            temp_path = temp_file.name

        # Upload file
        api = HfApi(token=TOKEN)
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=f"leaderboards/leaderboard_{version}.json",
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Update leaderboard for version {version}"
        )

        os.unlink(temp_path)
        return True, "Leaderboard updated successfully"
    except Exception as e:
        return False, f"Error updating leaderboard: {e}"


def restart_space_after_delay(delay_seconds: int = 2) -> None:
    """
    Restart the Hugging Face Space after a delay.
    """
    def _restart_space():
        time.sleep(delay_seconds)
        try:
            api = HfApi(token=TOKEN)
            api.restart_space(repo_id=REPO_ID)
        except Exception as e:
            print(f"Error restarting space: {e}")

    # Start the restart in a separate thread
    thread = threading.Thread(target=_restart_space)
    thread.daemon = True
    thread.start()


def process_submission(file_path: str, metadata: Dict, version="v0") -> str:
    """
    Process a submission to the GuardBench leaderboard.
    """
    try:
        # Validate submission
        is_valid, validation_message = validate_submission(file_path)
        if not is_valid:
            return styled_error(validation_message)

        # Get GuardBench results directory path
        guardbench_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "guard-bench-submodule")
        results_dir = os.path.join(guardbench_dir, "results")
        os.makedirs(results_dir, exist_ok=True)

        # Copy submission to GuardBench results directory
        model_name = metadata.get("model_name", "unknown")
        model_name_safe = model_name.replace("/", "_").replace(" ", "_")
        guard_model_type = metadata.get("guard_model_type", "unknown")
        target_file = os.path.join(results_dir, "circleguardbench_public", f"{model_name_safe}.jsonl")

        # Upload raw submission file
        api = HfApi(token=TOKEN)
        submission_path = f"submissions_{version}/{model_name_safe}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.jsonl"
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=submission_path,
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Add raw submission for {model_name}"
        )
        os.makedirs(os.path.join(results_dir, "circleguardbench_public"), exist_ok=True)

        # print(f"Submission path: {submission_path}")
        # print(f"Target file: {target_file}")
        # print(f"Results dir: {results_dir}")

        shutil.copy2(file_path, target_file)
        # print(f"Copied file to target file: {target_file}")
        # print(f" ls /home/user/app/guard-bench-submodule/results/guardbench_dataset_1k_public/: {subprocess.check_output('ls /home/user/app/guard-bench-submodule/results/guardbench_dataset_1k_public/', shell=True).decode('utf-8')}")

        try:
            # Initialize GuardBench context
            ctx = GuardbenchContext()
            # Set results directory
            ctx.results_dir = results_dir
            # Set bench name from the results directory
            ctx.bench_name = "circleguardbench_public"
            # Load dataset
            ctx.load_dataset("whitecircle-ai/circleguardbench_public")
            # Mark as initialized
            ctx.is_initialized = True

            evaluator = Evaluator(ctx, force=True, using_cached=True)

            # Run evaluation and get entry
            evaluator.evaluate_model(model_name_safe, str(guard_model_type).lower())

            # Get the entry from results
            with open(os.path.join(results_dir, ctx.bench_name, "leaderboard.json"), 'r') as f:
                results_data = json.load(f)
                model_entry = next(
                    (entry for entry in results_data.get("entries", [])
                     if entry.get("model_name") == model_name_safe),
                    None
                )

            if not model_entry:
                return styled_error("No evaluation results found")

            # Add metadata to entry
            model_entry.update({
                "model_name": metadata.get("model_name"),  # Use original model name
                "model_type": metadata.get("model_type"),
                "guard_model_type": str(metadata.get("guard_model_type")).lower(),
                "mode": metadata.get("mode"),
                "base_model": metadata.get("base_model"),
                "revision": metadata.get("revision"),
                "precision": metadata.get("precision"),
                "weight_type": metadata.get("weight_type"),
                "version": version,
                "submission_date": datetime.now().isoformat()
            })

            # Submit entry to entries folder
            success, message = submit_entry_to_hub(model_entry, model_name, metadata.get("mode"), version)
            if not success:
                return styled_error(message)

            # Get all entries from HF dataset
            api = HfApi(token=TOKEN)
            files = api.list_repo_files(repo_id=RESULTS_DATASET_ID, repo_type="dataset")
            entry_files = [f for f in files if f.startswith("entries/") and f.endswith(f"_{version}.json")]

            all_entries = []
            for entry_file in entry_files:
                try:
                    entry_path = api.hf_hub_download(
                        repo_id=RESULTS_DATASET_ID,
                        filename=entry_file,
                        repo_type="dataset",
                    )
                    with open(entry_path, 'r') as f:
                        entry_data = json.load(f)
                        all_entries.append(entry_data)
                except Exception as e:
                    print(f"Error loading entry {entry_file}: {e}")

            # Update leaderboard with all entries
            success, message = submit_leaderboard_to_hub(all_entries, version)
            if not success:
                return styled_error(message)

            restart_space_after_delay(5)

            return styled_message("Submission successful! Model evaluated and leaderboard updated.")

        except Exception as eval_error:
            return styled_error(f"Error during evaluation: {eval_error}")

    except Exception as e:
        return styled_error(f"Error processing submission: {e}")
    finally:
        # Clean up temporary files
        try:
            if os.path.exists(file_path):
                os.remove(file_path)
            # target_file is only defined once the submission passes validation
            if 'target_file' in locals() and os.path.exists(target_file):
                os.remove(target_file)
        except OSError:
            pass
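

if __name__ == "__main__":
    # Minimal local sketch of how this module might be exercised.
    # The file path and metadata values below are hypothetical placeholders,
    # not values defined by this repository; a real run needs a valid HF token
    # in the environment and a submission file in the expected JSONL format.
    example_metadata = {
        "model_name": "my-org/my-guard-model",
        "model_type": "open-source",
        "guard_model_type": "llm",
        "mode": "default",
        "base_model": "my-org/base-model",
        "revision": "main",
        "precision": "bf16",
        "weight_type": "original",
    }
    print(process_submission("/tmp/example_submission.jsonl", example_metadata, version="v0"))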