Daniel Kantor committed on
Commit
4668301
·
1 Parent(s): bbc2ce3

fix datasets

Browse files
backend/app/config/hf_config.py CHANGED
@@ -20,7 +20,7 @@ API = HfApi(token=HF_TOKEN)
20
 
21
  # Repository configuration
22
  QUEUE_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-requests"
23
- AGGREGATED_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-results"
24
  VOTES_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-votes"
25
  OFFICIAL_PROVIDERS_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-official-providers"
26
 
 
20
 
21
  # Repository configuration
22
  QUEUE_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-requests"
23
+ AGGREGATED_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-contents"
24
  VOTES_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-votes"
25
  OFFICIAL_PROVIDERS_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-official-providers"
26
 
backend/app/services/leaderboard.py CHANGED
@@ -17,10 +17,10 @@ class LeaderboardService:
17
  """Fetch raw leaderboard data from HuggingFace dataset"""
18
  try:
19
  logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
20
- logger.info(LogFormatter.info(f"Loading dataset from {HF_ORGANIZATION}/contents"))
21
 
22
  dataset = datasets.load_dataset(
23
- f"{HF_ORGANIZATION}/contents",
24
  cache_dir=cache_config.get_cache_path("datasets")
25
  )["train"]
26
 
@@ -205,4 +205,4 @@ class LeaderboardService:
205
 
206
  except Exception as e:
207
  logger.error(LogFormatter.error(f"Failed to transform data for {data.get('fullname', 'Unknown')}", e))
208
- raise
 
17
  """Fetch raw leaderboard data from HuggingFace dataset"""
18
  try:
19
  logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
20
+ logger.info(LogFormatter.info(f"Loading dataset from {HF_ORGANIZATION}/llm-security-leaderboard-contents"))
21
 
22
  dataset = datasets.load_dataset(
23
+ f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
24
  cache_dir=cache_config.get_cache_path("datasets")
25
  )["train"]
26
 
 
205
 
206
  except Exception as e:
207
  logger.error(LogFormatter.error(f"Failed to transform data for {data.get('fullname', 'Unknown')}", e))
208
+ raise
backend/utils/analyze_prod_datasets.py CHANGED
@@ -120,7 +120,7 @@ def main():
120
  # List of datasets to analyze
121
  datasets = [
122
  {
123
- "id": f"{HF_ORGANIZATION}/contents",
124
  "description": "Aggregated results"
125
  },
126
  {
@@ -167,4 +167,4 @@ def main():
167
  return []
168
 
169
  if __name__ == "__main__":
170
- main()
 
120
  # List of datasets to analyze
121
  datasets = [
122
  {
123
+ "id": f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
124
  "description": "Aggregated results"
125
  },
126
  {
 
167
  return []
168
 
169
  if __name__ == "__main__":
170
+ main()
backend/utils/analyze_prod_models.py CHANGED
@@ -31,13 +31,13 @@ def count_evaluated_models():
31
  """Count the number of evaluated models"""
32
  try:
33
  # Get dataset info
34
- dataset_info = api.dataset_info(repo_id=f"{HF_ORGANIZATION}/contents", repo_type="dataset")
35
 
36
  # Get file list
37
- files = api.list_repo_files(f"{HF_ORGANIZATION}/contents", repo_type="dataset")
38
 
39
  # Get last commit info
40
- commits = api.list_repo_commits(f"{HF_ORGANIZATION}/contents", repo_type="dataset")
41
  last_commit = next(commits, None)
42
 
43
  # Count lines in jsonl files
@@ -47,7 +47,7 @@ def count_evaluated_models():
47
  try:
48
  # Download file content
49
  content = api.hf_hub_download(
50
- repo_id=f"{HF_ORGANIZATION}/contents",
51
  filename=file,
52
  repo_type="dataset"
53
  )
@@ -103,4 +103,4 @@ def main():
103
  return {"error": str(e)}
104
 
105
  if __name__ == "__main__":
106
- main()
 
31
  """Count the number of evaluated models"""
32
  try:
33
  # Get dataset info
34
+ dataset_info = api.dataset_info(repo_id=f"{HF_ORGANIZATION}/llm-security-leaderboard-contents", repo_type="dataset")
35
 
36
  # Get file list
37
+ files = api.list_repo_files(f"{HF_ORGANIZATION}/llm-security-leaderboard-contents", repo_type="dataset")
38
 
39
  # Get last commit info
40
+ commits = api.list_repo_commits(f"{HF_ORGANIZATION}/llm-security-leaderboard-contents", repo_type="dataset")
41
  last_commit = next(commits, None)
42
 
43
  # Count lines in jsonl files
 
47
  try:
48
  # Download file content
49
  content = api.hf_hub_download(
50
+ repo_id=f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
51
  filename=file,
52
  repo_type="dataset"
53
  )
 
103
  return {"error": str(e)}
104
 
105
  if __name__ == "__main__":
106
+ main()
backend/utils/sync_datasets_locally.py CHANGED
@@ -29,6 +29,7 @@ DATASET_NAMES = [
29
  "llm-security-leaderboard-votes",
30
  "llm-security-leaderboard-requests",
31
  "llm-security-leaderboard-results",
 
32
  "llm-security-leaderboard-official-providers",
33
  ]
34
 
 
29
  "llm-security-leaderboard-votes",
30
  "llm-security-leaderboard-requests",
31
  "llm-security-leaderboard-results",
32
+ "llm-security-leaderboard-contents",
33
  "llm-security-leaderboard-official-providers",
34
  ]
35