meg HF Staff committed on
Commit
9704550
·
verified ·
1 Parent(s): c83b217

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -33
app.py CHANGED
@@ -7,30 +7,9 @@ TOKEN = os.environ.get("BULK_ENERGY_TOKEN")
7
  API = HfApi(token=TOKEN)
8
 
9
  REPO_ID = "AIEnergyScore/BulkCalcSpace"
 
10
  app = FastAPI()
11
 
12
- def check_for_traceback(run_dir):
13
- # run_dir="./runs/${experiment_name}/${backend_model}/${now}"
14
- found_error = False
15
- error_message = ""
16
- try:
17
- # Read error message
18
- with open(f"{run_dir}/error.log", 'r') as f:
19
- # There may be a better way to do this that finds the
20
- # index of Traceback, then prints from there : end-of-file index (the file length-1).
21
- for line in f:
22
- # Question: Do we even need to check for this? The presence of the
23
- # error file, or at least a non-empty one,
24
- # means there's been an error, no?
25
- if 'Traceback (most recent call last):' in line:
26
- found_error = True
27
- if found_error:
28
- error_message += line
29
- except FileNotFoundError as e:
30
- # When does this happen?
31
- print(f"Could not find {run_dir}/error.log")
32
- return error_message
33
-
34
  @app.get("/")
35
  def start_train():
36
  model_file = open("models.txt", "r+").readlines()
@@ -38,7 +17,7 @@ def start_train():
38
  hardware_file = open("hardware.txt", "r+").readlines()
39
  for hardware in hardware_file:
40
  hardware = hardware.strip()
41
- print(f"Hardware is {hardware}")
42
  curr_runtime = API.get_space_runtime(repo_id=REPO_ID)
43
  print(f"Current hardware is {curr_runtime}")
44
  if curr_runtime != hardware:
@@ -46,18 +25,22 @@ def start_train():
46
  API.request_space_hardware(repo_id=REPO_ID, hardware=hardware)
47
  for model in model_file:
48
  model = model.strip()
 
49
  for task in task_file:
50
  task = task.strip()
 
51
  # Create the name of the directory for output.
52
  now = time.time()
53
- run_dir = f"/runs/${task}/${model}/${now}"
54
  os.system(f"./entrypoint.sh {REPO_ID} {model} {task} {hardware} {run_dir}")
55
- #space_variables = API.get_space_variables(repo_id=REPO_ID)
56
- #if 'STATUS' not in space_variables or space_variables['STATUS'] != 'COMPUTING':
57
- # print("Beginning processing.")
58
- # API.add_space_variable(repo_id=REPO_ID, key='STATUS', value='COMPUTING')
59
- #os.system(f"./entrypoint.sh {REPO_ID} {model} {task} {hardware}")
60
- #API.add_space_variable(repo_id=REPO_ID, key='STATUS', value='NOT_COMPUTING')
61
- #print("Pausing space")
62
- #API.pause_space(REPO_ID)
63
- return {"Status": "Done"}#space_variables['STATUS']}
 
 
 
7
  API = HfApi(token=TOKEN)
8
 
9
  REPO_ID = "AIEnergyScore/BulkCalcSpace"
10
+ RESULTS_DSET = "AIEnergyScore/BulkCalcResults"
11
  app = FastAPI()
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  @app.get("/")
14
  def start_train():
15
  model_file = open("models.txt", "r+").readlines()
 
17
  hardware_file = open("hardware.txt", "r+").readlines()
18
  for hardware in hardware_file:
19
  hardware = hardware.strip()
20
+ print(f"Requested hardware is {hardware}")
21
  curr_runtime = API.get_space_runtime(repo_id=REPO_ID)
22
  print(f"Current hardware is {curr_runtime}")
23
  if curr_runtime != hardware:
 
25
  API.request_space_hardware(repo_id=REPO_ID, hardware=hardware)
26
  for model in model_file:
27
  model = model.strip()
28
+ print(f"Attempting to benchmark model {model}.")
29
  for task in task_file:
30
  task = task.strip()
31
+ print(f"Attempting to benchmark model {model} on task {task}.")
32
  # Create the name of the directory for output.
33
  now = time.time()
34
+ run_dir = f"/runs/{task}/{model}/{now}"
35
  os.system(f"./entrypoint.sh {REPO_ID} {model} {task} {hardware} {run_dir}")
36
+ # Uploads all run output to the results dataset.
37
+ print(f"Uploading {run_dir} to {RESULTS_DSET}")
38
+ try:
39
+ API.create_repo(repo_id=f"{RESULTS_DSET}", repo_type="dataset",)
40
+ print(f"Created results dataset repository")
41
+ except:
42
+ print(f"Using pre-existing dataset respository.")
43
+ API.upload_folder(folder_path=run_dir, repo_id=f"{RESULTS_DSET}", repo_type="dataset",)
44
+ print("Pausing space")
45
+ API.pause_space(REPO_ID)
46
+ #return {"Status": "Done"}