# Page-status residue from the Hugging Face Spaces listing ("Spaces: Paused"); not part of the program.
import os
import subprocess
import time

from fastapi import FastAPI
from huggingface_hub import HfApi
# Hub access token read from the environment; None when the variable is unset.
TOKEN = os.getenv("BULK_ENERGY_TOKEN")
# Identifier of the Space whose runtime is queried and whose hardware is switched.
REPO_ID = "AIEnergyScore/BulkCalcSpace"
# Hub client authenticated with TOKEN.
API = HfApi(token=TOKEN)
# FastAPI application object for this module.
app = FastAPI()
def check_for_traceback(run_dir: str) -> str:
    """Return the traceback recorded in ``{run_dir}/error.log``, if any.

    The log is searched for the first occurrence of
    ``Traceback (most recent call last):``. The whole line containing that
    marker, plus everything after it up to the end of the file, is returned.

    Args:
        run_dir: Directory of a single run, e.g.
            ``./runs/${experiment_name}/${backend_model}/${now}``.

    Returns:
        The traceback text, or an empty string when the log contains no
        traceback marker or the log file does not exist.
    """
    marker = "Traceback (most recent call last):"
    log_path = f"{run_dir}/error.log"
    try:
        # errors="replace": the log is written by arbitrary subprocesses, so a
        # stray undecodable byte must not turn error reporting into a crash.
        with open(log_path, "r", encoding="utf-8", errors="replace") as f:
            contents = f.read()
    except FileNotFoundError:
        # NOTE(review): presumably the run never produced an error log;
        # confirm when this can actually happen.
        print(f"Could not find {run_dir}/error.log")
        return ""

    # NOTE(review): a non-empty error.log may already imply a failure, in
    # which case the marker search could be dropped; kept to preserve the
    # existing contract.
    marker_at = contents.find(marker)
    if marker_at == -1:
        return ""
    # Back up to the start of the line holding the marker so the full line is
    # returned, not just the part from the marker onwards.
    line_start = contents.rfind("\n", 0, marker_at) + 1
    return contents[line_start:]
def _read_entries(path):
    """Return the stripped, non-empty lines of the text file at *path*."""
    with open(path, "r", encoding="utf-8") as f:
        return [entry for entry in (line.strip() for line in f) if entry]


def start_train():
    """Run ``./entrypoint.sh`` for every hardware/model/task combination.

    Hardware flavors, model names and task names are read, one per line, from
    ``hardware.txt``, ``models.txt`` and ``tasks.txt`` in the current working
    directory; blank lines are ignored. For each hardware flavor the Space's
    current hardware is queried and a switch is requested when it differs.
    Then ``./entrypoint.sh`` is invoked once per (model, task) pair with the
    arguments ``REPO_ID model task hardware run_dir``.

    Returns:
        ``{"Status": "Done"}`` once every combination has been launched. The
        exit status of ``entrypoint.sh`` is not inspected.

    Raises:
        FileNotFoundError: If one of the three list files is missing.
    """
    # NOTE(review): no route decorator is visible on this function in the
    # part of the file reviewed, although a FastAPI `app` is created; confirm
    # how this function is meant to be triggered.
    models = _read_entries("models.txt")
    tasks = _read_entries("tasks.txt")
    hardware_flavors = _read_entries("hardware.txt")

    for hardware in hardware_flavors:
        print(f"Hardware is {hardware}")
        curr_runtime = API.get_space_runtime(repo_id=REPO_ID)
        # get_space_runtime returns a runtime object, not a string; comparing
        # the object itself with `hardware` is always unequal and would
        # request a switch on every iteration.
        current_hardware = curr_runtime.hardware
        print(f"Current hardware is {current_hardware}")
        if current_hardware != hardware:
            print("Trying to switch.")
            API.request_space_hardware(repo_id=REPO_ID, hardware=hardware)

        for model in models:
            for task in tasks:
                # Create the name of the directory for output.
                now = time.time()
                # Plain f-string fields: a shell-style "${task}" here would put
                # a literal "$" into the path.
                run_dir = f"/runs/{task}/{model}/{now}"
                # Argument list, no shell: values are passed verbatim without
                # word splitting or variable expansion.
                subprocess.run(
                    ["./entrypoint.sh", REPO_ID, model, task, hardware, run_dir],
                    check=False,
                )

    # TODO(review): tracking a STATUS Space variable (COMPUTING /
    # NOT_COMPUTING) and pausing the Space after the run were previously
    # sketched here in commented-out code and are not implemented.
    return {"Status": "Done"}