#!/usr/bin/env python
"""
Script to update your Hugging Face Space for R1-Distill-LLama-8b training.
"""
import os
import sys
import json
import argparse
import logging
from pathlib import Path

from huggingface_hub import HfApi, login

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)]
)
logger = logging.getLogger(__name__)

def load_env_variables():
    """Load environment variables from the system or a local .env file."""
    # First try to load from a local .env file
    try:
        from dotenv import load_dotenv  # only used as an availability check for python-dotenv
        env_path = Path(__file__).parent / ".env"
        if env_path.exists():
            # Load and explicitly set environment variables
            with open(env_path) as f:
                for line in f:
                    line = line.strip()
                    if line and not line.startswith('#') and '=' in line:
                        key, value = line.split('=', 1)
                        os.environ[key.strip()] = value.strip()
            logger.info(f"Loaded environment variables from {env_path}")
        else:
            logger.warning(f"No .env file found at {env_path}")
    except ImportError:
        logger.warning("python-dotenv not installed, skipping .env loading")

    # Set default space name if not provided
    if "HF_SPACE_NAME" not in os.environ:
        os.environ["HF_SPACE_NAME"] = "r1training"

    # Verify required variables
    required_vars = {
        "HF_TOKEN": os.environ.get("HF_TOKEN"),
        "HF_USERNAME": os.environ.get("HF_USERNAME"),
        "HF_SPACE_NAME": os.environ.get("HF_SPACE_NAME")
    }
    missing_vars = [k for k, v in required_vars.items() if not v]
    if missing_vars:
        raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")

    logger.info(f"Using environment variables: USERNAME={required_vars['HF_USERNAME']}, SPACE_NAME={required_vars['HF_SPACE_NAME']}")
    return required_vars

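
# A minimal .env file this function can read might look like the following
# (placeholder values, not real credentials):
#
#     HF_TOKEN=hf_xxxxxxxxxxxxxxxx
#     HF_USERNAME=your-username
#     HF_SPACE_NAME=r1training
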
def verify_configs():
    """Verify that all necessary configuration files exist and are valid."""
    current_dir = Path(__file__).parent
    required_files = [
        "transformers_config.json",
        "dataset_config.json",
        "README.md",
        "run_transformers_training.py"
    ]

    missing_files = []
    for file in required_files:
        if not (current_dir / file).exists():
            missing_files.append(file)
    if missing_files:
        raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")

    # Verify JSON configs
    json_files = [f for f in required_files if f.endswith('.json')]
    for json_file in json_files:
        try:
            with open(current_dir / json_file) as f:
                json.load(f)
            logger.info(f"Verified {json_file} is valid JSON")
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in {json_file}: {e}")

def create_space(username, space_name):
    """Create or get a Hugging Face Space."""
    api = HfApi()
    space_id = f"{username}/{space_name}"
    logger.info(f"Checking Space {space_id}...")

    # First try to get the space
    try:
        space_info = api.space_info(repo_id=space_id)
        logger.info(f"Space {space_id} already exists")
        return space_info
    except Exception:
        logger.info(f"Space {space_id} does not exist, creating new space...")

    # Create new space
    try:
        api.create_repo(
            repo_id=space_id,
            private=False,
            repo_type="space",
            space_sdk="gradio"
        )
        logger.info(f"Created new space: {space_id}")
        return api.space_info(repo_id=space_id)
    except Exception as e:
        logger.error(f"Failed to create space: {str(e)}")
        raise RuntimeError(f"Error with Space {space_id}: {str(e)}") from e

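
# Note: huggingface_hub's create_repo also accepts exist_ok=True, which would
# collapse the check-then-create above into a single idempotent call (a sketch,
# not verified against any particular pinned version of the library):
#
#     api.create_repo(repo_id=space_id, repo_type="space", space_sdk="gradio", exist_ok=True)
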
def main():
    parser = argparse.ArgumentParser(description='Update Hugging Face Space for R1-Distill-LLama-8b training')
    parser.add_argument('--space_name', type=str, help='Space name (default: from env)')
    parser.add_argument('--force', action='store_true', help='Skip confirmation')
    args = parser.parse_args()

    if not args.force:
        print("\n" + "!"*80)
        print("WARNING: Updating the Space will INTERRUPT any ongoing training!")
        print("Make sure all checkpoints are saved before proceeding.")
        print("!"*80 + "\n")
        confirm = input("Type 'update' to confirm: ")
        if confirm.lower() != 'update':
            logger.info("Update cancelled")
            return False

    try:
        # Load environment variables
        env_vars = load_env_variables()

        # Verify configurations
        verify_configs()
        logger.info("All configuration files verified successfully")

        # Get space name from args or env, prioritizing args
        space_name = args.space_name if args.space_name else env_vars["HF_SPACE_NAME"]
        logger.info(f"Using space name: {space_name}")

        # Login to Hugging Face
        logger.info("Logging in to Hugging Face...")
        login(token=env_vars["HF_TOKEN"])
        logger.info("Successfully logged in to Hugging Face")

        # Create/get space
        space_info = create_space(env_vars["HF_USERNAME"], space_name)
        logger.info(f"Space info: {space_info}")

        # Upload files
        current_dir = Path(__file__).parent
        logger.info(f"Uploading files from {current_dir} to Space {env_vars['HF_USERNAME']}/{space_name}...")

        # Create .gitignore
        with open(current_dir / ".gitignore", "w") as f:
            f.write(".env\n*.pyc\n__pycache__\n")
        logger.info("Created .gitignore file")

        api = HfApi()
        api.upload_folder(
            folder_path=str(current_dir),
            repo_id=f"{env_vars['HF_USERNAME']}/{space_name}",
            repo_type="space",
            ignore_patterns=[".env", "*.pyc", "__pycache__", "TRAINING_IN_PROGRESS.lock"]
        )
        logger.info("Files uploaded successfully")

        space_url = f"https://huggingface.co/spaces/{env_vars['HF_USERNAME']}/{space_name}"
        logger.info(f"Space URL: {space_url}")
        print(f"\nSpace updated successfully! You can view it at:\n{space_url}")
        return True
    except Exception as e:
        logger.error(f"Error updating Space: {str(e)}")
        return False


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)