#!/usr/bin/env python
"""
Script to update your Hugging Face Space for R1-Distill-LLama-8b training.
"""

import os
import sys
import json
import argparse
import logging
from pathlib import Path

from huggingface_hub import HfApi, login

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)]
)
logger = logging.getLogger(__name__)


def _parse_env_file(env_path):
    """Parse a simple ``KEY=VALUE`` file and return the pairs as a dict.

    Blank lines and lines whose first non-blank character is ``#`` are
    ignored. Lines without ``=`` (or with an empty key) are skipped with a
    warning instead of aborting the whole load. One pair of matching
    surrounding quotes is removed from the value.

    Args:
        env_path: Path of the file to read.

    Returns:
        dict mapping variable names to their string values.
    """
    parsed = {}
    with open(env_path, encoding="utf-8") as f:
        for line_number, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue

            key, separator, value = line.partition("=")
            key = key.strip()
            if not separator or not key:
                # The line content is deliberately not logged: it may hold a secret.
                logger.warning(f"Skipping malformed line {line_number} in {env_path}")
                continue

            value = value.strip()
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]
            parsed[key] = value
    return parsed


def load_env_variables():
    """Load environment variables from system or .env file.

    Values found in a ``.env`` file next to this script are written into
    ``os.environ``, overriding variables that are already set.
    ``HF_SPACE_NAME`` falls back to ``"r1training"`` when it is not defined.

    Returns:
        dict with the keys ``HF_TOKEN``, ``HF_USERNAME`` and ``HF_SPACE_NAME``.

    Raises:
        ValueError: If any of the required variables is missing or empty.
    """
    # The file is parsed by hand, so python-dotenv is not needed for this step.
    env_path = Path(__file__).parent / ".env"
    if env_path.exists():
        os.environ.update(_parse_env_file(env_path))
        logger.info(f"Loaded environment variables from {env_path}")
    else:
        logger.warning(f"No .env file found at {env_path}")

    # Set default space name if not provided
    if "HF_SPACE_NAME" not in os.environ:
        os.environ["HF_SPACE_NAME"] = "r1training"

    # Verify required variables
    required_vars = {
        "HF_TOKEN": os.environ.get("HF_TOKEN"),
        "HF_USERNAME": os.environ.get("HF_USERNAME"),
        "HF_SPACE_NAME": os.environ.get("HF_SPACE_NAME")
    }

    missing_vars = [k for k, v in required_vars.items() if not v]
    if missing_vars:
        raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")

    # Only the non-secret values are logged; the token is never printed.
    logger.info(f"Using environment variables: USERNAME={required_vars['HF_USERNAME']}, SPACE_NAME={required_vars['HF_SPACE_NAME']}")
    return required_vars


def verify_configs():
    """Verify that all necessary configuration files exist and are valid.

    The files are looked up in the directory that contains this script.

    Raises:
        FileNotFoundError: If one or more required files are absent.
        ValueError: If a required ``.json`` file does not contain valid JSON.
    """
    current_dir = Path(__file__).parent
    required_files = [
        "transformers_config.json",
        "dataset_config.json",
        "README.md",
        "run_transformers_training.py"
    ]

    missing_files = [
        name for name in required_files if not (current_dir / name).exists()
    ]
    if missing_files:
        raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")

    # Verify JSON configs
    json_files = [f for f in required_files if f.endswith('.json')]
    for json_file in json_files:
        try:
            with open(current_dir / json_file, encoding="utf-8") as f:
                json.load(f)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in {json_file}: {e}") from e
        logger.info(f"Verified {json_file} is valid JSON")


def create_space(username, space_name):
    """Create or get a Hugging Face Space.

    Args:
        username: Account or organisation that owns the Space.
        space_name: Name of the Space under that owner.

    Returns:
        The object returned by ``HfApi.space_info`` for the Space.

    Raises:
        RuntimeError: If the Space can neither be fetched nor created; the
            original exception is attached as ``__cause__``.
    """
    # Built before the try block so the error message below can always use it.
    space_id = f"{username}/{space_name}"
    try:
        api = HfApi()
        logger.info(f"Checking Space {space_id}...")

        # First try to get the space
        try:
            space_info = api.space_info(repo_id=space_id)
        except Exception as lookup_error:
            # NOTE(review): every lookup failure is treated as "Space is
            # missing", including network or auth errors; the creation
            # attempt below then reports the real problem.
            logger.info(f"Space {space_id} does not exist, creating new space...")
            logger.debug("Space lookup failed with: %s", lookup_error)
        else:
            logger.info(f"Space {space_id} already exists")
            return space_info

        # Create new space
        try:
            api.create_repo(
                repo_id=space_id,
                private=False,
                repo_type="space",
                space_sdk="gradio"
            )
            logger.info(f"Created new space: {space_id}")
            return api.space_info(repo_id=space_id)
        except Exception as e:
            logger.error(f"Failed to create space: {str(e)}")
            raise
    except Exception as e:
        raise RuntimeError(f"Error with Space {space_id}: {str(e)}") from e


def main():
    """Validate the local setup and upload this directory to the Space.

    Unless ``--force`` is given, the user must type ``update`` to confirm,
    because the upload interrupts any training running in the Space.

    Returns:
        True when the upload succeeded, False when it was cancelled or failed.
    """
    parser = argparse.ArgumentParser(description='Update Hugging Face Space for R1-Distill-LLama-8b training')
    parser.add_argument('--space_name', type=str, help='Space name (default: from env)')
    parser.add_argument('--force', action='store_true', help='Skip confirmation')

    args = parser.parse_args()

    if not args.force:
        print("\n" + "!"*80)
        print("WARNING: Updating the Space will INTERRUPT any ongoing training!")
        print("Make sure all checkpoints are saved before proceeding.")
        print("!"*80 + "\n")

        try:
            confirm = input("Type 'update' to confirm: ")
        except EOFError:
            # No interactive stdin (e.g. CI): treat as "not confirmed".
            confirm = ""
        if confirm.lower() != 'update':
            logger.info("Update cancelled")
            return False

    try:
        # Load environment variables
        env_vars = load_env_variables()

        # Verify configurations
        verify_configs()
        logger.info("All configuration files verified successfully")

        # Get space name from args or env, prioritize args
        space_name = args.space_name if args.space_name else env_vars["HF_SPACE_NAME"]
        logger.info(f"Using space name: {space_name}")

        # Login to Hugging Face
        logger.info("Logging in to Hugging Face...")
        login(token=env_vars["HF_TOKEN"])
        logger.info("Successfully logged in to Hugging Face")

        # Create/get space
        space_info = create_space(env_vars["HF_USERNAME"], space_name)
        logger.info(f"Space info: {space_info}")

        # Upload files
        current_dir = Path(__file__).parent
        logger.info(f"Uploading files from {current_dir} to Space {env_vars['HF_USERNAME']}/{space_name}...")

        # Create .gitignore (overwrites any existing one in this directory)
        with open(current_dir / ".gitignore", "w", encoding="utf-8") as f:
            f.write(".env\n*.pyc\n__pycache__\n")
        logger.info("Created .gitignore file")

        api = HfApi()
        api.upload_folder(
            folder_path=str(current_dir),
            repo_id=f"{env_vars['HF_USERNAME']}/{space_name}",
            repo_type="space",
            # Keep secrets, bytecode and the training lock out of the upload.
            ignore_patterns=[".env", "*.pyc", "__pycache__", "TRAINING_IN_PROGRESS.lock"]
        )

        logger.info("Files uploaded successfully")
        space_url = f"https://huggingface.co/spaces/{env_vars['HF_USERNAME']}/{space_name}"
        logger.info(f"Space URL: {space_url}")
        print(f"\nSpace created successfully! You can view it at:\n{space_url}")
        return True

    except Exception as e:
        # Top-level boundary: report the failure and signal it via the exit code.
        logger.error(f"Error updating Space: {str(e)}")
        return False


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)