Spaces:
Build error
Build error
#!/usr/bin/env python
"""
Quick script to update your Hugging Face Space for phi-4-unsloth-bnb-4bit training.
This script handles the specific requirements for the 4-bit quantized Phi-4 model training,
including proper configuration and dependency management.
"""
import os | |
import sys | |
import json | |
import subprocess | |
import argparse | |
import logging | |
from pathlib import Path | |
from huggingface_hub import HfApi, login | |
import getpass | |
# Route INFO-and-above log records to stdout with a timestamped format.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-level logger used by the functions in this script.
logger = logging.getLogger(__name__)
def load_env_variables():
    """Load and validate the Hugging Face settings this script needs.

    Settings are gathered in this order:

    1. ``KEY=value`` lines from a ``.env`` file located next to this script
       (when the file exists). These are written into ``os.environ`` and
       therefore override values that were already set.
    2. When ``SPACE_ID`` is set and contains a ``/``, the part before the
       first slash is stored as ``HF_USERNAME``.
    3. ``HF_SPACE_NAME`` defaults to ``"phi4training"`` when it is not set,
       and that default is written back to ``os.environ``.

    Returns:
        dict: the values of ``HF_TOKEN``, ``HF_USERNAME`` and
        ``HF_SPACE_NAME``.

    Raises:
        ValueError: if any of the three values is missing or empty.
    """
    # The .env file is parsed by hand, so no third-party package is needed
    # to read it.
    env_path = Path(__file__).parent / ".env"
    if env_path.exists():
        with open(env_path, encoding="utf-8") as f:
            for line_number, raw_line in enumerate(f, start=1):
                line = raw_line.strip()
                # Skip blank lines and comments (including indented ones).
                if not line or line.startswith('#'):
                    continue
                key, separator, value = line.partition('=')
                key = key.strip()
                if not separator or not key:
                    # Report only the position: the line itself may hold a secret.
                    logger.warning(f"Skipping malformed line {line_number} in {env_path}")
                    continue
                os.environ[key] = value.strip()
        logger.info(f"Loaded environment variables from {env_path}")
    else:
        logger.warning(f"No .env file found at {env_path}")

    # Check if we're running in a Hugging Face Space
    space_id = os.environ.get("SPACE_ID")
    if space_id:
        logger.info("Running in Hugging Face Space")
        if "/" in space_id:
            username = space_id.split("/")[0]
            os.environ["HF_USERNAME"] = username
            logger.info(f"Set HF_USERNAME from SPACE_ID: {username}")

    # Ensure the space name is set; an explicitly empty value is left alone
    # so that it is reported as missing below.
    os.environ.setdefault("HF_SPACE_NAME", "phi4training")

    # Verify required variables
    required_vars = {
        name: os.environ.get(name)
        for name in ("HF_TOKEN", "HF_USERNAME", "HF_SPACE_NAME")
    }
    missing_vars = [name for name, value in required_vars.items() if not value]
    if missing_vars:
        raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")

    logger.info(f"Using environment variables: USERNAME={required_vars['HF_USERNAME']}, SPACE_NAME={required_vars['HF_SPACE_NAME']}")
    return required_vars
def verify_configs():
    """Check that the required files exist and that the JSON configs parse.

    The files are looked up in the directory that contains this script.

    Raises:
        FileNotFoundError: if any required file is absent.
        ValueError: if a required ``.json`` file is not valid JSON.
    """
    base_dir = Path(__file__).parent
    expected = [
        "transformers_config.json",
        "requirements.txt",
        "run_transformers_training.py",
    ]

    absent = [name for name in expected if not (base_dir / name).exists()]
    if absent:
        raise FileNotFoundError(f"Missing required files: {', '.join(absent)}")

    # Parse each JSON config purely to validate it; the parsed data is discarded.
    for name in expected:
        if not name.endswith('.json'):
            continue
        try:
            with open(base_dir / name) as handle:
                json.load(handle)
            logger.info(f"Verified {name} is valid JSON")
        except json.JSONDecodeError as err:
            raise ValueError(f"Invalid JSON in {name}: {err}")
def _requirement_name(requirement):
    """Return the lower-cased package name at the start of a requirement line."""
    name = requirement
    # Cut at the first extras / version-specifier / marker / URL delimiter.
    for delimiter in ("[", "=", "<", ">", "!", "~", ";", "@", " "):
        name = name.split(delimiter, 1)[0]
    return name.strip().lower()


def update_requirements():
    """Write the three requirements files used for sequential installation.

    All files are written next to this script:

    * ``requirements-base.txt``: the existing entries of that file (if any)
      merged with the base package set below. The ``torch`` line is always
      written first and the remaining lines are sorted.
    * ``requirements.txt``: ``-r requirements-base.txt`` followed by the
      additional packages. Any previous content is replaced.
    * ``requirements-flash.txt``: ``-r requirements-base.txt`` followed by
      the pinned ``flash-attn`` version. Any previous content is replaced.
    """
    logger.info("Setting up requirements files for sequential installation...")
    current_dir = Path(__file__).parent
    base_req_path = current_dir / "requirements-base.txt"
    main_req_path = current_dir / "requirements.txt"
    flash_req_path = current_dir / "requirements-flash.txt"

    # First ensure base requirements exist
    required_base_packages = {
        "torch>=2.0.0",
        "transformers>=4.36.0",
        "accelerate>=0.27.0",
        "bitsandbytes>=0.41.0",
        "tensorboard>=2.15.0",
        "gradio>=5.17.0",
        "huggingface-hub>=0.19.0",
        "datasets>=2.15.0",
    }

    # Additional packages for main requirements
    required_additional_packages = {
        "einops>=0.7.0",
        "filelock>=3.13.1",
        "matplotlib>=3.7.0",
        "numpy>=1.24.0",
        "packaging>=23.0",
        "peft>=0.9.0",
        "psutil>=5.9.0",
        "python-dotenv>=1.0.0",
        "pyyaml>=6.0.1",
        "regex>=2023.0.0",
        "requests>=2.31.0",
        "safetensors>=0.4.1",
        "sentencepiece>=0.1.99",
        "tqdm>=4.65.0",
        "typing-extensions>=4.8.0",
        "unsloth>=2024.3",
    }

    # Read existing base requirements, ignoring blank lines and "-r" includes.
    existing_requirements = set()
    if base_req_path.exists():
        with open(base_req_path) as f:
            for line in f:
                entry = line.strip()
                if entry and not entry.startswith('-r'):
                    existing_requirements.add(entry)

    # Add new requirements
    updated_requirements = existing_requirements.union(required_base_packages)

    # Pick the torch line deterministically: a pin already present in the
    # base file wins over the default. Match on the exact package name so
    # that other "torch*" packages (torchvision, torchaudio, ...) are not
    # mistaken for torch and dropped.
    existing_torch = sorted(
        req for req in existing_requirements if _requirement_name(req) == "torch"
    )
    torch_req = existing_torch[0] if existing_torch else "torch>=2.0.0"
    other_requirements = sorted(
        req for req in updated_requirements if _requirement_name(req) != "torch"
    )

    # 1. Write updated base requirements (torch first)
    with open(base_req_path, 'w') as f:
        f.write(f"{torch_req}\n")
        for req in other_requirements:
            f.write(f"{req}\n")

    # 2. Create main requirements file (references base)
    with open(main_req_path, 'w') as f:
        f.write("-r requirements-base.txt\n")
        for req in sorted(required_additional_packages):
            f.write(f"{req}\n")

    # 3. Create or update flash-attn requirements
    with open(flash_req_path, 'w') as f:
        f.write("-r requirements-base.txt\n")
        f.write("flash-attn==2.5.2\n")

    logger.info("Updated requirements files for sequential installation:")
    logger.info(f"1. Base requirements in {base_req_path}")
    logger.info(f"2. Main requirements in {main_req_path}")
    logger.info(f"3. Flash-attention requirements in {flash_req_path}")
    logger.info("This ensures packages are installed in the correct order")
def create_space(username, space_name):
    """Return info for the Space ``username/space_name``, creating it if needed.

    The Space is looked up first. If the lookup fails for any reason, a new
    public Gradio Space is created and its info is returned.

    Args:
        username: owner of the Space.
        space_name: name of the Space.

    Returns:
        The object returned by ``HfApi.space_info`` for the Space.

    Raises:
        RuntimeError: if the client cannot be built, or if creating the
            Space (or fetching its info afterwards) fails. The original
            exception is attached as the cause.
    """
    # Built before the try block so the error handler can always reference it.
    space_id = f"{username}/{space_name}"
    try:
        api = HfApi()
        logger.info(f"Checking Space {space_id}...")

        # First try to get the space
        try:
            space_info = api.space_info(repo_id=space_id)
        except Exception:
            # Any lookup failure is treated as "not found"; creation below
            # surfaces the real problem if there is one.
            logger.info(f"Space {space_id} does not exist, creating new space...")
        else:
            logger.info(f"Space {space_id} already exists")
            return space_info

        # Create new space
        try:
            api.create_repo(
                repo_id=space_id,
                private=False,
                repo_type="space",
                space_sdk="gradio",
            )
            logger.info(f"Created new space: {space_id}")
            return api.space_info(repo_id=space_id)
        except Exception as e:
            logger.error(f"Failed to create space: {str(e)}")
            raise
    except Exception as e:
        raise RuntimeError(f"Error with Space {space_id}: {str(e)}") from e
def _ensure_gitignore(gitignore_path, patterns):
    """Make sure every pattern is listed in the .gitignore, keeping existing entries."""
    existing_lines = []
    if gitignore_path.exists():
        with open(gitignore_path) as f:
            existing_lines = f.read().splitlines()
    already_listed = {line.strip() for line in existing_lines}
    missing = [pattern for pattern in patterns if pattern not in already_listed]
    with open(gitignore_path, "w") as f:
        for line in existing_lines + missing:
            f.write(f"{line}\n")


def main():
    """Update the Hugging Face Space with the contents of this script's directory.

    Steps: parse the command line, load and check the environment settings,
    verify the config files, ask for confirmation (unless ``--force``),
    rewrite the requirements files, log in, create or fetch the Space, and
    upload the folder.

    Returns:
        bool: True when the upload completed; False when the user cancelled
        or any step raised an exception (the error is logged with its
        traceback).
    """
    try:
        # Parse command line arguments
        parser = argparse.ArgumentParser(description='Update Hugging Face Space for Phi-4 training')
        parser.add_argument('--space_name', type=str, help='Space name (default: from env)')
        parser.add_argument('--force', action='store_true', help='Skip confirmation when updating Space')
        args = parser.parse_args()

        # Load environment variables
        env_vars = load_env_variables()
        verify_configs()

        # Verify we have the necessary variables
        required_keys = ("HF_TOKEN", "HF_USERNAME", "HF_SPACE_NAME")
        if not all(env_vars.get(key) for key in required_keys):
            logger.error("Missing required environment variables. Please check your .env file.")
            for key in required_keys:
                logger.error(f"{key}: {'Set' if env_vars.get(key) else 'Not Set'}")
            return False
        logger.info(f"Environment variables loaded: USERNAME={env_vars['HF_USERNAME']}, SPACE_NAME={env_vars['HF_SPACE_NAME']}")

        # Ask for confirmation unless forced
        if not args.force:
            print("\nWARNING: Updating the Space will INTERRUPT any ongoing training!")
            confirm = input("Are you sure you want to update the Space? Type 'yes' to confirm: ")
            if confirm.lower() != 'yes':
                logger.info("Update cancelled by user")
                return False
            # Additional prompt for safety: only checks that something was
            # typed; the value is not compared against anything.
            password = getpass.getpass("Enter your password to confirm update: ")
            if password.strip() == "":
                logger.info("No password entered. Update cancelled.")
                return False
        else:
            logger.info("Skipping confirmation due to --force flag")

        # Update requirements
        update_requirements()
        logger.info("Requirements updated successfully")

        # Get space name from args or env, prioritize args
        space_name = args.space_name if args.space_name else env_vars["HF_SPACE_NAME"]
        logger.info(f"Using space name: {space_name}")
        repo_id = f"{env_vars['HF_USERNAME']}/{space_name}"

        # Login to Hugging Face
        logger.info("Logging in to Hugging Face...")
        login(token=env_vars["HF_TOKEN"])
        logger.info("Successfully logged in to Hugging Face")

        # Create/get space
        space_info = create_space(env_vars["HF_USERNAME"], space_name)
        logger.info(f"Space info: {space_info}")

        # Upload files
        current_dir = Path(__file__).parent
        logger.info(f"Uploading files from {current_dir} to Space {repo_id}...")

        # Create .gitignore, preserving entries that are already in it
        _ensure_gitignore(current_dir / ".gitignore", [".env", "*.pyc", "__pycache__"])
        logger.info("Created .gitignore file")

        api = HfApi()
        api.upload_folder(
            folder_path=str(current_dir),
            repo_id=repo_id,
            repo_type="space",
            ignore_patterns=[".env", "*.pyc", "__pycache__", "TRAINING_IN_PROGRESS.lock"],
        )
        logger.info("Files uploaded successfully")

        space_url = f"https://huggingface.co/spaces/{repo_id}"
        logger.info(f"Space URL: {space_url}")
        print(f"\nSpace created successfully! You can view it at:\n{space_url}")
        return True
    except Exception as e:
        # logger.exception keeps the same message and adds the traceback.
        logger.exception(f"Error updating Space: {str(e)}")
        return False
if __name__ == "__main__":
    # Exit status 0 when main() reports success, 1 otherwise.
    sys.exit(0 if main() else 1)