"""Command-line helper for listing and downloading Hugging Face models.

Usage examples::

    python script.py --list --search bert --limit 20
    python script.py --model bert-base-uncased --output ./models
"""

import argparse
import logging
import os
import sys

from huggingface_hub import HfApi, login
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def setup_auth(token):
    """Authenticate with Hugging Face using ``huggingface_hub.login``.

    Args:
        token: Hugging Face API token passed straight to ``login``.

    Raises:
        Exception: Whatever ``login`` raises; it is logged and re-raised.
    """
    try:
        login(token)
        logger.info("Successfully authenticated with Hugging Face")
    except Exception as e:
        logger.error(f"Authentication failed: {str(e)}")
        raise


def list_models(pattern=None, limit=None):
    """List models on the Hub whose id matches ``pattern``.

    Args:
        pattern: Search string forwarded to ``HfApi.list_models`` as its
            ``search`` argument. ``None`` applies no search filter.
        limit: Optional maximum number of models to fetch. ``None`` (the
            default) applies no limit, which can be very slow when no
            ``pattern`` is given.

    Returns:
        A list of ``(model_id, downloads)`` tuples. ``downloads`` is always
        an ``int``; a missing count is reported as ``0`` so callers can
        sort and format the value without special-casing ``None``.

    Raises:
        Exception: Any error from the Hub API; it is logged and re-raised.
    """
    try:
        api = HfApi()
        # The keyword is ``search``; ``list_models`` has no ``pattern``
        # parameter, so the previous call failed with a TypeError.
        models = api.list_models(search=pattern, full=True, limit=limit)
        results = []
        for model in models:
            # Prefer ``id``; fall back to the older ``modelId`` attribute so
            # the code works across huggingface_hub versions.
            model_id = getattr(model, "id", None) or getattr(model, "modelId", None)
            downloads = getattr(model, "downloads", None) or 0
            results.append((model_id, downloads))
        return results
    except Exception as e:
        logger.error(f"Failed to list models: {str(e)}")
        raise


def download_model(model_name, output_dir):
    """Download a model and its tokenizer and save both to disk.

    Both artifacts are written to ``os.path.join(output_dir, model_name)``.

    Args:
        model_name: Model identifier passed to ``from_pretrained``.
        output_dir: Base directory; created if it does not exist.

    Returns:
        ``True`` when both the tokenizer and the model were saved.

    Raises:
        Exception: Any download or save error; it is logged and re-raised.
    """
    try:
        logger.info(f"Downloading model: {model_name}")

        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)
        target_dir = os.path.join(output_dir, model_name)

        # Download tokenizer
        logger.info("Downloading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        tokenizer.save_pretrained(target_dir)

        # Download model
        logger.info("Downloading model...")
        model = AutoModel.from_pretrained(model_name)
        model.save_pretrained(target_dir)

        logger.info(f"Successfully downloaded {model_name} to {output_dir}")
        return True
    except Exception as e:
        logger.error(f"Failed to download model {model_name}: {str(e)}")
        raise


def main():
    """Parse command-line arguments and run the requested action.

    Returns:
        A process exit code: ``0`` on success, ``1`` on a usage or runtime
        error, ``130`` when interrupted with Ctrl-C. Returning a code (rather
        than always falling through with ``None``) lets shell callers detect
        failure.
    """
    parser = argparse.ArgumentParser(description='Download models from Hugging Face')
    parser.add_argument('--token', type=str, help='Hugging Face API token')
    parser.add_argument('--model', type=str, help='Model name to download')
    parser.add_argument('--output', type=str, default='./models',
                        help='Output directory for downloaded models')
    parser.add_argument('--search', type=str, help='Search pattern for models')
    parser.add_argument('--list', action='store_true',
                        help='List available models matching the search pattern')
    parser.add_argument('--limit', type=int, default=None,
                        help='Maximum number of models to list (default: no limit)')

    args = parser.parse_args()

    try:
        # Setup authentication if token provided
        if args.token:
            setup_auth(args.token)

        # List models if requested
        if args.list:
            if args.search is None and args.limit is None:
                # Without a filter or a cap this iterates the entire Hub.
                logger.warning(
                    "No --search or --limit given; listing every model may take a long time"
                )
            logger.info(f"Searching for models matching: {args.search}")
            models = list_models(args.search, limit=args.limit)
            print("\nAvailable models:")
            for model_id, downloads in sorted(models, key=lambda x: x[1], reverse=True):
                print(f"- {model_id} (Downloads: {downloads:,})")
            return 0

        # Download specific model
        if not args.model:
            logger.error("Please specify a model to download using --model")
            return 1

        download_model(args.model, args.output)
        return 0

    except KeyboardInterrupt:
        logger.info("\nOperation cancelled by user")
        return 130  # conventional exit status for SIGINT
    except Exception as e:
        logger.error(f"An error occurred: {str(e)}")
        return 1


if __name__ == "__main__":
    sys.exit(main())