Commit dd4ba61 · Parent: f35f208
Changed model

Files changed:
- app/config.yaml +1 -1
- client/__init__.py +0 -0
- client/client.py +0 -275
- client/client_config.yaml +0 -33
- main/hf_downloader.py +0 -97
app/config.yaml
CHANGED
@@ -10,7 +10,7 @@ model:
   temperature: 0.7
   repetition_penalty: 1.1
 defaults:
-  model_name: "
+  model_name: "huihui-ai/Qwen2.5-Coder-32B-Instruct-abliterated"

 folders:
   models: "models"
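Note: the commit itself only swaps the default checkpoint in app/config.yaml. A minimal sketch of how such a default would typically be read back (illustrative only; the commit does not show the app code that consumes it):

# Illustrative: read the default model name this commit sets in app/config.yaml.
import yaml

with open("app/config.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["defaults"]["model_name"])
# -> huihui-ai/Qwen2.5-Coder-32B-Instruct-abliterated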
client/__init__.py
DELETED
(empty file)
client/client.py
DELETED
@@ -1,275 +0,0 @@
import requests
import json
import sseclient
import sys
from pathlib import Path
import yaml
from typing import Optional
import os

from litgpt.scripts.convert_hf_checkpoint import convert_hf_checkpoint
from litgpt.scripts.download import download_from_hub

DEFAULT_CONFIG = {
    'server': {'url': 'http://localhost:7860'},
    'model': {
        'name': 'Qwen2.5-Coder-7B-Instruct',
        'download_location': 'huihui-ai/Qwen2.5-Coder-7B-Instruct-abliterated',
        'folder_path': 'huihui-ai/Qwen2.5-Coder-7B-Instruct-abliterated',
        'model_filename': 'model.safetensors'
    }
}


def get_project_root(config: dict) -> Path:
    client_dir = Path(__file__).parent
    return (client_dir / config['project']['root_dir']).resolve()


def get_checkpoints_dir(config: dict) -> Path:
    root = get_project_root(config)
    return root / config['project']['checkpoints_dir']


class LLMClient:
    def __init__(self, config: dict):
        self.config = config
        self.base_url = config['server']['url'].rstrip('/')
        self.session = requests.Session()
        self.checkpoints_dir = get_checkpoints_dir(config)

    def download_model(
        self,
        repo_id: Optional[str] = None,
        access_token: Optional[str] = os.getenv("HF_TOKEN"),
    ) -> None:
        repo_id = repo_id or self.config['model']['folder_path']

        print(f"\nDownloading model from: {repo_id}")
        download_from_hub(
            repo_id=repo_id,
            model_name=self.config['model']['name'],
            access_token=access_token,
            tokenizer_only=False,
            checkpoint_dir=self.checkpoints_dir
        )

    def convert_model(
        self,
        folder_path: Optional[str] = None,
        model_name: Optional[str] = None,
    ) -> None:
        """Convert downloaded model to LitGPT format."""
        folder_path = folder_path or self.config['model']['folder_path']
        model_name = model_name or self.config['model']['name']

        model_dir = self.checkpoints_dir / folder_path
        print(f"\nConverting model in: {model_dir}")
        print(f"Using model name: {model_name}")

        try:
            convert_hf_checkpoint(
                checkpoint_dir=model_dir,
                model_name=model_name
            )
            print("Conversion complete!")
        except ValueError as e:
            if "is not a supported config name" in str(e):
                print(f"\nNote: Model '{model_name}' isn't in LitGPT's predefined configs.")
                print("You may need to use the model's safetensors files directly.")
            raise

    def initialize_model(
        self,
        folder_path: Optional[str] = None,
        mode: Optional[str] = None,
        **kwargs
    ) -> dict:
        """Initialize a converted model using the standard initialize endpoint."""
        url = f"{self.base_url}/initialize"

        folder_path = folder_path or self.config['model']['folder_path']
        mode = mode or self.config['hardware']['mode']

        # Debug prints
        print("\nDebug - Attempting to initialize model with:")
        print(f"Model path: {folder_path}")
        print(f"Mode: {mode}")

        payload = {
            "model_path": folder_path,  # This is what the regular initialize endpoint expects
            "mode": mode,
            "precision": self.config['hardware'].get('precision'),
            "quantize": self.config['hardware'].get('quantize'),
            "gpu_count": self.config['hardware'].get('gpu_count', 'auto'),
            **kwargs
        }

        response = self.session.post(url, json=payload)
        response.raise_for_status()
        return response.json()

    def generate_stream(
        self,
        prompt: str,
        max_new_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None
    ):
        url = f"{self.base_url}/generate/stream"

        gen_config = self.config.get('generation', {})
        payload = {
            "prompt": prompt,
            "max_new_tokens": max_new_tokens or gen_config.get('max_new_tokens', 50),
            "temperature": temperature or gen_config.get('temperature', 1.0),
            "top_k": top_k or gen_config.get('top_k'),
            "top_p": top_p or gen_config.get('top_p', 1.0)
        }

        response = self.session.post(url, json=payload, stream=True)
        response.raise_for_status()

        client = sseclient.SSEClient(response)
        for event in client.events():
            yield json.loads(event.data)


def clear_screen():
    os.system('cls' if os.name == 'nt' else 'clear')


def load_config(config_path: str = "client_config.yaml") -> dict:
    try:
        with open(config_path, 'r') as f:
            config = yaml.safe_load(f)
        return config
    except Exception as e:
        print(f"Warning: Could not load config file: {str(e)}")
        print("Using default configuration.")
        return DEFAULT_CONFIG


def main():
    config = load_config()
    client = LLMClient(config)

    while True:
        clear_screen()
        print("\nLLM Engine Client")
        print("================")
        print(f"Server: {client.base_url}")
        print(f"Current Model: {config['model']['name']}")
        print("\nOptions:")
        print("1. Download Model")
        print("2. Convert Model")
        print("3. Initialize Model")
        print("4. Generate Text (Streaming)")
        print("5. Exit")

        choice = input("\nEnter your choice (1-5): ").strip()

        if choice == "1":
            try:
                print("\nDownload Model")
                print("==============")
                print(f"Default location: {config['model']['download_location']}")
                if input("\nUse default? (Y/n): ").lower() != 'n':
                    repo_id = config['model']['download_location']
                else:
                    repo_id = input("Enter download location: ").strip()

                access_token = input("Enter HF access token (or press Enter to use HF_TOKEN env var): ").strip() or None
                client.download_model(repo_id=repo_id, access_token=access_token)
                print("\nModel downloaded successfully!")
                input("\nPress Enter to continue...")

            except Exception as e:
                print(f"\nError: {str(e)}")
                input("\nPress Enter to continue...")

        elif choice == "2":
            try:
                print("\nConvert Model")
                print("=============")
                print(f"Default folder path: {config['model']['folder_path']}")
                print(f"Default model name: {config['model']['name']}")
                if input("\nUse defaults? (Y/n): ").lower() != 'n':
                    folder_path = config['model']['folder_path']
                    model_name = config['model']['name']
                else:
                    folder_path = input("Enter folder path: ").strip()
                    model_name = input("Enter model name: ").strip()

                client.convert_model(
                    folder_path=folder_path,
                    model_name=model_name
                )
                print("\nModel converted successfully!")
                input("\nPress Enter to continue...")

            except Exception as e:
                print(f"\nError: {str(e)}")
                input("\nPress Enter to continue...")

        elif choice == "3":
            try:
                print("\nInitialize Model")
                print("================")
                print(f"Default folder path: {config['model']['folder_path']}")
                if input("\nUse defaults? (Y/n): ").lower() != 'n':
                    result = client.initialize_model()
                else:
                    folder_path = input("Enter model folder path: ").strip()
                    mode = input("Enter mode (cpu/gpu): ").strip()
                    result = client.initialize_model(
                        folder_path=folder_path,
                        mode=mode
                    )
                print("\nSuccess! Model initialized.")
                print(json.dumps(result, indent=2))
                input("\nPress Enter to continue...")

            except Exception as e:
                print(f"\nError: {str(e)}")
                input("\nPress Enter to continue...")

        elif choice == "4":
            try:
                print("\nGenerate Text (Streaming)")
                print("========================")
                prompt = input("Enter your prompt: ").strip()

                print("\nGenerating (Ctrl+C to stop)...")
                print("\nResponse:")
                try:
                    for chunk in client.generate_stream(prompt=prompt):
                        if "error" in chunk:
                            print(f"\nError: {chunk['error']}")
                            break

                        token = chunk.get("token", "")
                        is_finished = chunk.get("metadata", {}).get("is_finished", False)

                        if is_finished:
                            print("\n[Generation Complete]")
                            break

                        print(token, end="", flush=True)

                except KeyboardInterrupt:
                    print("\n\n[Generation Stopped]")

                input("\nPress Enter to continue...")

            except Exception as e:
                print(f"\nError: {str(e)}")
                input("\nPress Enter to continue...")

        elif choice == "5":
            print("\nGoodbye!")
            break

        else:
            print("\nInvalid choice. Please try again.")
            input("\nPress Enter to continue...")


if __name__ == "__main__":
    main()
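Note: the deleted client.py was menu-driven, but its LLMClient class could also be scripted against the pre-commit tree. A minimal sketch, assuming the server from client_config.yaml is running and a model has already been downloaded and converted (the prompt text and run location are illustrative):

# Hypothetical driver script; import paths assume it runs from the old repo root.
from client.client import LLMClient, load_config

config = load_config("client/client_config.yaml")
client = LLMClient(config)

client.initialize_model()  # POST /initialize with the configured hardware settings
for chunk in client.generate_stream(prompt="Explain KV caching in one sentence."):
    if "error" in chunk:
        raise RuntimeError(chunk["error"])
    if chunk.get("metadata", {}).get("is_finished"):
        break
    print(chunk.get("token", ""), end="", flush=True)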
client/client_config.yaml
DELETED
@@ -1,33 +0,0 @@
# Project Configuration
project:
  root_dir: ".."
  checkpoints_dir: "checkpoints"

# Server Configuration
server:
  url: "http://localhost:7860"

# Model Configuration
model:
  name: "Llama-3.2-3B"
  download_location: "huihui-ai/Llama-3.2-3B-Instruct-abliterated"
  folder_path: "huihui-ai/Llama-3.2-3B-Instruct-abliterated"
  model_filename: "lit_model.pth"
  config_filename: "config.json"
  tokenizer_filename: "tokenizer.json"

# Hardware Configuration
hardware:
  mode: "gpu"
  precision: "16-true"
  # Precision Options: "32-true", "16-mixed", "16-true", "bf16-mixed", "bf16-true"
  quantize: "bnb.int8"
  # Quantization Options: "bnb.nf4", "bnb.nf4-dq", "bnb.fp4", "bnb.fp4-dq", "bnb.int8"
  gpu_count: "auto"

# Generation Parameters
generation:
  max_new_tokens: 500
  temperature: 1.0
  top_k: null
  top_p: 1.0
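Note: the comments in the deleted config enumerate the accepted precision and quantization values. A small sketch (hypothetical helper, not part of the deleted code) that checks a loaded config against those lists:

# Hypothetical validator; allowed values are copied from the comments in
# client_config.yaml above.
import yaml

ALLOWED_PRECISION = {"32-true", "16-mixed", "16-true", "bf16-mixed", "bf16-true"}
ALLOWED_QUANTIZE = {"bnb.nf4", "bnb.nf4-dq", "bnb.fp4", "bnb.fp4-dq", "bnb.int8"}

def validate_hardware(config: dict) -> None:
    hw = config.get("hardware", {})
    if hw.get("precision") not in ALLOWED_PRECISION:
        raise ValueError(f"Unsupported precision: {hw.get('precision')!r}")
    if hw.get("quantize") is not None and hw.get("quantize") not in ALLOWED_QUANTIZE:
        raise ValueError(f"Unsupported quantize mode: {hw.get('quantize')!r}")

with open("client/client_config.yaml") as f:
    validate_hardware(yaml.safe_load(f))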
main/hf_downloader.py
DELETED
@@ -1,97 +0,0 @@
import os
import argparse
from transformers import AutoTokenizer, AutoModel
from huggingface_hub import login, HfApi
import logging
from tqdm import tqdm

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def setup_auth(token):
    """Setup Hugging Face authentication"""
    try:
        login(token)
        logger.info("Successfully authenticated with Hugging Face")
    except Exception as e:
        logger.error(f"Authentication failed: {str(e)}")
        raise


def list_models(pattern=None):
    """List available models matching the pattern"""
    try:
        api = HfApi()
        # HfApi.list_models takes `search=` for free-text queries.
        models = api.list_models(search=pattern, full=True)
        return [(model.modelId, model.downloads) for model in models]
    except Exception as e:
        logger.error(f"Failed to list models: {str(e)}")
        raise


def download_model(model_name, output_dir):
    """Download model and tokenizer"""
    try:
        logger.info(f"Downloading model: {model_name}")

        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        # Download tokenizer
        logger.info("Downloading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        tokenizer.save_pretrained(os.path.join(output_dir, model_name))

        # Download model
        logger.info("Downloading model...")
        model = AutoModel.from_pretrained(model_name)
        model.save_pretrained(os.path.join(output_dir, model_name))

        logger.info(f"Successfully downloaded {model_name} to {output_dir}")
        return True
    except Exception as e:
        logger.error(f"Failed to download model {model_name}: {str(e)}")
        raise


def main():
    parser = argparse.ArgumentParser(description='Download models from Hugging Face')
    parser.add_argument('--token', type=str, help='Hugging Face API token')
    parser.add_argument('--model', type=str, help='Model name to download')
    parser.add_argument('--output', type=str, default='./models',
                        help='Output directory for downloaded models')
    parser.add_argument('--search', type=str, help='Search pattern for models')
    parser.add_argument('--list', action='store_true',
                        help='List available models matching the search pattern')

    args = parser.parse_args()

    try:
        # Setup authentication if token provided
        if args.token:
            setup_auth(args.token)

        # List models if requested
        if args.list:
            logger.info(f"Searching for models matching: {args.search}")
            models = list_models(args.search)
            print("\nAvailable models:")
            # Guard against missing download counts when sorting and printing.
            for model_id, downloads in sorted(models, key=lambda x: x[1] or 0, reverse=True):
                print(f"- {model_id} (Downloads: {downloads or 0:,})")
            return

        # Download specific model
        if args.model:
            download_model(args.model, args.output)
        else:
            logger.error("Please specify a model to download using --model")
            return

    except KeyboardInterrupt:
        logger.info("\nOperation cancelled by user")
    except Exception as e:
        logger.error(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()
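Note: hf_downloader.py was a standalone CLI, but it could also be driven programmatically against the pre-commit tree. A minimal sketch (the sys.path tweak and the repo id, taken from client_config.yaml, are illustrative assumptions):

# Hypothetical driver; assumes execution from the old repo root so that
# main/hf_downloader.py is importable.
import sys
sys.path.insert(0, "main")

import hf_downloader

# Free-text search, most-downloaded first (mirrors the --list branch of main()).
for model_id, downloads in sorted(hf_downloader.list_models("Llama-3.2-3B"),
                                  key=lambda x: x[1] or 0, reverse=True)[:5]:
    print(f"- {model_id} (Downloads: {downloads or 0:,})")

# Fetch one checkpoint into ./models (same effect as --model/--output).
hf_downloader.download_model("huihui-ai/Llama-3.2-3B-Instruct-abliterated", "./models")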