|
from typing import List,Dict |
|
import re |
|
|
|
def parse_model_entries(model_entries: List[str]) -> List[Dict[str, str]]:
    """Parse model entry strings into structured dictionaries.

    Each entry is split on ``/``. When there are at least two segments, the
    first segment is taken as the provider if it is a known provider name,
    and the last segment is treated as the model identifier. For ``bedrock``
    and ``azure`` entries with three or more segments, the second segment is
    recorded as the region unless it contains ``commitment``
    (case-insensitive). If no provider was found in the path, the first
    known provider name that occurs anywhere in the model identifier is
    used, and a leading ``<provider>.`` prefix is removed from the
    identifier.

    The version is taken from a trailing ``v<number>`` suffix (preceded by
    ``:``, ``.`` or ``-``; an optional trailing ``:<digits>`` is discarded),
    or failing that from a trailing ``-YYYY-MM-DD`` date.

    Args:
        model_entries: Model entry strings as found in models.txt.

    Returns:
        One dictionary per entry, in input order, with the keys:
            - provider: Lower-cased provider name, or '' if none was found.
            - model_name: Model identifier with the version suffix removed
              and leading/trailing '.', '-' and ':' stripped.
            - version: Parsed version or date string, or '' if none.
            - region: Deployment region, or '' if none.
            - model_type: 'image', 'audio' or 'text' (the default), chosen
              by substring match against the whole lower-cased entry.
    """
    known_providers = [
        'azure', 'bedrock', 'anthropic', 'openai', 'cohere', 'google',
        'mistral', 'meta', 'amazon', 'ai21', 'anyscale', 'stability',
        'cloudflare', 'databricks', 'cerebras', 'assemblyai',
    ]

    # NOTE(review): 'x-' and 'steps' presumably target resolution / step-count
    # path segments such as "1024-x-1024" or "50-steps" -- confirm. As plain
    # substrings they also match any other name containing "x-".
    image_indicators = ['dall-e', 'stable-diffusion', 'image', 'canvas', 'x-', 'steps']
    audio_indicators = ['whisper', 'tts', 'audio', 'voice']

    # Compiled once here instead of being looked up for every entry.
    version_pattern = re.compile(r'[:.-]v(\d+(?:\.\d+)*(?:-\d+)?|\d+)(?::\d+)?$')
    date_pattern = re.compile(r'-(\d{4}-\d{2}-\d{2})$')

    parsed_models = []

    for entry in model_entries:
        entry_lower = entry.lower()

        # Image indicators take precedence over audio indicators.
        if any(indicator in entry_lower for indicator in image_indicators):
            model_type = 'image'
        elif any(indicator in entry_lower for indicator in audio_indicators):
            model_type = 'audio'
        else:
            model_type = 'text'

        provider = ''
        region = ''
        parts = entry.split('/')

        if len(parts) >= 2:
            head = parts[0].lower()
            if head in known_providers:
                provider = head

            # Only bedrock/azure paths carry a region, and only when there is
            # a middle segment. Commitment-term segments are not regions; the
            # check is case-insensitive like the provider comparison above.
            if (
                head in ('bedrock', 'azure')
                and len(parts) >= 3
                and 'commitment' not in parts[1].lower()
            ):
                region = parts[1]

            model_with_version = parts[-1]
        else:
            model_with_version = entry

        if not provider:
            # Fall back to the first known provider named inside the model
            # identifier itself (list order decides ties).
            identifier_lower = model_with_version.lower()
            for candidate in known_providers:
                if candidate in identifier_lower:
                    provider = candidate
                    # Drop a leading "<provider>." so it is not repeated in
                    # the model name.
                    if identifier_lower.startswith(f'{candidate}.'):
                        model_with_version = model_with_version[len(candidate) + 1:]
                    break

        # A "v<number>" suffix wins over a trailing date.
        match = version_pattern.search(model_with_version)
        if match is None:
            match = date_pattern.search(model_with_version)

        if match is not None:
            version = match.group(1)
            model_name = model_with_version[:match.start()]
        else:
            version = ''
            model_name = model_with_version

        parsed_models.append({
            'provider': provider,
            'model_name': model_name.strip('.-:'),
            'version': version,
            'region': region,
            'model_type': model_type,
        })

    return parsed_models
|
|
|
|
|
def create_model_hierarchy(model_entries: List[str]) -> Dict[str, Dict[str, Dict[str, Dict[str, str]]]]:
    """Organize model entries into a nested dictionary.

    Every entry is parsed with ``parse_model_entries`` and stored under
    provider -> model name -> version -> region.

    Args:
        model_entries: Model entry strings as found in models.txt. Any
            iterable of strings is accepted; it is materialized once so it
            can be both parsed and stored.

    Returns:
        Nested dictionary with the structure:
            Provider -> Model -> Version -> Region = full model string
        An empty provider is replaced with "unknown"; an empty version or
        region is replaced with "NA". For the 'azure' provider the region
        key is always "NA", whatever region was parsed. When several entries
        resolve to the same four keys, the later entry overwrites the
        earlier one.
    """
    # Materialize so that one-shot iterables can be parsed and then paired
    # back up with their parsed result.
    entries = list(model_entries)

    hierarchy: Dict[str, Dict[str, Dict[str, Dict[str, str]]]] = {}

    # parse_model_entries yields exactly one dict per entry, in order.
    for entry, model_info in zip(entries, parse_model_entries(entries)):
        provider = model_info['provider'] or 'unknown'
        model_name = model_info['model_name']
        version = model_info['version'] or 'NA'

        # Azure entries are deliberately collapsed onto a single "NA" region.
        if provider == 'azure':
            region = 'NA'
        else:
            region = model_info['region'] or 'NA'

        by_version = hierarchy.setdefault(provider, {}).setdefault(model_name, {})
        by_version.setdefault(version, {})[region] = entry

    return hierarchy