@startuml
!theme plain
skinparam classAttributeIconSize 0
skinparam defaultFontName Arial
skinparam class {
  BackgroundColor PaleTurquoise
  BorderColor DarkSlateGray
}
skinparam abstractClass {
  BackgroundColor LightYellow
  BorderColor DarkSlateGray
}
skinparam interface {
  BackgroundColor White
  BorderColor Black
}

' ----------- ParamGrid type -----------
class ParamGrid
note right of ParamGrid
  Example keys and values:
  - "C": [0.1, 1, 10, 100]
  - "kernel": ["linear", "rbf"]
  - "gamma": low: 0.001, high: 0.1, log: true

  This type describes the structure expected
  for param_grid in the optimizers.
end note

' ----------- Interfaces -----------
package "Interfaces" {
  interface MetricsCalculator {
    + calculate_and_log(y_true: cp.ndarray, y_pred: cp.ndarray, prefix: str): dict
    + calculate_and_log_multiclass(y_true: cp.ndarray, y_pred: cp.ndarray, prefix: str): dict
  }
  note bottom of MetricsCalculator
    Always produces: accuracy, f1,
    precision, recall, auc-roc
  end note

  interface Vectorizer {
    + fit_transform(texts: cudf.Series): cp.ndarray | csr_matrix
    + transform(texts: cudf.Series): cp.ndarray | csr_matrix
  }

  interface HyperparameterOptimizer {
    + optimize(trainer: BaseTrainer, param_grid: ParamGrid): dict
  }
  note bottom of HyperparameterOptimizer
    Returns a dict with the best config found,
    e.g. {"C": 1, "kernel": "linear", "gamma": 0.01}
  end note
}

' ----------- Base Classes -----------
package "Base Classes" {
  abstract class BaseTrainer {
    - config: Config
    - classifier: object
    - metrics_calculator: MetricsCalculator
    --
    + __init__(config: Config, data_path: str, target_column: str)
    + build_components(): void
    + train(): void
    + evaluate(): void
    + log_parameters_to_mlflow(): void
    + optimize_if_needed(): void
    - _prepare_input_for_fit(X: cp.ndarray | csr_matrix): cp.ndarray | csr_matrix
    - _prepare_input_for_predict(X: cp.ndarray | csr_matrix): cp.ndarray | csr_matrix
    - _get_binary_predictions(X: cp.ndarray): cp.ndarray
    - _get_positive_probabilities(X: cp.ndarray): cp.ndarray | None
    - _get_label_dtype(): cp.dtype
  }
  note right of BaseTrainer
    log_parameters_to_mlflow():
    calls the singledispatch function
    get_relevant_params_for_logging(self).

    optimize_if_needed():
    checks self.config for a configured
    optimizer and, if one is set,
    calls optimize() on it.
  end note

  abstract class CuMLTrainer extends BaseTrainer {
    - vectorizer: Vectorizer
    - classifier: cuML.Base
    --
    + build_components(): void
    + train(): void
    + evaluate(): void
    - _prepare_input_for_fit(X: cp.ndarray | csr_matrix): cp.ndarray
    - _prepare_input_for_predict(X: cp.ndarray | csr_matrix): cp.ndarray
  }
}

' ----------- Concrete Trainers (cuML) -----------
package "Concrete Trainers (cuML)" {
  class SvmTrainer extends CuMLTrainer {
    - classifier: SVC
    --
    + _build_classifier(): void
  }
  note bottom of SvmTrainer
    SvmTrainer is driven by the C, kernel,
    and gamma (for RBF) hyperparameters.
  end note

  class RandomForestTrainer extends CuMLTrainer {
    - classifier: RandomForestClassifier
    --
    + _build_classifier(): void
  }

  class LogisticRegressionTrainer extends CuMLTrainer {
    - classifier: LogisticRegression
    --
    + _build_classifier(): void
  }

  class LinearRegressionTrainer extends CuMLTrainer {
    - classifier: LinearRegression
    --
    + _build_classifier(): void
  }
}
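' A minimal sketch of what SvmTrainer._build_classifier() could look like,
' assuming the SVC keyword arguments live in self.config.model.params
' (the attribute path and defaults are illustrative, not confirmed by this diagram):
'
'   from cuml.svm import SVC
'
'   class SvmTrainer(CuMLTrainer):
'       def _build_classifier(self) -> None:
'           # C, kernel and gamma come straight from the model config,
'           # e.g. {"C": 1, "kernel": "rbf", "gamma": 0.01}
'           params = self.config.model.params
'           self.classifier = SVC(
'               C=params.get("C", 1.0),
'               kernel=params.get("kernel", "rbf"),
'               gamma=params.get("gamma", "scale"),
'               probability=True,  # needed by _get_positive_probabilities()
'           )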
' ----------- Concrete Trainers (Hugging Face) -----------
package "Concrete Trainers (Hugging Face)" {
  class HuggingFaceTransformerTrainer extends BaseTrainer {
    - tokenizer: AutoTokenizer
    - model: AutoModelForSequenceClassification
    - hf_trainer: Trainer
    --
    + build_components(): void
    + train(): void
    + evaluate(): void
    - _create_torch_dataset(texts: cudf.Series, labels: cp.ndarray): torch.utils.data.Dataset
    - _prepare_training_args(): TrainingArguments
  }
  note right of HuggingFaceTransformerTrainer
    This trainer does not use
    the vectorizer config.
  end note
}

' ----------- Hyperparameter Optimizers -----------
package "Hyperparameter Optimizers" {
  class OptunaOptimizer {
    - study: optuna.study.Study
    - objective: function
    --
    + optimize(trainer: BaseTrainer, param_grid: ParamGrid): dict
  }
  note bottom of OptunaOptimizer
    Implementation:
    1) Create or reuse an Optuna study.
    2) Define the objective (cost function),
       e.g. use param_grid["C"] to suggest
       trial.suggest_float("C", ...).
    3) Apply the hyperparameters to the trainer
       (e.g. trainer.classifier = SVC(**params)).
    4) study.optimize(..., n_trials=...)
    5) Return the best config as a dict.
  end note

  class RayTuneOptimizer {
    - param_space: dict
    - search_alg: object
    - scheduler: object
    --
    + optimize(trainer: BaseTrainer, param_grid: ParamGrid): dict
  }
  note bottom of RayTuneOptimizer
    Implementation:
    1) Convert param_grid into a Ray Tune
       param_space (e.g. "C": tune.grid_search([...])).
    2) Launch tune.run(...).
    3) Use search_alg/scheduler.
    4) Return the best config as a dict.
  end note

  OptunaOptimizer ..> HyperparameterOptimizer : «implements»
  RayTuneOptimizer ..> HyperparameterOptimizer : «implements»
}
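' A minimal sketch of the OptunaOptimizer flow described in the note above.
' It assumes evaluate() is adapted to return the metrics dict (the diagram
' declares it as void) and that "f1" is the metric to maximize; both are
' illustrative assumptions:
'
'   import optuna
'   from cuml.svm import SVC
'
'   class OptunaOptimizer:
'       def optimize(self, trainer, param_grid, n_trials=50):
'           def objective(trial):
'               gamma = param_grid["gamma"]
'               params = {
'                   "C": trial.suggest_categorical("C", param_grid["C"]),
'                   "kernel": trial.suggest_categorical("kernel", param_grid["kernel"]),
'                   "gamma": trial.suggest_float("gamma", gamma["low"],
'                                                gamma["high"], log=gamma["log"]),
'               }
'               trainer.classifier = SVC(**params)   # 3) apply hyperparams
'               trainer.train()
'               return trainer.evaluate()["f1"]      # assumed metric key
'
'           study = optuna.create_study(direction="maximize")  # 1) create study
'           study.optimize(objective, n_trials=n_trials)       # 2) + 4)
'           return study.best_params                           # 5) best config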
' ----------- MLflow Integration -----------
package "MLflow Integration" {
  class MLflowDecorator {
    - experiment_name: str
    - tracking_uri: str
    --
    + __init__(experiment_name: str, tracking_uri: str): void
    + __call__(func: function): function
    + _start_run(): void
    + _log_params(params: dict): void
    + _log_metrics(metrics: dict): void
    + _log_artifacts(artifacts: dict): void
    + _end_run(status: str): void
  }
}

' ----------- Utilities -----------
package "Utilities" {
  class CuMLPyFuncWrapper {
    - vectorizer: Vectorizer
    - classifier: object
    --
    + load_context(context): void
    + predict(context, model_input: pd.DataFrame): np.ndarray
  }
}

' ----------- Configuration -----------
package "Configuration" {
  class Config <<Pydantic>> {
    + model: ModelConfig
    + vectorization: VectorizationConfig
    + data: DataConfig
    + hyperparameters: HyperparameterConfig
  }

  class ModelConfig <<Pydantic>> {
    + type: str
    + params: dict
  }

  class VectorizationConfig <<Pydantic>> {
    + method: str
    + tfidf: dict
    + bow: dict
  }

  class DataConfig <<Pydantic>> {
    + path: str
    + target_column: str
  }

  class HyperparameterConfig <<Pydantic>> {
    + optimizer: str
    + param_grid: dict
    + n_trials: int
  }
  note bottom of HyperparameterConfig
    Example param_grid for SVM:
    {
      "C": [0.1, 1, 10, 100],
      "kernel": ["linear", "rbf"],
      "gamma": {"low": 0.001, "high": 0.1, "log": true}
    }
    n_trials: 50
  end note

  Config *-- ModelConfig
  Config *-- VectorizationConfig
  Config *-- DataConfig
  Config *-- HyperparameterConfig

  note left of Config
    YAML excerpt:
    hyperparameters:
      optimizer: "optuna"
      param_grid:
        C: [0.1, 1, 10, 100]
        kernel:
          - "linear"
          - "rbf"
        gamma:
          low: 0.001
          high: 0.1
          log: true
      n_trials: 50

    Hydra -> DictConfig -> Config (Pydantic)
  end note
}

' ----------- singledispatch function -----------
package "Parameter Logging (singledispatch)" {
  object get_relevant_params_for_logging <<function>>
  note bottom of get_relevant_params_for_logging
    @singledispatch
    def get_relevant_params_for_logging(trainer: BaseTrainer) -> dict: ...

    @get_relevant_params_for_logging.register
    def _(trainer: HuggingFaceTransformerTrainer) -> dict: ...

    @get_relevant_params_for_logging.register
    def _(trainer: SvmTrainer) -> dict: ...

    etc.
  end note
}

' ----------- Relations -----------
BaseTrainer ..> MetricsCalculator : «uses»
BaseTrainer ..> HyperparameterOptimizer : «may use»
BaseTrainer ..> MLflowDecorator : «may be decorated by»
BaseTrainer ..> get_relevant_params_for_logging : «calls singledispatch function»
CuMLTrainer ..> cuML.Base : «uses»
CuMLTrainer ..> CuMLPyFuncWrapper : «for saving model»
HuggingFaceTransformerTrainer ..> AutoTokenizer : «uses»
HuggingFaceTransformerTrainer ..> AutoModelForSequenceClassification : «uses»
HuggingFaceTransformerTrainer ..> Trainer : «uses»
HuggingFaceTransformerTrainer ..> TrainingArguments : «uses»
MLflowDecorator ..> mlflow : «uses»

@enduml
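' A minimal sketch of CuMLPyFuncWrapper as an mlflow.pyfunc.PythonModel,
' assuming the fitted components were logged as artifacts named "vectorizer"
' and "classifier", that they are joblib-loadable, and that predict() returns
' a CuPy array; the artifact keys and the "text" column name are illustrative:
'
'   import cupy as cp
'   import joblib
'   import mlflow.pyfunc
'
'   class CuMLPyFuncWrapper(mlflow.pyfunc.PythonModel):
'       def load_context(self, context):
'           # Restore the fitted components from the logged artifacts.
'           self.vectorizer = joblib.load(context.artifacts["vectorizer"])
'           self.classifier = joblib.load(context.artifacts["classifier"])
'
'       def predict(self, context, model_input):
'           # model_input is a pd.DataFrame (per the diagram's signature).
'           X = self.vectorizer.transform(model_input["text"])
'           return cp.asnumpy(self.classifier.predict(X))  # CuPy -> NumPy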
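' A minimal sketch of the "Hydra -> DictConfig -> Config (Pydantic)" step from
' the Configuration note. Field names follow the diagram; the entry-point name
' and config paths are illustrative:
'
'   import hydra
'   from omegaconf import DictConfig, OmegaConf
'   from pydantic import BaseModel
'
'   class HyperparameterConfig(BaseModel):
'       optimizer: str
'       param_grid: dict
'       n_trials: int
'
'   class Config(BaseModel):
'       hyperparameters: HyperparameterConfig
'       # model, vectorization and data sub-configs omitted for brevity
'
'   @hydra.main(config_path="conf", config_name="config", version_base=None)
'   def main(cfg: DictConfig) -> None:
'       # Resolve the DictConfig to plain Python containers,
'       # then validate it with the Pydantic model.
'       config = Config(**OmegaConf.to_container(cfg, resolve=True))
'       print(config.hyperparameters.n_trials)  # 50 with the YAML above
'
'   if __name__ == "__main__":
'       main()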