' emotion_classifier / uml.plantuml
@startuml
!theme plain
skinparam classAttributeIconSize 0
skinparam defaultFontName Arial
skinparam class {
BackgroundColor PaleTurquoise
BorderColor DarkSlateGray
}
skinparam abstractClass {
BackgroundColor LightYellow
BorderColor DarkSlateGray
}
skinparam interface {
BackgroundColor White
BorderColor Black
}
' ----------- Type ParamGrid -----------
class ParamGrid
note bottom of ParamGrid
Example keys and values:
- "C": [0.1, 1, 10, 100]
- "kernel": ["linear", "rbf"]
- "gamma":
  low: 0.001
  high: 0.1
  log: true
This type describes the expected structure of the
param_grid passed to the Optimizers.
end note
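' A minimal Python sketch (an assumption, not part of the diagram source) of
' what a ParamGrid value could look like at runtime:
/'
# A ParamGrid is a plain dict: list values enumerate discrete choices,
# nested dicts with low/high/log describe a continuous search range.
param_grid = {
    "C": [0.1, 1, 10, 100],                            # discrete choices
    "kernel": ["linear", "rbf"],                       # discrete choices
    "gamma": {"low": 0.001, "high": 0.1, "log": True}, # continuous, log scale
}
'/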
' ----------- Interfaces -----------
package "Interfaces" {
interface MetricsCalculator {
+ calculate_and_log(y_true: cp.ndarray, y_pred: cp.ndarray, prefix: str): dict
+ calculate_and_log_multiclass(y_true: cp.ndarray, y_pred: cp.ndarray, prefix: str): dict
}
note bottom of MetricsCalculator
Always produces: accuracy, f1, precision, recall, auc-roc
end note
interface Vectorizer {
+ fit_transform(texts: cudf.Series): cp.ndarray | csr_matrix
+ transform(texts: cudf.Series): cp.ndarray | csr_matrix
}
interface HyperparameterOptimizer {
+ optimize(trainer: BaseTrainer, param_grid: ParamGrid): dict
}
note bottom of HyperparameterOptimizer
Returns a dict with the best configuration found,
e.g. {"C": 1, "kernel": "linear", "gamma": 0.01}
end note
}
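' A hedged Python sketch of how these interfaces could be expressed as
' typing.Protocol classes; the names mirror the diagram, but the Protocol
' encoding itself is an assumption:
/'
from typing import Protocol

class MetricsCalculator(Protocol):
    def calculate_and_log(self, y_true, y_pred, prefix: str) -> dict: ...
    def calculate_and_log_multiclass(self, y_true, y_pred, prefix: str) -> dict: ...

class Vectorizer(Protocol):
    def fit_transform(self, texts): ...
    def transform(self, texts): ...

class HyperparameterOptimizer(Protocol):
    def optimize(self, trainer, param_grid: dict) -> dict: ...
'/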
' ----------- Base Classes -----------
package "Base Classes" {
abstract class BaseTrainer {
- config: Config
- classifier: object
- metrics_calculator: MetricsCalculator
--
+ __init__(config: Config, data_path: str, target_column: str)
+ build_components(): void
+ train(): void
+ evaluate(): void
+ log_parameters_to_mlflow(): void
+ optimize_if_needed(): void
- _prepare_input_for_fit(X: cp.ndarray | csr_matrix): cp.ndarray | csr_matrix
- _prepare_input_for_predict(X: cp.ndarray | csr_matrix): cp.ndarray | csr_matrix
- _get_binary_predictions(X: cp.ndarray): cp.ndarray
- _get_positive_probabilities(X: cp.ndarray): cp.ndarray | None
- _get_label_dtype(): cp.dtype
}
note right of BaseTrainer
log_parameters_to_mlflow():
calls the singledispatch function
get_relevant_params_for_logging(self).
optimize_if_needed():
checks self.config for a configured optimizer,
then calls optimize() on it if needed.
end note
abstract class CuMLTrainer extends BaseTrainer {
- vectorizer: Vectorizer
- classifier: cuML.Base
--
+ build_components(): void
+ train(): void
+ evaluate(): void
- _prepare_input_for_fit(X: cp.ndarray | csr_matrix): cp.ndarray
- _prepare_input_for_predict(X: cp.ndarray | csr_matrix): cp.ndarray
}
}
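' A minimal sketch of the two BaseTrainer hooks described in the note above.
' The OPTIMIZERS registry and attribute names are assumptions:
/'
import mlflow

class BaseTrainer:
    def log_parameters_to_mlflow(self) -> None:
        # Delegates to the singledispatch function so each trainer
        # subclass can expose its own relevant parameters.
        mlflow.log_params(get_relevant_params_for_logging(self))

    def optimize_if_needed(self) -> None:
        # Only optimize when the config names an optimizer.
        name = self.config.hyperparameters.optimizer
        if not name:
            return
        optimizer = OPTIMIZERS[name]()  # hypothetical registry, e.g. {"optuna": OptunaOptimizer}
        best = optimizer.optimize(self, self.config.hyperparameters.param_grid)
        self.classifier.set_params(**best)  # assumes a sklearn/cuML-style estimator
'/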
' ----------- Concrete Trainers (cuML) -----------
package "Concrete Trainers (cuML)" {
class SvmTrainer extends CuMLTrainer {
- classifier: SVC
--
+ _build_classifier(): void
}
note bottom of SvmTrainer
SvmTrainer is affected by the C, kernel,
and gamma (for RBF) parameters.
end note
class RandomForestTrainer extends CuMLTrainer {
- classifier: RandomForestClassifier
--
+ _build_classifier(): void
}
class LogisticRegressionTrainer extends CuMLTrainer {
- classifier: LogisticRegression
--
+ _build_classifier(): void
}
class LinearRegressionTrainer extends CuMLTrainer {
- classifier: LinearRegression
--
+ _build_classifier(): void
}
}
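' A hedged sketch of what a concrete _build_classifier() could look like,
' here for SvmTrainer with cuML's SVC; the config field names are assumptions:
/'
from cuml.svm import SVC

class SvmTrainer(CuMLTrainer):
    def _build_classifier(self) -> None:
        # Instantiate the GPU SVM from the model params in the config.
        self.classifier = SVC(**self.config.model.params)
'/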
' ----------- Concrete Trainers (Hugging Face) -----------
package "Concrete Trainers (Hugging Face)" {
class HuggingFaceTransformerTrainer extends BaseTrainer {
- tokenizer: AutoTokenizer
- model: AutoModelForSequenceClassification
- hf_trainer: Trainer
--
+ build_components(): void
+ train(): void
+ evaluate(): void
- _create_torch_dataset(texts: cudf.Series, labels: cp.ndarray): torch.utils.data.Dataset
- _prepare_training_args(): TrainingArguments
}
note right of HuggingFaceTransformerTrainer
This trainer does not use
the vectorizer config.
end note
}
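' A hedged sketch of _create_torch_dataset(): tokenize once on the host and
' wrap the encodings in a torch Dataset. EmotionDataset is a hypothetical
' helper name; moving cudf/cupy data to host via to_arrow()/get() is an
' assumption about the actual implementation:
/'
import torch

class EmotionDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings  # dict of token tensors from the tokenizer
        self.labels = labels        # numpy array of class ids

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {k: v[idx] for k, v in self.encodings.items()}
        item["labels"] = torch.tensor(int(self.labels[idx]))
        return item

class HuggingFaceTransformerTrainer(BaseTrainer):
    def _create_torch_dataset(self, texts, labels):
        # cudf.Series -> list[str] on host; cp.ndarray -> numpy via .get()
        encodings = self.tokenizer(texts.to_arrow().to_pylist(),
                                   truncation=True, padding=True,
                                   return_tensors="pt")
        return EmotionDataset(encodings, labels.get())
'/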
' ----------- Hyperparameter Optimizers -----------
package "Hyperparameter Optimizers" {
class OptunaOptimizer {
- study: optuna.study.Study
- objective: function
--
+ optimize(trainer: BaseTrainer, param_grid: ParamGrid): dict
}
note bottom of OptunaOptimizer
Implementation:
1) Creates or retrieves an Optuna study.
2) Defines the objective (cost function),
   e.g. uses param_grid["C"] to call trial.suggest_float("C", ...).
3) Applies the hyperparameters to the trainer
   (e.g. trainer.classifier = SVC(**params)).
4) Calls study.optimize(..., n_trials=...).
5) Returns the best config as a dict.
end note
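' A hedged Python sketch of the optimize() flow described in the note;
' the train/score plumbing on the trainer is an assumption:
/'
import optuna

def optimize(self, trainer, param_grid: dict) -> dict:
    def objective(trial: optuna.Trial) -> float:
        params = {}
        for name, spec in param_grid.items():
            if isinstance(spec, list):
                # Discrete choices, e.g. "kernel": ["linear", "rbf"]
                params[name] = trial.suggest_categorical(name, spec)
            else:
                # Range spec, e.g. "gamma": {"low": 0.001, "high": 0.1, "log": true}
                params[name] = trial.suggest_float(
                    name, spec["low"], spec["high"], log=spec.get("log", False))
        trainer.classifier.set_params(**params)  # assumed estimator API
        trainer.train()
        return trainer.evaluate()["f1"]          # assumed metric dict

    self.study = optuna.create_study(direction="maximize")
    self.study.optimize(objective, n_trials=self.n_trials)
    return self.study.best_params
'/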
class RayTuneOptimizer {
- param_space: dict
- search_alg: object
- scheduler: object
--
+ optimize(trainer: BaseTrainer, param_grid: ParamGrid): dict
}
note bottom of RayTuneOptimizer
Implementation:
1) Converts param_grid into a param_space for Ray Tune
   (e.g. "C": tune.grid_search([...])).
2) Runs tune.run(...).
3) Uses search_alg/scheduler.
4) Returns the best config as a dict.
end note
OptunaOptimizer ..|> HyperparameterOptimizer
RayTuneOptimizer ..|> HyperparameterOptimizer
}
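' A hedged sketch of the Ray Tune flow from the note, using the classic
' tune.run() API; the trainable body is an assumption:
/'
from ray import tune

def optimize(self, trainer, param_grid: dict) -> dict:
    # 1) Convert the ParamGrid into a Ray Tune search space.
    param_space = {
        name: tune.grid_search(spec) if isinstance(spec, list)
        else tune.loguniform(spec["low"], spec["high"])
        for name, spec in param_grid.items()
    }

    # 2) Trainable: apply the sampled params, train, report the score.
    def trainable(config):
        trainer.classifier.set_params(**config)   # assumed estimator API
        trainer.train()
        tune.report(f1=trainer.evaluate()["f1"])  # assumed metric dict

    # 3) Run the search, optionally with self.search_alg / self.scheduler.
    analysis = tune.run(trainable, config=param_space,
                        search_alg=self.search_alg, scheduler=self.scheduler)

    # 4) Return the best config as a plain dict.
    return analysis.get_best_config(metric="f1", mode="max")
'/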
' ----------- MLflow Integration -----------
package "MLflow Integration" {
class MLflowDecorator {
- experiment_name: str
- tracking_uri: str
--
+ __init__(experiment_name: str, tracking_uri: str): void
+ __call__(func: function): function
+ _start_run(): void
+ _log_params(params: dict): void
+ _log_metrics(metrics: dict): void
+ _log_artifacts(artifacts: dict): void
+ _end_run(status: str): void
}
}
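' A hedged sketch of MLflowDecorator as a callable class that wraps a
' training function in an MLflow run; error handling is kept minimal:
/'
import functools
import mlflow

class MLflowDecorator:
    def __init__(self, experiment_name: str, tracking_uri: str):
        self.experiment_name = experiment_name
        self.tracking_uri = tracking_uri

    def __call__(self, func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            mlflow.set_tracking_uri(self.tracking_uri)
            mlflow.set_experiment(self.experiment_name)
            with mlflow.start_run():
                # Params/metrics/artifacts are logged by the wrapped
                # function (or by the _log_* helpers in the diagram).
                return func(*args, **kwargs)
        return wrapper
'/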
' ----------- Utilities -----------
package "Utilities" {
class CuMLPyFuncWrapper {
- vectorizer: Vectorizer
- classifier: object
--
+ load_context(context): void
+ predict(context, model_input: pd.DataFrame): np.ndarray
}
}
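' A hedged sketch of CuMLPyFuncWrapper as an mlflow.pyfunc.PythonModel;
' the artifact keys ("vectorizer", "classifier") and the "text" column
' are assumptions:
/'
import joblib
import mlflow.pyfunc

class CuMLPyFuncWrapper(mlflow.pyfunc.PythonModel):
    def load_context(self, context):
        # Restore the fitted vectorizer and classifier from run artifacts.
        self.vectorizer = joblib.load(context.artifacts["vectorizer"])
        self.classifier = joblib.load(context.artifacts["classifier"])

    def predict(self, context, model_input):
        # model_input is a pandas DataFrame; assume a "text" column.
        features = self.vectorizer.transform(model_input["text"])
        return self.classifier.predict(features)
'/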
' ----------- Configuration -----------
package "Configuration" {
class Config <<PydanticModel>> {
+ model: ModelConfig
+ vectorization: VectorizationConfig
+ data: DataConfig
+ hyperparameters: HyperparameterConfig
}
class ModelConfig <<PydanticModel>> {
+ type: str
+ params: dict
}
class VectorizationConfig <<PydanticModel>> {
+ method: str
+ tfidf: dict
+ bow: dict
}
class DataConfig <<PydanticModel>> {
+ path: str
+ target_column: str
}
class HyperparameterConfig <<PydanticModel>> {
+ optimizer: str
+ param_grid: dict
+ n_trials: int
}
note bottom of HyperparameterConfig
Example param_grid for an SVM:
{
  "C": [0.1, 1, 10, 100],
  "kernel": ["linear", "rbf"],
  "gamma": {
    "low": 0.001,
    "high": 0.1,
    "log": true
  }
}
n_trials: 50
end note
Config *-- ModelConfig
Config *-- VectorizationConfig
Config *-- DataConfig
Config *-- HyperparameterConfig
note left of Config
YAML excerpt:
hyperparameters:
  optimizer: "optuna"
  param_grid:
    C: [0.1, 1, 10, 100]
    kernel:
      - "linear"
      - "rbf"
    gamma:
      low: 0.001
      high: 0.1
      log: true
  n_trials: 50
==
Hydra -> DictConfig -> Config(Pydantic)
end note
}
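' A hedged sketch of the Hydra -> DictConfig -> Config(Pydantic) handoff
' noted above; the entry-point name and config paths are assumptions:
/'
import hydra
from omegaconf import DictConfig, OmegaConf

@hydra.main(config_path="conf", config_name="config", version_base=None)
def main(cfg: DictConfig) -> None:
    # Resolve interpolations, convert to plain Python containers,
    # then validate everything through the Pydantic model.
    config = Config(**OmegaConf.to_container(cfg, resolve=True))
    ...
'/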
' ----------- singledispatch function -----------
package "Parameter Logging (singledispatch)" {
object get_relevant_params_for_logging <<Function>>
note bottom of get_relevant_params_for_logging
@singledispatch
def get_relevant_params_for_logging(trainer: BaseTrainer) -> dict:
...
@get_relevant_params_for_logging.register
def _(trainer: HuggingFaceTransformerTrainer) -> dict:
...
@get_relevant_params_for_logging.register
def _(trainer: SvmTrainer) -> dict:
...
etc.
end note
}
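' A runnable sketch fleshing out the note's pseudocode; the returned
' parameter dicts are illustrative assumptions:
/'
from functools import singledispatch

@singledispatch
def get_relevant_params_for_logging(trainer: BaseTrainer) -> dict:
    # Fallback: log the raw model params from the config.
    return dict(trainer.config.model.params)

@get_relevant_params_for_logging.register
def _(trainer: SvmTrainer) -> dict:
    clf = trainer.classifier
    return {"C": clf.C, "kernel": clf.kernel, "gamma": clf.gamma}

@get_relevant_params_for_logging.register
def _(trainer: HuggingFaceTransformerTrainer) -> dict:
    return {"model_name": trainer.model.name_or_path}
'/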
' ----------- Relations -----------
BaseTrainer ..> MetricsCalculator : «uses»
BaseTrainer ..> HyperparameterOptimizer : «may use»
BaseTrainer ..> MLflowDecorator : «may be decorated by»
BaseTrainer ..> get_relevant_params_for_logging : «calls singledispatch function»
CuMLTrainer ..> "cuML.Base" : «uses»
CuMLTrainer ..> CuMLPyFuncWrapper : «for saving model»
HuggingFaceTransformerTrainer ..> AutoTokenizer : «uses»
HuggingFaceTransformerTrainer ..> AutoModelForSequenceClassification : «uses»
HuggingFaceTransformerTrainer ..> Trainer : «uses»
HuggingFaceTransformerTrainer ..> TrainingArguments : «uses»
MLflowDecorator ..> mlflow : «uses»
@enduml