File size: 3,437 Bytes
5caedb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from unittest.mock import MagicMock

import numpy as np
import pandas as pd
import pytest

from llm_studio.src.metrics.text_causal_regression_modeling_metrics import (
    Metrics,
    mae_score,
    mse_score,
)


@pytest.fixture
def mock_val_df():
    """Provide an empty validation DataFrame; the metrics under test ignore it."""
    empty_frame = pd.DataFrame()
    return empty_frame


@pytest.fixture
def mock_cfg():
    """Provide a stand-in config object; the metrics under test ignore its contents."""
    fake_cfg = MagicMock()
    return fake_cfg


def test_mse_score_single_value():
    """MSE with a single prediction per row equals the squared per-row error."""
    batch = {
        "predictions": [[1.0], [2.0], [3.0], [4.0]],
        "target_text": ["2.0", "2.0", "2.0", "2.0"],
    }

    actual = mse_score(MagicMock(), batch, pd.DataFrame())

    # Squared distance of each prediction from the constant target 2.0.
    np.testing.assert_almost_equal(actual, np.array([1.0, 0.0, 1.0, 4.0]))


def test_mse_score_multiple_values():
    """MSE averages the squared errors across the values within each row."""
    batch = {
        "predictions": [[1.0, 2.0], [3.0, 4.0]],
        "target_text": ["2.0,3.0", "3.0,3.0"],
    }

    actual = mse_score(MagicMock(), batch, pd.DataFrame())

    # Row 1: ((1-2)^2 + (2-3)^2)/2 = 1.0; row 2: ((3-3)^2 + (4-3)^2)/2 = 0.5.
    np.testing.assert_almost_equal(actual, np.array([1.0, 0.5]))


def test_mae_score_single_value():
    """MAE with a single prediction per row equals the absolute per-row error."""
    batch = {
        "predictions": [[1.0], [2.0], [3.0], [4.0]],
        "target_text": ["2.0", "2.0", "2.0", "2.0"],
    }

    actual = mae_score(MagicMock(), batch, pd.DataFrame())

    # Absolute distance of each prediction from the constant target 2.0.
    np.testing.assert_almost_equal(actual, np.array([1.0, 0.0, 1.0, 2.0]))


def test_mae_score_multiple_values():
    """MAE averages the absolute errors across the values within each row."""
    batch = {
        "predictions": [[1.0, 2.0], [3.0, 4.0]],
        "target_text": ["2.0,3.0", "3.0,3.0"],
    }

    actual = mae_score(MagicMock(), batch, pd.DataFrame())

    # Row 1: (|1-2| + |2-3|)/2 = 1.0; row 2: (|3-3| + |4-3|)/2 = 0.5.
    np.testing.assert_almost_equal(actual, np.array([1.0, 0.5]))


def test_metrics_names():
    """The registry exposes exactly the supported metric names, in order."""
    expected_names = ["MAE", "MSE"]
    assert Metrics.names() == expected_names


def test_metrics_get_mse():
    """Looking up "MSE" yields its scorer plus optimization/reduction settings."""
    entry = Metrics.get("MSE")
    # Scorer function, optimization direction, and reduction mode.
    assert (entry[0], entry[1], entry[2]) == (mse_score, "min", "mean")


def test_metrics_get_mae():
    """Looking up "MAE" yields its scorer plus optimization/reduction settings."""
    entry = Metrics.get("MAE")
    # Scorer function, optimization direction, and reduction mode.
    assert (entry[0], entry[1], entry[2]) == (mae_score, "min", "mean")


def test_metrics_get_unknown():
    """An unrecognized metric name falls back to the MSE entry."""
    entry = Metrics.get("Unknown")
    # Fallback mirrors the "MSE" registration exactly.
    assert (entry[0], entry[1], entry[2]) == (mse_score, "min", "mean")


def test_mse_score_empty_input():
    """An empty batch cannot be scored and must raise ValueError."""
    empty_batch = {"predictions": [], "target_text": []}

    with pytest.raises(ValueError):
        mse_score(MagicMock(), empty_batch, pd.DataFrame())


def test_mae_score_empty_input():
    """An empty batch cannot be scored and must raise ValueError."""
    empty_batch = {"predictions": [], "target_text": []}

    with pytest.raises(ValueError):
        mae_score(MagicMock(), empty_batch, pd.DataFrame())


def test_mse_score_ignore_raw_results(mock_cfg, mock_val_df):
    """Passing raw_results=True must not change the MSE output."""
    batch = {"predictions": [[1.0], [2.0]], "target_text": ["2.0", "2.0"]}

    default_scores = mse_score(mock_cfg, batch, mock_val_df)
    raw_scores = mse_score(mock_cfg, batch, mock_val_df, raw_results=True)

    np.testing.assert_array_equal(default_scores, raw_scores)


def test_mae_score_ignore_raw_results(mock_cfg, mock_val_df):
    """Passing raw_results=True must not change the MAE output."""
    batch = {"predictions": [[1.0], [2.0]], "target_text": ["2.0", "2.0"]}

    default_scores = mae_score(mock_cfg, batch, mock_val_df)
    raw_scores = mae_score(mock_cfg, batch, mock_val_df, raw_results=True)

    np.testing.assert_array_equal(default_scores, raw_scores)