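"""Tests for llm_studio.src.metrics.text_causal_regression_modeling_metrics."""
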
from unittest.mock import MagicMock

import numpy as np
import pandas as pd
import pytest

from llm_studio.src.metrics.text_causal_regression_modeling_metrics import (
    Metrics,
    mae_score,
    mse_score,
)


@pytest.fixture
def mock_val_df():
    return pd.DataFrame()


@pytest.fixture
def mock_cfg():
    return MagicMock()


def test_mse_score_single_value():
    results = {
        "predictions": [[1.0], [2.0], [3.0], [4.0]],
        "target_text": ["2.0", "2.0", "2.0", "2.0"],
    }
    cfg = MagicMock()
    val_df = pd.DataFrame()
    score = mse_score(cfg, results, val_df)
    expected = np.array([1.0, 0.0, 1.0, 4.0])
    np.testing.assert_almost_equal(score, expected)


def test_mse_score_multiple_values():
    results = {
        "predictions": [[1.0, 2.0], [3.0, 4.0]],
        "target_text": ["2.0,3.0", "3.0,3.0"],
    }
    cfg = MagicMock()
    val_df = pd.DataFrame()
    score = mse_score(cfg, results, val_df)
    expected = np.array([1.0, 0.5])
    np.testing.assert_almost_equal(score, expected)


def test_mae_score_single_value():
    results = {
        "predictions": [[1.0], [2.0], [3.0], [4.0]],
        "target_text": ["2.0", "2.0", "2.0", "2.0"],
    }
    cfg = MagicMock()
    val_df = pd.DataFrame()
    score = mae_score(cfg, results, val_df)
    expected = np.array([1.0, 0.0, 1.0, 2.0])
    np.testing.assert_almost_equal(score, expected)


def test_mae_score_multiple_values():
    results = {
        "predictions": [[1.0, 2.0], [3.0, 4.0]],
        "target_text": ["2.0,3.0", "3.0,3.0"],
    }
    cfg = MagicMock()
    val_df = pd.DataFrame()
    score = mae_score(cfg, results, val_df)
    expected = np.array([1.0, 0.5])
    np.testing.assert_almost_equal(score, expected)


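# The registry tests below assume Metrics.get(...) returns a
# (score_fn, "min", "mean") tuple and that unknown names fall back to mse_score.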
def test_metrics_names():
    assert Metrics.names() == ["MAE", "MSE"]


def test_metrics_get_mse():
    metric = Metrics.get("MSE")
    assert metric[0] == mse_score
    assert metric[1] == "min"
    assert metric[2] == "mean"


def test_metrics_get_mae():
    metric = Metrics.get("MAE")
    assert metric[0] == mae_score
    assert metric[1] == "min"
    assert metric[2] == "mean"


def test_metrics_get_unknown():
    metric = Metrics.get("Unknown")
    assert metric[0] == mse_score
    assert metric[1] == "min"
    assert metric[2] == "mean"


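# Empty prediction/target lists are expected to raise ValueError.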
def test_mse_score_empty_input():
    results = {"predictions": [], "target_text": []}
    cfg = MagicMock()
    val_df = pd.DataFrame()
    with pytest.raises(ValueError):
        mse_score(cfg, results, val_df)


def test_mae_score_empty_input():
    results = {"predictions": [], "target_text": []}
    cfg = MagicMock()
    val_df = pd.DataFrame()
    with pytest.raises(ValueError):
        mae_score(cfg, results, val_df)


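# raw_results=True is expected to be accepted but ignored by both score functions.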
def test_mse_score_ignore_raw_results(mock_cfg, mock_val_df):
    results = {"predictions": [[1.0], [2.0]], "target_text": ["2.0", "2.0"]}
    score_without_raw = mse_score(mock_cfg, results, mock_val_df)
    score_with_raw = mse_score(mock_cfg, results, mock_val_df, raw_results=True)
    np.testing.assert_array_equal(score_without_raw, score_with_raw)


def test_mae_score_ignore_raw_results(mock_cfg, mock_val_df):
    results = {"predictions": [[1.0], [2.0]], "target_text": ["2.0", "2.0"]}
    score_without_raw = mae_score(mock_cfg, results, mock_val_df)
    score_with_raw = mae_score(mock_cfg, results, mock_val_df, raw_results=True)
    np.testing.assert_array_equal(score_without_raw, score_with_raw)