# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This script is used to analyze the results of the experiments from a CSV file.

Basic Usage:
    To perform analysis on the adapters experiment results::

        $ python scoring_and_analysis.py \
            --csv <path to cleaned result csv file> \
            --dataset_type_column <column name in csv with the dataset types>

    To perform analysis on the finetuning experiment results::

        $ python scoring_and_analysis.py \
            --csv <path to csv> \
            --dataset_type_column <column name in csv with the dataset types> \
            -ft

Advanced Usage:
    By default, the script shows only the best hyperparameters for each criterion.
    To see a ranking of all the hyperparameters for each criterion, and to visualize
    how the results were selected, use the `--show_analysis` flag. Moreover, instead of
    displaying only the best hyperparameters, you can use the `--topk` flag to show the
    top *k* hyperparameters::

        $ python scoring_and_analysis.py \
            --csv <path to csv> \
            --dataset_type_column <dataset_group_column_name> \
            --show_analysis \
            --topk 3

    Instead of running the analysis over all possible combinations of all the hyperparameters,
    you can restrict the search space to a subset of experiments. This is achieved
    with the `-uargs` and `-cargs` flags for the unconstrained and the constrained
    experiments respectively::

        $ python scoring_and_analysis.py \
            --csv <path to csv> \
            --dataset_type_column <dataset_group_column_name> \
            -cargs 'Adapter Position' encoder \
            -cargs 'Adapter Dropout' 0.5 \
            -uargs 'Train Steps' 5000
"""

import argparse

import numpy as np
import pandas as pd

# CHANGE: Specify the column names and their attributes to consider for the selection
# of the best results
UNCONSTRAINED_EXP_KEY = {'name': 'WER: Test', 'attribute': min}
CONSTRAINED_EXP_KEY = {'name': 'Score', 'attribute': max}
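# i.e. unconstrained runs are ranked by the lowest test-set WER, and constrained
# runs by the highest combined Score (see calculate_original_scale / calculate_adapt_werr)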

# CHANGE: Hyperparameters of the best run to display in the output
ADAPTER_HYPERPARAMETER_COLUMNS = ['Adapter Dimensions', 'Adapter Dropout', 'Stochastic Depth', 'Train Steps']
FINETUNING_HYPERPARAMETER_COLUMNS = ['Train Steps', 'Learning Rate']

# CHANGE: Column name for the test set WER on the new domain
TEST_WER_COLUMN = 'WER: Test'

# CHANGE: Column name for the test set WER on the original domain
ORIGINAL_TEST_WER_COLUMN = 'WER: Librispeech Test Other'

# CHANGE: Based on the experiment type, get the column name for categorizing the results
EXP_CATEGORY_KEY = {'adapters': 'Adapter Position', 'finetuning': 'Frozen Module'}

# CHANGE: Maximum absolute WER degradation allowed in the original domain
MAX_DEGRADATION_PERCENTAGE = 3

# CHANGE: Baseline WER in the original domain
BASELINE_ORIGINAL_WER = 5.118

# CHANGE: Baseline WER in the domain to be adapted
# The keys of this dictionary should cover all values of the `dataset_type_column`
BASELINE_ADAPTED_WER = {
    'irish_english_male': 20.690,
    'midlands_english_female': 9.612,
    'midlands_english_male': 11.253,
    'northern_english_female': 11.108,
    'northern_english_male': 10.180,
    'scottish_english_female': 12.309,
    'scottish_english_male': 11.942,
    'southern_english_female': 9.701,
    'southern_english_male': 10.215,
    'welsh_english_female': 8.514,
    'welsh_english_male': 11.463,
}


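# The two helpers below implement the selection metric computed in
# parse_results(): Score = Original Scale * Adapt WERR. The first factor rewards
# staying close to the original-domain baseline WER; the second rewards
# relative WER improvement on the adapted domain.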
def calculate_original_scale(original_wer):
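    """Scale in [0, 1] penalizing WER degradation on the original domain.

    Evaluates to 1.0 when the original-domain WER matches the baseline and
    decays linearly to 0.0 once the absolute change reaches
    MAX_DEGRADATION_PERCENTAGE. Example (illustrative WER value, not taken
    from any actual run):

        >>> round(calculate_original_scale(5.618), 4)
        0.8333
    """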
    wer_do = abs(original_wer - BASELINE_ORIGINAL_WER)
    return (MAX_DEGRADATION_PERCENTAGE - min(MAX_DEGRADATION_PERCENTAGE, wer_do)) / MAX_DEGRADATION_PERCENTAGE


def calculate_adapt_werr(adapted_wer, group):
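    """Relative WER improvement (WER reduction) on the adapted domain.

    Clipped at 0 so that a regression over the baseline yields 0 rather than
    a negative value. Example (illustrative WER value, not taken from any
    actual run):

        >>> round(calculate_adapt_werr(15.0, 'irish_english_male'), 4)
        0.275
    """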
    return max(BASELINE_ADAPTED_WER[group] - adapted_wer, 0) / BASELINE_ADAPTED_WER[group]


def parse_results(filepath: str, dataset_type_col: str, exp_type: str) -> pd.DataFrame:
    """Calculate the scoring metric for each experiment.

    Args:
        filepath: Path to the csv file containing the results
        dataset_type_col: Name of the column containing the dataset types
        exp_type: Type of experiments in the csv file

    Returns:
        Dataframe of all the experiments with their scores
    """

    df = pd.read_csv(filepath)
    df.drop(columns=['Model', 'Model Size'], errors='ignore', inplace=True)  # Drop these columns if they exist

    if exp_type == 'finetuning':
        df['Frozen Module'] = df['Frozen Module'].replace('-', 'null')

    if 'Score' not in df:
        # Calculate the selection scoring metric
        df['Original Scale'] = df.apply(lambda x: calculate_original_scale(x[ORIGINAL_TEST_WER_COLUMN]), axis=1)
        df['Adapt WERR'] = df.apply(lambda x: calculate_adapt_werr(x[TEST_WER_COLUMN], x[dataset_type_col]), axis=1)
        df['Score'] = df['Original Scale'] * df['Adapt WERR']

        # Round off the values to 4 decimal places
        df = df.round({'Original Scale': 4, 'Adapt WERR': 4, 'Score': 4})

        # Save the updated csv with scores
        df.to_csv(filepath, index=False)

    return df


def display_analysis_table(df_analysis: pd.DataFrame, key_info: dict):
    """Display the analysis table used to select the best hyperparameter configuration

    Args:
        df_analysis: Dataframe of the analysis table
        key_info: Dictionary containing the name of the column and the attribute to use for analysis
    """
    # Calculate each column length for the table
    column_lengths = {x: max(len(x), df_analysis[x].map(str).apply(len).max()) for x in df_analysis.columns}

    print(' | '.join([f'{x:^{column_lengths[x]}}' for x in df_analysis.columns]))
    print('-' * sum([column_lengths[x] + 3 for x in df_analysis.columns]))

    for idx in range(len(df_analysis)):
        row_str = []
        for column in df_analysis.columns:
            row_str.append(f'{df_analysis.iloc[idx][column]:^{column_lengths[column]}}')
        print(' | '.join(row_str))


def display_results(df_all: pd.DataFrame, category: str, best_config: pd.Series, dataset_type_col: str, exp_type: str):
    """Display the Test and the Librispeech Test Other WER for the best configuration.

    Args:
        df_all: Dataframe of all the experiments
        category: Adapter position or frozen module in case of finetuning
        best_config: Best hyperparameter configurations
        dataset_type_col: Name of the column containing the dataset types
        exp_type: Type of experiments in the dataframe
    """
    test_wer_values, ls_test_other_wer_values = [], []

    print(f'{dataset_type_col:^25} | {TEST_WER_COLUMN:<20} | {ORIGINAL_TEST_WER_COLUMN:<20}')
    print('-' * 70)
    for dtype in df_all[dataset_type_col].unique():
        df_filtered = df_all[(df_all[dataset_type_col] == dtype) & (df_all[EXP_CATEGORY_KEY[exp_type]] == category)]
        for col in ADAPTER_HYPERPARAMETER_COLUMNS if exp_type == 'adapters' else FINETUNING_HYPERPARAMETER_COLUMNS:
            df_filtered = df_filtered[df_filtered[col] == best_config[col]]

        if len(df_filtered) == 0:
            continue

        if len(df_filtered) > 1:
            raise ValueError(f'More than one row found for dataset type: {dtype} and category: {category}')

        dtype_data = df_filtered.iloc[0]
        test_wer_values.append(dtype_data[TEST_WER_COLUMN])
        ls_test_other_wer_values.append(dtype_data[ORIGINAL_TEST_WER_COLUMN])
        print(
            f'{dtype_data[dataset_type_col]:^25} | {dtype_data[TEST_WER_COLUMN]:^20} | {dtype_data[ORIGINAL_TEST_WER_COLUMN]:^20}'
        )
    print('-' * 70)
    print(f'{"Average":^25} | {np.mean(test_wer_values):^20} | {np.mean(ls_test_other_wer_values):^20}')
    print('\n')


def get_best_config(
    df_exp: pd.DataFrame, dataset_type_col: str, key_info: dict, topk: int, show_analysis: bool, exp_type: str,
):
    """Get the best hyperparameter configuration for a given subset of experiments.

    Args:
        df_exp: Dataframe of all experiments
        dataset_type_col: Name of the column containing the dataset types
        key_info: Dictionary containing the name of the column and the attribute to use for analysis
        topk: Number of top-k results to display
        show_analysis: Whether to display the analysis table
        exp_type: Type of experiments in the dataframe
    """
    # Columns to consider for hyperparameter combinations
    hyperparameter_cols = ADAPTER_HYPERPARAMETER_COLUMNS if exp_type == 'adapters' else FINETUNING_HYPERPARAMETER_COLUMNS

    # Columns to display in the analysis table
    analysis_columns = list(set([key_info['name'], TEST_WER_COLUMN, ORIGINAL_TEST_WER_COLUMN]))

    df_analyze = df_exp.drop(
        columns=[
            x
            for x in df_exp.columns
            if x not in set(hyperparameter_cols + [EXP_CATEGORY_KEY[exp_type]] + analysis_columns)
        ]
    )

    for category in df_exp[EXP_CATEGORY_KEY[exp_type]].unique():
        # Group all hyperparameter configurations and do mean across all speakers
        df_category_mean = (
            df_analyze[df_analyze[EXP_CATEGORY_KEY[exp_type]] == category]
            .groupby(hyperparameter_cols, as_index=False)[analysis_columns]
            .mean()
        )

        # Sort the values by the key in order to get the top-k results
        df_category_mean.sort_values(
            by=key_info['name'], ascending=key_info['attribute'] is min, inplace=True
        )

        print('=' * len(category))
        print(category.upper())
        print('=' * len(category) + '\n')

        if show_analysis:
            display_analysis_table(df_category_mean, key_info)
            print('\n')

        for idx in range(min(topk, len(df_category_mean))):
            print('-----')
            print(f'Top-{idx + 1}')
            print('-----')

            df_category_best = df_category_mean.iloc[idx]

            print('\nHyperparameters')
            print('---------------\n')
            for hyperparameter in hyperparameter_cols + [key_info['name']]:
                print(f'{hyperparameter:<20}: {df_category_best[hyperparameter]}')
            print()

            print('\nResults')
            print('-------\n')
            display_results(df_exp, category, df_category_best, dataset_type_col, exp_type)


def analyze_results(
    df_exp: pd.DataFrame,
    fixed_hyperparameters: list,
    title: str,
    dataset_type_col: str,
    key_info: dict,
    topk: int,
    show_analysis: bool,
    exp_type: str,
):
    """Perform analysis on a given subset of experiments

    Args:
        df_exp: Dataframe of all experiments
        fixed_hyperparameters: List of (hyperparameter, value) pairs to fix in the analysis
        title: Title of the analysis (for logging)
        dataset_type_col: Name of the column containing the dataset types
        key_info: Dictionary containing the name of the column and the attribute to use for analysis
        topk: Number of top-k results to display
        show_analysis: Whether to display the analysis table
        exp_type: Type of experiments in the dataframe
    """
    # Filter experiments based on the fixed hyperparameters
    for hyperparameter_name, hyperparameter_value in fixed_hyperparameters:
        df_exp = df_exp[df_exp[hyperparameter_name] == hyperparameter_value]

    # Perform analysis
    print('+' * len(title))
    print(title)
    print('+' * len(title) + '\n')
    get_best_config(df_exp, dataset_type_col, key_info, topk, show_analysis, exp_type)
    print()


def __validate_arg_type(arg):
    """Coerce a command line argument to float or int, falling back to str."""
    dtype = float if '.' in arg else int
    try:
        return dtype(arg)
    except ValueError:
        return arg


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--csv', required=True, help='Path to the cleaned results CSV file')
    parser.add_argument(
        '-dtype',
        '--dataset_type_column',
        required=True,
        help='Name of the column containing the dataset type. Example: For SLR83 it is "Group", for GSC it is "Dataset Size"',
    )
    parser.add_argument(
        '-cargs',
        '--constrained_args',
        nargs=2,
        action='append',
        default=[],
        type=__validate_arg_type,
        help='Hyperparameters to fix for the constrained experiments',
    )
    parser.add_argument(
        '-uargs',
        '--unconstrained_args',
        nargs=2,
        action='append',
        default=[],
        type=__validate_arg_type,
        help='Hyperparameters to fix for the unconstrained experiments',
    )
    parser.add_argument('-k', '--topk', type=int, default=1, help='Number of top-k results to display')
    parser.add_argument(
        '-ft', '--finetuning', action='store_true', help='True if the CSV contains Finetuning experiments'
    )
    parser.add_argument(
        '-s', '--show_analysis', action='store_true', help='Show the key values of all the dataset types'
    )
    args = parser.parse_args()

    # Get the experiment type
    exp_type = 'finetuning' if args.finetuning else 'adapters'

    # Parse CSV file
    df = parse_results(args.csv, args.dataset_type_column, exp_type)

    # Perform analysis - Constrained Adaptation
    analyze_results(
        df,
        args.constrained_args,
        'Constrained Experiment Results',
        args.dataset_type_column,
        CONSTRAINED_EXP_KEY,
        args.topk,
        args.show_analysis,
        exp_type,
    )

    # Perform analysis - Unconstrained Adaptation
    analyze_results(
        df,
        args.unconstrained_args,
        'Unconstrained Experiment Results',
        args.dataset_type_column,
        UNCONSTRAINED_EXP_KEY,
        args.topk,
        args.show_analysis,
        exp_type,
    )