from typing import List

# Common dictionary mapping raw column names to their pretty display names
COLUMNS_PRETTY = {
    "bleu": "BLEU",
    "chrf": "ChrF",
    "rouge1": "ROUGE-1",
    "rouge2": "ROUGE-2",
    "rougeL": "ROUGE-L",
    "bertscore": "BERTScore",
    "bertscore_normalized": "BERTScore (Normalized)",
    "model_name": "Model Name",
    "model_availability": "Availability",
    "urls": "Resources",
    "context_size": "Context Size",
    "submitted_by": "Submitted By",
    "EM infile": "EM infile",
    "EM inproject": "EM inproject",
    "EM common": "EM common",
    "EM commited": "EM committed",
    "EM non_informative": "EM non-informative",
    "EM random": "EM random",
    "EM all": "EM all",
    "context_composer": "Context Composer",
    "context_length": "Context Size",
    "dataset": "Dataset",
    "CompScore": "CompScore",
    "context": "Context",
    "task_type": "Task type",
}

# Metrics displayed for each task; add your metrics here
METRICS_PER_TASK = {
    "aggregated": [
        "Mean Rank",
        "Mean Score",
        "Library-based CG",
        "CI builds repair",
        "CMG",
        "Bug localization",
        "Module summarization",
    ],
    "commit_message_generation": [
        "BLEU",
        "ChrF",
        "ROUGE-1",
        "ROUGE-2",
        "ROUGE-L",
        "BERTScore",
        "BERTScore (Normalized)",
    ],
    "project_code_completion": [
        "EM infile",
        "EM inproject",
        "EM common",
        "EM committed",
        "EM non-informative",
        "EM random",
        "EM all",
    ],
    "bug_localization": [
        "P",
        "R",
        "FPR",
        "F1-score",
        "All_correct",
        "All_incorrect",
        "Output_count",
    ],
    "module_summarization": [
        "CompScore",
    ],
    "library_based_code_generation": [
        "API Recall\nno context",
        "API Recall\n20 APIs",
        "API Recall\n200 APIs",
        "API Recall\n2,000 APIs",
        "API Recall\nall APIs",
        "ChrF\nno context",
        "ChrF\n20 APIs",
        "ChrF\n200 APIs",
        "ChrF\n2,000 APIs",
        "ChrF\nall APIs",
    ],
    "ci_builds_repair": [
        "Pass@1",
    ],
}

# Column each task's table is sorted by
SORT_COLUMN_PER_TASK = {
    "commit_message_generation": "ROUGE-1",
    "project_code_completion": "EM inproject",
    "bug_localization": "Model Name",
    "module_summarization": "CompScore",
    "library_based_code_generation": "API Recall\nall APIs",
    "ci_builds_repair": "Pass@1",
}


def get_columns_per_task(task_id: str) -> List[str]:
    """Return the ordered list of column headers for the given task's table."""
    metrics_per_task = METRICS_PER_TASK[task_id]
    if task_id == 'aggregated':
        return ["Model Name"] + metrics_per_task
    if task_id == 'project_code_completion':
        return ["Model Name", "Context Composer", "Context Size", "Dataset Name", "Dataset"] + metrics_per_task + ["Submitted By", "Resources"]
    if task_id == 'bug_localization':
        return ["Model Name", "Availability", "Context Size"] + metrics_per_task + ["Submitted By", "Resources"]
    if task_id == 'module_summarization':
        return ["Model Name", "Context Size"] + metrics_per_task + ["Submitted By", "Resources"]
    if task_id == 'library_based_code_generation':
        return ["Model Name"] + metrics_per_task + ["Availability", "Submitted By", "Resources"]
    if task_id == 'ci_builds_repair':
        return ["Model Name", "Context Size", "Task type"] + metrics_per_task + ["Availability", "Submitted By", "Resources"]
    return ["Model Name", "Context Size", "Availability"] + metrics_per_task + ["Submitted By", "Resources"]


def get_types_per_task(task_id: str) -> List[str]:
    """Return the per-column datatype strings ("html", "markdown", "number") for the given task."""
    # The fallback placeholder is only used for its length (five "number" columns for unknown tasks)
    metrics_per_task = METRICS_PER_TASK.get(task_id, (0, 0, 0, 0, 0))
    if task_id == 'project_code_completion':
        return ["html", "markdown", "markdown", "markdown", "html"] + ["number" for _ in metrics_per_task] + ["markdown", "html"]
    if task_id == 'bug_localization':
        return ["html", "markdown", "markdown"] + ["number" for _ in metrics_per_task] + ["markdown", "html"]
    if task_id == 'ci_builds_repair':
        return ["html", "markdown", "markdown"] + ["number" for _ in metrics_per_task] + ["markdown", "markdown", "html"]
    return ["html", "markdown", "markdown"] + ["number" for _ in metrics_per_task] + ["markdown", "html"]
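

# Illustrative usage sketch, not part of the module's public surface: it assumes the
# helpers above are consumed by table-building code elsewhere and simply prints, for
# each task, how many column names and column types the two helpers return.
if __name__ == "__main__":
    for task_id in METRICS_PER_TASK:
        columns = get_columns_per_task(task_id)
        types = get_types_per_task(task_id)
        print(f"{task_id}: {len(columns)} columns, {len(types)} column types")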