# PromptNet/modules/metrics.py
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.rouge.rouge import Rouge


def compute_scores(gts, res):
    """
    Performs the MS COCO caption evaluation using the Python 3 implementation
    (https://github.com/salaniz/pycocoevalcap).

    :param gts: Dictionary mapping image ids to their gold captions
    :param res: Dictionary mapping image ids to their generated captions
    :return: Dictionary with the evaluation score (the mean of the scores
             over all instances) for each measure
    """
    # Set up scorers
    scorers = [
        (Bleu(4), ["BLEU_1", "BLEU_2", "BLEU_3", "BLEU_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L")
    ]
    eval_res = {}
    # Compute score for each metric
    for scorer, method in scorers:
        try:
            # Some scorers (e.g. Bleu) accept a verbose flag; pass 0 to silence output
            score, scores = scorer.compute_score(gts, res, verbose=0)
        except TypeError:
            # Fall back for scorers whose compute_score takes no verbose argument
            score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            # Bleu reports one aggregate score per n-gram order
            for sc, m in zip(score, method):
                eval_res[m] = sc
        else:
            eval_res[method] = score
    return eval_res
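

if __name__ == "__main__":
    # Minimal usage sketch with hypothetical captions: both dicts map an image
    # id to a list of caption strings, the format the pycocoevalcap scorers
    # expect (res must contain exactly one caption per id). Note that Meteor
    # spawns a Java subprocess, so a JRE must be available at runtime.
    gts = {"0": ["a dog runs across the grass", "a dog is running on a lawn"]}
    res = {"0": ["a dog running on grass"]}
    print(compute_scores(gts, res))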