from mmengine.evaluator import BaseMetric
from torchmetrics.functional.text import sacre_bleu_score

import torch

class bleu(BaseMetric):
    """BLEU metric computed with torchmetrics' ``sacre_bleu_score``.

    ``process`` stores one hypothesis string together with its references
    on every call; ``compute_metrics`` pools everything that was stored and
    scores it with a single ``sacre_bleu_score`` call.
    """

    # Prefix that BaseMetric uses to namespace the reported metric names.
    default_prefix = 'bleu'

    def process(self, data_batch, data_samples) -> None:
        """Record the hypothesis and references for one batch.

        Args:
            data_batch: Mapping read as
                ``data_batch['data_samples']['references']``.
            data_samples: Iterable of strings; the pieces are concatenated
                without a separator into a single hypothesis string.
        """
        # NOTE(review): the whole batch collapses into ONE hypothesis, so
        # 'references' is presumably a one-element list holding that
        # hypothesis' reference strings -- confirm against the dataset.
        hypothesis = ''.join(data_samples)
        references = data_batch['data_samples']['references']
        self.results.append({
            'outputs': [hypothesis],
            'labels': references,
        })

    def compute_metrics(self, results: list) -> dict:
        """Score every stored hypothesis against its references.

        Args:
            results: Entries produced by ``process``; each one holds
                list-valued ``'outputs'`` and ``'labels'``.

        Returns:
            dict: ``{'bleu_score': value}`` where ``value`` is the result of
            ``sacre_bleu_score`` multiplied by 100.
        """
        outputs_list = []
        labels_list = []
        for utt_result in results:
            # extend() appends in place; rebuilding with `a = a + b` would
            # re-copy the accumulated list for every entry (quadratic).
            outputs_list.extend(utt_result['outputs'])
            labels_list.extend(utt_result['labels'])
        return dict(
            bleu_score=sacre_bleu_score(outputs_list, labels_list) * 100,
        )
