from collections import defaultdict, Counter
from tqdm import tqdm


class DFITF:
    """Score n-grams by document frequency relative to term frequency.

    For every n-gram order from 1 up to ``n`` each phrase receives the score
    ``df ** 2 / (tf * N)`` where ``df`` is the number of texts containing the
    phrase, ``tf`` is its total number of occurrences over all texts and
    ``N`` is the number of texts.
    """

    def __init__(self, texts, n):
        """Store the corpus and the largest n-gram order to evaluate.

        Args:
            texts: Iterable of texts. Each text is sliced and its items are
                joined with ``' '``, so it must be a sliceable sequence of
                strings (a plain string is treated as a sequence of
                characters).
            n: Largest n-gram order; orders ``1..n`` are processed.
        """
        self.texts = texts
        self.n = n

        # phrase -> score, filled in by calculate_n().
        self.dfitf = {}

    def calculate(self):
        """Score every n-gram of order ``1..n`` and rank the results.

        Returns:
            A list of ``(phrase, score)`` pairs taken from ``self.dfitf``,
            ordered from the highest score to the lowest. Ties keep the
            order in which the phrases were first recorded.
        """
        for order in range(1, self.n + 1):
            per_text = [
                self.get_n_gram(text, order)
                for text in tqdm(self.texts, desc=f'counting {order} gram')
            ]
            self.calculate_n(per_text, order)

        ranked = list(self.dfitf.items())
        # list.sort is stable, so equal scores keep their insertion order.
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return ranked

    def calculate_n(self, n_grams, n):
        """Compute scores for one n-gram order and store them in ``self.dfitf``.

        Args:
            n_grams: One list of phrases per text, as produced by
                ``get_n_gram``.
            n: The n-gram order; only used in the progress-bar label.
        """
        doc_freq = Counter()   # phrase -> number of texts containing it
        term_freq = Counter()  # phrase -> total occurrences over all texts
        for phrases in tqdm(n_grams, desc=f'calculating dfitf for {n} gram'):
            occurrences = Counter(phrases)
            # Each distinct phrase counts once per text for document frequency.
            doc_freq.update(occurrences.keys())
            term_freq.update(occurrences)

        for phrase, df in doc_freq.items():
            tf = term_freq[phrase]
            self.dfitf[phrase] = df ** 2 / (tf * len(n_grams))

    def get_n_gram(self, text, n):
        """Return all contiguous ``n``-item windows of ``text`` as strings.

        Args:
            text: Sliceable sequence of strings.
            n: Window length.

        Returns:
            A list with one space-joined phrase per window, in order of
            appearance. Empty when ``text`` has fewer than ``n`` items.
        """
        window_count = len(text) - n + 1
        return [' '.join(text[start:start + n]) for start in range(window_count)]
