import json
import os
import tqdm
import time

# Repair incomplete generation entries in every JSON file under ./train,
# using the file of the same name under ./train-1 as the fallback source.
#
# For each file, generation_0 .. generation_79 are inspected. An entry is
# considered incomplete when any of the required fields is missing or equal
# to the empty string. Incomplete entries are refreshed from the fallback
# file, and the repaired series is written back in place (indent=4).
data_dir = "./train"
backup_dir = "./train-1"
# Fields every generation entry is expected to carry.
required_fields = ("formal problem", "informal problem", "answer", "solution")
generation_count = 80

data_list = os.listdir(data_dir)
for data in tqdm.tqdm(data_list):
    data_path = os.path.join(data_dir, data)
    # Explicit encoding: do not depend on the platform's locale default.
    with open(data_path, encoding="utf-8") as file:
        thy_series = json.load(file)

    # The fallback file is parsed lazily and at most once per data file,
    # instead of once per incomplete generation.
    backup_series = None
    changed = False

    for i in range(generation_count):
        generation_key = f'generation_{i}'
        thy = thy_series.get(generation_key, {})
        if all(thy.get(field, "") != "" for field in required_fields):
            continue  # entry is complete, nothing to repair

        if backup_series is None:
            # A missing fallback file raises FileNotFoundError here, which
            # aborts the run rather than silently leaving gaps.
            with open(os.path.join(backup_dir, data), encoding="utf-8") as file:
                backup_series = json.load(file)
        tmp_thy = backup_series.get(generation_key, {})

        for field in required_fields:
            backup_value = tmp_thy.get(field, "")
            if backup_value != "":
                thy[field] = backup_value
            else:
                # Never blank out an existing value with an empty fallback;
                # still make sure the key exists in the output.
                thy[field] = thy.get(field, "")
        thy_series[generation_key] = thy
        changed = True

    # Write once per file, and only when at least one entry was incomplete.
    if changed:
        with open(data_path, 'w', encoding="utf-8") as file:
            json.dump(thy_series, file, indent=4)