import os
import json
import re
from shutil import copyfile
def check_oracle(file_path, k=10):
    """Decide whether the JSON file at *file_path* passes the oracle check.

    The file qualifies as soon as one of the entries ``a_0`` .. ``a_{k-1}``
    is an object whose ``label`` equals 1. If none does and the document has
    an ``oracle`` field, the verdict of ``check_syntax`` on that field is
    returned instead.

    Args:
        file_path: Path of the JSON file to inspect.
        k: Number of ``a_<i>`` entries to look at (defaults to 10).

    Returns:
        True when the file qualifies, False otherwise.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON interchange text is UTF-8; do not depend on the platform locale.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # Only a JSON object can carry the ``a_<i>`` / ``oracle`` fields.
    if not isinstance(data, dict):
        return False
    for i in range(k):
        # Tolerate files with fewer than k entries (or malformed entries)
        # instead of raising KeyError/TypeError on them.
        entry = data.get(f"a_{i}")
        if isinstance(entry, dict) and entry.get('label') == 1:
            return True
    if 'oracle' in data:
        return check_syntax(data['oracle'])
    return False
    
def check_syntax(oracle):
    """Report whether *oracle* contains a ``theorem ... shows "..."`` statement.

    Args:
        oracle: Text to inspect. Any non-string value (e.g. a JSON ``null``)
            is treated as not matching.

    Returns:
        True if, after whitespace normalization, the text contains the word
        ``theorem`` followed later by ``shows "`` and a closing double quote;
        False otherwise.
    """
    if not isinstance(oracle, str):
        return False
    # Collapse every run of whitespace (spaces, tabs, newlines) into a single
    # space so that `shows` and the quoted statement may be separated by any
    # whitespace, not only by spaces.
    normalized = re.sub(r'\s+', ' ', oracle)
    return re.search(r'theorem[\s\S]*?shows "[\s\S]*?"', normalized) is not None

def get_json_files(root_dir):
    """Collect the ``.json`` files under *root_dir* accepted by ``check_oracle``.

    Args:
        root_dir: Directory that is walked recursively with ``os.walk``.

    Returns:
        A list of paths (joined with the directory they were found in), in
        the order ``os.walk`` yields them, for which ``check_oracle`` is truthy.
    """
    # Lazily enumerate candidates so each file is checked as it is discovered.
    candidates = (
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(root_dir)
        for name in names
        if name.endswith('.json')
    )
    return [path for path in candidates if check_oracle(path)]

if __name__ == "__main__":
    # Gather the qualifying JSON files from each selected task subdirectory.
    subsets = ('0', '15', '17', '48')
    file_list = [
        path
        for subset in subsets
        for path in get_json_files(f'MATH/batch/task_test_gpt-4/{subset}')
    ]
    print(len(file_list))
    for source in file_list:
        # Dry run: only the destination path is computed; the copy itself is
        # left disabled below.
        target = source.replace('MATH', 'un_oracle')
        # print(source, target)
        # copyfile(source, target)
