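"""
An implementation of a LIBRO-like operation.
THIS IS NOT FINISHED YET, EXPECT WEIRD BEHAVIOR.

Takes several samples for unit test generation, looks at their evaluation
trace, and picks the one that most closely resembles the issue description
(picked by an LLM or similar).

NOTE(review): the code in this file does not appear to do the above. It
prints a table with the number of SWT-Bench "good case" instances, the number
of resolved SWE-Bench instances, and the size of their overlap for a few
hard-coded runs. This header looks carried over from another script; confirm
and update.
"""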
import bisect
from collections import defaultdict
from typing import List, Optional, Tuple
import json
import pathlib

import fire
from unidiff import PatchSet

from datasets import load_from_disk

from measure_coverage_patch import log, extract_coverages_from_eval_output, extract_changed_lines_from_patch, \
    extract_good_case_from_eval_output, no_lines_covered, extract_number_added_tests_from_patch, load_eval_outputs, \
    load_blacklisted, BLACKLIST


def compute_overlap(
    eval_output_dir: str = "evaluation_output/swe-agent-demo3__swt_bench_lite__test/mode_vanilla",
    swe_bench_results: str = "results/experiments-swe-bench/20240402_sweagent_gpt4/results/results.json",
    dataset: str = "datasets/swt_bench_lite_aug1_bm25_diff_27k_cl100k",
    split: str = "test",
    log: callable = log,
):
    """Print good-case count, resolved count, and their overlap for one run.

    Writes a single line of the form ``"<good> | <resolved> | <overlap>"`` to
    stdout, where

    * ``<good>`` is the number of dataset instances whose evaluation output
      counts as a good case (either of the first two values returned by
      ``extract_good_case_from_eval_output`` is truthy),
    * ``<resolved>`` is the number of non-blacklisted entries under the
      ``"resolved"`` key of the SWE-Bench results file, and
    * ``<overlap>`` is the number of distinct instance ids present in both.

    Args:
        eval_output_dir: Directory passed to ``load_eval_outputs``.
        swe_bench_results: Path to a JSON file with a top-level ``"resolved"``
            list of instance ids.
        dataset: Path passed to ``datasets.load_from_disk``.
        split: Name of the dataset split to iterate over.
        log: Accepted for interface compatibility; not used by this function.

    Returns:
        None. The result is only printed.
    """
    # Use a context manager so the results file is closed deterministically
    # instead of relying on garbage collection of the file object.
    with open(swe_bench_results, encoding="utf-8") as results_file:
        resolved_ids = json.load(results_file)["resolved"]
    swe_bench_ress = [res for res in resolved_ids if res not in BLACKLIST]

    loaded_dataset = load_from_disk(dataset)
    eval_output_by_instance = load_eval_outputs(eval_output_dir)

    good_cases = []
    for example in loaded_dataset[split]:
        instance_id = example["instance_id"]
        # Skip blacklisted instances and those without any evaluation output.
        if instance_id in BLACKLIST or instance_id not in eval_output_by_instance:
            continue
        eval_outputs = eval_output_by_instance[instance_id]

        # Only instances with exactly four coverage entries are considered.
        # NOTE(review): presumably one entry per evaluation run, so anything
        # else means an incomplete evaluation - confirm against
        # extract_coverages_from_eval_output.
        coverage = extract_coverages_from_eval_output(eval_outputs)
        if len(coverage) != 4:
            continue

        # Only the first two of the five returned values matter here.
        ftp, etp, _, _, _ = extract_good_case_from_eval_output(eval_outputs)
        if ftp or etp:
            good_cases.append(instance_id)

    # The counts use the lists (duplicates included, as before); the overlap
    # is computed on distinct ids.
    overlap = set(good_cases) & set(swe_bench_ress)
    print(" | ".join(map(str, (len(good_cases), len(swe_bench_ress), len(overlap)))))

def main():
    """Print the SWT / SWE / overlap table for the hard-coded runs.

    Emits a two-line header and then one row per configured run. Each row
    starts with the run's label; the three numbers that follow are printed by
    ``compute_overlap`` on the same line.
    """
    dataset_path = "datasets/swt_bench_lite_aug1_bm25_diff_27k_cl100k"

    # (row prefix, evaluation output directory, SWE-Bench results file)
    # NOTE(review): the row prefixes are not padded to the header's first
    # column width, so the printed columns do not line up exactly.
    runs = (
        (
            " SWE Agent | ",
            "evaluation_output/swe-agent-demo3__swt_bench_lite__test/mode_vanilla",
            "results/experiments-swe-bench/20240402_sweagent_gpt4/results/results.json",
        ),
        (
            " ZSB  | ",
            "evaluation_output/gpt-4-1106-preview__swt_bench_lite_aug1_bm25_diff_27k_cl100k__seed=0,temperature=0__test/mode_vanillafuzzy/",
            "results/experiments-swe-bench/20240402_rag_gpt4/results/results.json",
        ),
        (
            " ZSP  | ",
            "evaluation_output/gpt-4-1106-preview__swt_bench_lite_aug1_bm25_27k_cl100k__seed=0,temperature=0__test/mode_custom/",
            "results/experiments-swe-bench/zsp_swe-lite_bm25_gpt4.json",
        ),
    )

    print("           | SWT | SWE | Overlap ")
    print("-----------|-----|-----|---------")
    for row_prefix, eval_dir, results_path in runs:
        # Keep the cursor on the same line; compute_overlap prints the numbers.
        print(row_prefix, end="")
        compute_overlap(
            eval_output_dir=eval_dir,
            swe_bench_results=results_path,
            dataset=dataset_path,
            split="test",
            log=log,
        )


        


# Script entry point: when run directly (not imported), hand `main` to
# python-fire so it is invoked from the command line.
if __name__ == "__main__":
    fire.Fire(main)
