import json
import os

# Maps an object category name to its integer id. The id of a category is
# simply its position in the tuple below (0-based), so the order of the names
# must not be changed.
categories_ids = {
    category_name: category_id
    for category_id, category_name in enumerate(
        (
            # ids 0-9
            "adult",
            "car",
            "ball/sports_ball",
            "child",
            "toy",
            "cat",
            "hamster/rat",
            "bread",
            "cup",
            "bottle",
            # ids 10-19
            "baby",
            "ski",
            "bird",
            "snowboard",
            "table",
            "screen/monitor",
            "chair",
            "dog",
            "backpack",
            "piano",
            # ids 20-29
            "stool",
            "bus/truck",
            "baby_seat",
            "chicken",
            "sofa",
            "aircraft",
            "handbag",
            "horse",
            "dish",
            "camera",
            # ids 30-39
            "squirrel",
            "guitar",
            "baby_walker",
            "bench",
            "cellphone",
            "laptop",
            "electric_fan",
            "motorcycle",
            "skateboard",
            "cattle/cow",
            # ids 40-49
            "suitcase",
            "cake",
            "stop_sign",
            "surfboard",
            "camel",
            "fish",
            "sheep/goat",
            "bat",
            "faucet",
            "sink",
            # ids 50-59
            "train",
            "rabbit",
            "crab",
            "tiger",
            "bicycle",
            "elephant",
            "fruits",
            "panda",
            "traffic_light",
            "turtle",
            # ids 60-69
            "bear",
            "racket",
            "penguin",
            "watercraft",
            "lion",
            "refrigerator",
            "scooter",
            "toilet",
            "duck",
            "stingray",
            # ids 70-79
            "oven",
            "vegetables",
            "pig",
            "frisbee",
            "snake",
            "kangaroo",
            "leopard",
            "antelope",
            "microwave",
            "crocodile",
        )
    )
}

# Destination of the converted annotation file written at the end of the script.
save_file = "../../data/vidstg/val.json"

# Load the three VidSTG inputs. Each file is opened in a ``with`` block so the
# handle is closed as soon as its content has been parsed.

# Collection of video ids; a walked annotation file is only converted when its
# base name is contained in it.
with open("../../data/vidstg/annotations/val_files.json") as files_fp:
    files = json.load(files_fp)

# Sequence of entries, each read through its "vid", "temporal_gt" and
# "captions" keys by the conversion loop.
with open("../../data/vidstg/annotations/val_annotations.json") as annotations_fp:
    vidstg_annotations = json.load(annotations_fp)

# Mapping that provides the "videos" list and the "trajectories" lookup.
with open("../../data/vidstg/annotations/val.json") as vidstg_fp:
    vidstg = json.load(vidstg_fp)

# Root directory that is walked for the per-video VidOR annotation JSON files.
# NOTE(review): this path starts with "../data" while every other path in this
# script starts with "../../data" - confirm which one is intended.
vidor_annotation_dir = "../data/vidor/training/"

# Ids of the original videos that have at least one "declarative" query and an
# entry in vidstg["trajectories"]. Stored as a set: the same video id can occur
# in many entries of vidstg["videos"], and the collection is only used for
# membership tests, which are O(1) on a set instead of O(n) on a list.
vidstg_vids = {
    video_entry["original_video_id"]
    for video_entry in vidstg["videos"]
    if video_entry["qtype"] == "declarative"
    and video_entry["original_video_id"] in vidstg["trajectories"]
}

# Category table of the output file: one {"id", "name"} record per category,
# in the order in which the categories are declared.
categories = [
    {"id": category_id, "name": category_name}
    for category_name, category_id in categories_ids.items()
]

# Running counters used to hand out consecutive ids to the exported videos and
# annotations.
video_id = 0
ann_id = 0

# Accumulators for the "annotations" and "videos" sections of the output file.
annotations = []
videos = []

def _xywh_and_area(box):
    """Convert an ``xmin/ymin/xmax/ymax`` box into ``([x, y, w, h], area)``.

    All four coordinates are converted with ``float``; the area is the product
    of the resulting width and height.
    """
    x_min = float(box["xmin"])
    y_min = float(box["ymin"])
    box_width = float(box["xmax"]) - x_min
    box_height = float(box["ymax"]) - y_min
    return [x_min, y_min, box_width, box_height], box_width * box_height


# Group the VidSTG annotations by video once, instead of scanning the complete
# annotation list again for every video file. The order inside each group is
# the order of the original list.
vidstg_by_vid = {}
for vidstg_entry in vidstg_annotations:
    vidstg_by_vid.setdefault(vidstg_entry["vid"], []).append(vidstg_entry)

# A video is converted only when it is listed in `files` AND selected in
# `vidstg_vids`; a set makes that check O(1) per walked file.
wanted_vids = set(files).intersection(vidstg_vids)

for dirpath, _dirnames, filenames in os.walk(vidor_annotation_dir):
    for ann_file in filenames:
        # Only "<video id>.json" files are annotation files. Checking the
        # extension explicitly avoids treating e.g. "<video id>.yaml" as one,
        # which blindly stripping the last five characters would do.
        original_vid, extension = os.path.splitext(ann_file)
        if extension != ".json" or original_vid not in wanted_vids:
            continue

        print(video_id)  # progress indicator
        with open(os.path.join(dirpath, ann_file)) as vidor_fp:
            vidor_json = json.load(vidor_fp)

        vidstg_ann_vid = vidstg_by_vid.get(original_vid, [])
        if not vidstg_ann_vid:
            # Without any temporal annotation there is no frame window to
            # export; fail with a message that names the offending video.
            raise ValueError(
                "no VidSTG annotations found for video %r" % original_vid
            )

        # One frame range per VidSTG annotation of this video.
        temporal_spans = [
            range(entry["temporal_gt"]["begin_fid"], entry["temporal_gt"]["end_fid"])
            for entry in vidstg_ann_vid
        ]
        start_frame = min(span[0] for span in temporal_spans)
        end_frame = max(span[-1] for span in temporal_spans)
        frame_count = vidor_json["frame_count"]

        # The keys are inserted in a fixed order so that the layout of the
        # written JSON stays stable.
        videos.append(
            {
                "width": vidor_json["width"],
                "height": vidor_json["height"],
                # Directory relative to the annotation root + "<video id>.mp4".
                "path": dirpath.replace(vidor_annotation_dir, "")
                + "/"
                + original_vid
                + ".mp4",
                "id": video_id,
                "start_frame": start_frame,
                "end_frame": end_frame,
                "length": frame_count,
            }
        )

        # Frame window shared by all objects of this video.
        # NOTE(review): `end_frame` is already the last index of the longest
        # span (end_fid - 1) and range() excludes it once more, so that frame
        # is not exported - confirm this is intended.
        selected_frames = range(start_frame, end_frame)
        # Part of the window that actually exists in the video.
        exported_frames = range(max(start_frame, 0), min(end_frame, frame_count))

        tracked_objects = vidor_json["subject/objects"]
        # Per exported frame, index the trajectory entries by track id so that
        # each object finds its box with one lookup instead of a scan. When a
        # frame lists the same tid twice the last entry wins, exactly as a
        # linear scan without `break` would behave.
        entries_by_frame = (
            [
                {entry["tid"]: entry for entry in vidor_json["trajectories"][frame]}
                for frame in exported_frames
            ]
            if tracked_objects
            else []
        )

        for ann in tracked_objects:
            target_id = ann["tid"]

            trajectory = []
            areas = []
            for frame_entries in entries_by_frame:
                entry = frame_entries.get(target_id)
                if entry is None:
                    # Object not present in this frame: empty placeholder box.
                    trajectory.append([0.0, 0.0, 0.0, 0.0])
                    areas.append(0)
                else:
                    bbox, area = _xywh_and_area(entry["bbox"])
                    trajectory.append(bbox)
                    areas.append(area)

            annotations.append(
                {
                    "height": vidor_json["height"],
                    "width": vidor_json["width"],
                    "length": len(selected_frames),
                    "start_frame": selected_frames[0],
                    "category_id": categories_ids[ann["category"]],
                    "iscrowd": 0,
                    "id": ann_id,
                    "video_id": video_id,
                    "original_video_id": original_vid,
                    "target_id": target_id,
                    # Descriptions of all VidSTG annotations whose first
                    # caption targets this object (empty list if there is none).
                    "captions": [
                        entry["captions"][0]["description"]
                        for entry in vidstg_ann_vid
                        if entry["captions"][0]["target_id"] == target_id
                    ],
                    "bboxes": trajectory,
                    "bbox_format": "xywh",
                    "areas": areas,
                }
            )
            ann_id += 1

        video_id += 1


save_data = {"categories": categories, "videos": videos, "annotations": annotations}
with open(save_file, "w") as save_fp:
    json.dump(save_data, save_fp)
