import json
import csv
# Number of groups handled by this script: generate_names() emits one batch of
# names per index in range(scale), and main() builds one dataset record per
# index in range(scale).
scale = 30

# Activities listed without participant information, keyed by activity name.
# "duration" is expressed in half-hour units: 1 represents half an hour.
# NOTE(review): the meaning of "is_available" is not evident from this file —
# confirm against the code that generates the schedules.
single_activities = {
    "Breakfast": {"duration": 1, "is_available": 0},
    "Lunch": {"duration": 2, "is_available": 0},
    "Dinner": {"duration": 2, "is_available": 0},
    "Sleep": {"duration": 16, "is_available": 0},
    "Exercise": {"duration": 3, "is_available": 1},
    "Work": {"duration": 6, "is_available": 0},
    "Housekeeping": {"duration": 2, "is_available": 1},
    "Meditation": {"duration": 1, "is_available": 1},
    "Yoga": {"duration": 3, "is_available": 1},
    "Grocery Shopping": {"duration": 2, "is_available": 1},
    "Reading": {"duration": 1, "is_available": 1},
    "Watching TV": {"duration": 3, "is_available": 1},
    "Listening to music": {"duration": 2, "is_available": 1},
    "Playing video games": {"duration": 2, "is_available": 1},
    "Doing board games": {"duration": 2, "is_available": 1},
    "Walking the dog": {"duration": 2, "is_available": 1},
    "Gardening": {"duration": 3, "is_available": 1},
    "Laundry": {"duration": 2, "is_available": 1},
    "Online Shopping": {"duration": 2, "is_available": 1}
}

# Activities that carry a "participants" count, keyed by activity name.
# NOTE(review): "duration" presumably uses the same half-hour units as
# single_activities, "participants" presumably is the number of people taking
# part, and "start_set" presumably is a pair of half-hour slot indices bounding
# the allowed start time — none of this is established by the code in this
# file; confirm against the schedule generator.
shared_activities = {
  "Team Meeting": {"duration": 2, "is_available": 0, "participants": 3, "start_set": [20, 40]},
  "Group Exercise": {"duration": 2, "is_available": 0, "participants": 3, "start_set": [14, 40]},
  #"Family Dinner": {"duration": 2, "is_available": 0, "participants": 3, "start_set": [36, 40]},
  "Conference Call": {"duration": 2, "is_available": 0, "participants": 3, "start_set": [16, 36]},
  "Movie Night": {"duration": 4, "is_available": 0, "participants": 2, "start_set": [36, 44]},
  "Group Study": {"duration": 4, "is_available": 0, "participants": 3, "start_set": [16, 40]},
  "Business Presentation": {"duration": 2, "is_available": 0, "participants": 3, "start_set": [16, 36]},
  "Camping Trip": {"duration": 10, "is_available": 0, "participants": 3, "start_set": [14, 34]},
  "Cooking Class": {"duration": 4, "is_available": 0, "participants": 3, "start_set": [20, 40]},
  "Book Club Meeting": {"duration": 4, "is_available": 0, "participants": 3, "start_set": [20, 40]}
}

def generate_names(total_people, num_groups=None):
    """Return person names of the form ``<letter><group index>``.

    Names are ordered group by group: ``A0, B0, ..., A1, B1, ...``, so the
    names of group ``g`` occupy the slice
    ``[g * total_people:(g + 1) * total_people]`` of the result.

    Args:
        total_people: Number of people per group. Each person in a group gets
            a distinct uppercase ASCII letter, so this must be between 0 and
            26 inclusive.
        num_groups: Number of groups to generate. When ``None`` (the
            default), the module-level ``scale`` is used.

    Returns:
        A list of ``total_people * num_groups`` name strings.

    Raises:
        ValueError: If ``total_people`` is outside ``0..26``. Without this
            check the letter slice would silently yield a different number
            of names per group and break the slicing described above.
    """
    letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    if not 0 <= total_people <= len(letters):
        raise ValueError(
            "total_people must be between 0 and {}, got {!r}".format(
                len(letters), total_people
            )
        )
    if num_groups is None:
        num_groups = scale
    return [
        letter + str(idx)
        for idx in range(num_groups)
        for letter in letters[:total_people]
    ]

def write_jsonl(data_list, filename):
    """Write each item of *data_list* to *filename* as one JSON document per line.

    The file is opened in write mode, so any existing content is replaced.
    """
    with open(filename, 'w') as out:
        out.writelines(json.dumps(record) + '\n' for record in data_list)
def read_jsonl(filename):
    """Read a JSON Lines file and return its decoded records as a list.

    Args:
        filename: Path of the file to read; each non-blank line must hold one
            JSON document.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data_list = []
    # JSON text is UTF-8, so decode explicitly instead of relying on the
    # platform's default encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Blank lines (commonly a trailing one) carry no record; feeding
            # them to json.loads would raise.
            if not line:
                continue
            data_list.append(json.loads(line))
    return data_list

def find_longest_activity_name(schedule_dict, single_activities, shared_activities):
    """Return the names of the longest non-sleep activities across all agents.

    ``schedule_dict`` maps each agent to a mapping of activity key ->
    details containing ``'start'`` and ``'end'``. Activity keys containing
    ``"Sleep"`` are ignored. The duration of an entry is ``end - start``;
    every entry whose duration equals the maximum (never taken to be below 0)
    contributes its key with the last character removed.

    ``single_activities`` and ``shared_activities`` are accepted but not used.

    Returns:
        A set of activity names; empty when no entry qualifies.
    """
    # Gather (key, duration) for every entry that is not a sleep entry.
    spans = [
        (label, slot['end'] - slot['start'])
        for agenda in schedule_dict.values()
        for label, slot in agenda.items()
        if "Sleep" not in label
    ]
    # The running maximum starts from 0, so negative durations never win.
    longest = max([0] + [length for _, length in spans])
    return {label[:-1] for label, length in spans if length == longest}

def _format_half_hour_slot(slot):
    """Render a half-hour slot index as ``H:MM`` (e.g. 21 -> ``10:30``)."""
    hour, half = divmod(slot, 2)
    return f"{hour}:{'30' if half else '00'}"


def schedule_dict_2_str(name, schedule_dict):
    """Describe one person's schedule as a list of human-readable lines.

    Args:
        name: The person the schedule belongs to; this name is left out of
            the "with ..." list of multi-person entries.
        schedule_dict: Mapping of activity key -> details. Each details dict
            needs ``'start'`` and ``'end'`` (half-hour slot indices) and
            ``'type'`` (0 for a single-person activity, anything else for a
            multi-person one). Multi-person entries also need
            ``'participants_list'``.

    Returns:
        A list of strings ordered by start slot, formatted as
        ``"H:MM-H:MM Activity"`` or ``"H:MM-H:MM Activity with X and Y"``.
        The activity name is the key with its last character removed.
    """
    schedule_list = []
    entries = sorted(schedule_dict.items(), key=lambda item: item[1]['start'])
    for activity, details in entries:
        time_range = (
            f"{_format_half_hour_slot(details['start'])}"
            f"-{_format_half_hour_slot(details['end'])}"
        )
        label = activity[:-1]
        if details['type'] == 0:  # single person activity
            schedule_list.append(f"{time_range} {label}")
        else:  # multi-person activity
            # Only multi-person entries use the participant list, so it is
            # read here rather than being required on every entry.
            participants = ' and '.join(
                p for p in details['participants_list'] if p != name
            )
            schedule_list.append(f"{time_range} {label} with {participants}")
    return schedule_list

def main():
    """Assemble one dataset record per group and write ``dataset_medium.jsonl``.

    Reads the per-group schedules from ``schedule_data_list.jsonl`` and the
    messages from ``dialogue.csv`` (the first row is skipped as a header; the
    remaining rows are ``sender, receiver, message``). For each of the
    ``scale`` groups, the record holds the raw schedule, a readable schedule
    per person, the messages exchanged between members of the group, the two
    agents to be questioned, the question text and the expected answer.

    Raises:
        ValueError: If a non-empty dialogue row does not have exactly three
            columns.
    """
    total_people = 6
    name_pool = generate_names(total_people)
    schedule_list = read_jsonl('schedule_data_list.jsonl')

    # The dialogue file is the same for every group, so parse it once here
    # instead of re-reading it on each iteration of the group loop.
    # newline='' is the mode the csv module documents for files handed to
    # csv.reader, so newlines embedded in quoted fields are parsed correctly.
    dialogue_rows = []
    with open('dialogue.csv', 'r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header row
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they hold no message.
                continue
            sender, receiver, message = row
            dialogue_rows.append((sender, receiver, message))

    write_data_list = []
    for i in range(scale):
        name_begin_p = i * total_people
        # Names of the total_people members of this group.
        name_group = name_pool[name_begin_p:name_begin_p + total_people]
        group_members = set(name_group)

        schedule_nl = [
            {name: schedule_dict_2_str(name, schedule_list[i][name])}
            for name in name_group
        ]

        # Keep only messages whose sender and receiver are both in this group.
        message_list = [
            "from {} to {} : {}".format(sender, receiver, message)
            for sender, receiver, message in dialogue_rows
            if sender in group_members and receiver in group_members
        ]

        activity_set = find_longest_activity_name(schedule_list[i], single_activities, shared_activities)
        # str(set) orders elements by string hash, which varies between
        # interpreter runs; emit the same "{...}" notation with sorted
        # elements so regenerating the dataset gives identical answers.
        if activity_set:
            answer = "{" + ", ".join(repr(item) for item in sorted(activity_set)) + "}"
        else:
            answer = str(activity_set)

        write_data_list.append({
            'id': i + 1,
            'schedule': schedule_list[i],
            'schedule_nl': schedule_nl,
            'message': message_list,
            'QA agents': [name_group[0], name_group[total_people // 2]],
            'question': 'Please find out the activity with longest duration (except sleep) on the schedule of all people you had communication with. You two may known different people and you need to gather all of them. If there are multiple activity with the same longest duration, list all of them.',
            'answer': answer,
        })
        print(activity_set)

    write_jsonl(write_data_list, 'dataset_medium.jsonl')


# Run the generator only when this file is executed as a script, so importing
# the module (for example to reuse its helpers) does not read or write files.
if __name__ == "__main__":
    main()
