import random

import jsonlines
from datasets import load_dataset

from backend.gpt import query_gpt4

# number of dataset records to generate
NUM_SAMPLES = 30

# callable that sends a prompt string to the model and returns its reply
query_func = query_gpt4

# load dataset and keep only the fields used below, as
# [user 1 personas, user 2 personas, conversation] triples
dataset = load_dataset("google/Synthetic-Persona-Chat", cache_dir='./downloads/')
samples = dataset['train']
samples_list = [
    [row['user 1 personas'], row['user 2 personas'], row['Best Generated Conversation']]
    for row in samples
]

print(f"load {len(samples_list)} samples from persona")

# output writer; one JSON object per generated record
fw = jsonlines.open("./exp/dataset_1hop_supp.jsonl", "w")

# norm utterances
def norm_name(text, person_a_name, person_b_name):
    """Rewrite a raw transcript so that it uses concrete speaker names.

    Steps, in order:
      1. lower-case the text, replace "user 1" / "user 2" with the given
         names, and delete the fragments "'s name" and " name";
      2. every line that still contains "name" is printed, sent to
         ``query_func`` with a prompt asking it to fill in the placeholder,
         and replaced by (and printed as) the reply;
      3. "(name)" and "[name]" wrappers around either speaker name are
         unwrapped.

    Args:
        text: conversation text, one utterance per line.
        person_a_name: replacement for "user 1".
        person_b_name: replacement for "user 2".

    Returns:
        The normalised conversation as a single newline-joined string.
    """
    # Prompt sent verbatim to the model. It is assembled line by line so the
    # significant whitespace (trailing space after "alice", trailing indent
    # at the end) stays visible.
    prompt_template = "\n".join((
        "",
        "I will give a utterance in the conversation between {} and {}",
        "please replace the name or username in the text with the correct name based on role and utterance.",
        "You should identify whether each placeholder ([name]/[username]) should be filled with its own name or the other person's name",
        "for example, if bob is talking with alice ",
        '"bob: my name is [name]." should be replaced to "bob: my name is [bob].".',
        "or",
        '"bob: nice to meet you, username." should be replaced to "nice to meet you, alice".',
        "here is the conversation:",
        "{}",
        "please only return the replaced text without adding anything else, just replace and return.",
        " " * 12,
    ))

    lowered = text.lower()
    renamed = lowered.replace("user 1", person_a_name)
    renamed = renamed.replace("user 2", person_b_name)
    cleaned = renamed.replace("'s name", "").replace(" name", "")

    normalized_lines = []
    for line in cleaned.split("\n"):
        # "username" contains "name", so a single substring test covers both
        if "name" in line:
            print(line)
            line = query_func(
                prompt_template.format(person_a_name, person_b_name, line)
            )
            print(line)
        normalized_lines.append(line)

    result = "\n".join(normalized_lines)
    # unwrap "(alice)" / "(bob)" first, then "[alice]" / "[bob]"
    for opener, closer in (("(", ")"), ("[", "]")):
        for speaker in (person_a_name, person_b_name):
            result = result.replace(opener + speaker + closer, speaker)
    return result

# Prompt templates are loop-invariant, so they are defined once up front.
# Their text is sent verbatim to the model: do not reflow or re-indent it.

# filled with (alice/bob conversation, alice's persona)
GENERATE_FACT_PROMPT = """
here is a conversation between alice and bob:
{}
here is some facts about alice:
{}
now you need to choose a fact.
the fact can be deduced from the conversation between alice and bob.

YOU MUST ONLY RETURN THE fact.
"""

# filled with (alice/bob conversation, chosen fact)
GENERATE_QUESTION_PROMPT = """
here is a conversation between alice and bob:
{}
here is a chosen fact which can be deduced from the conversation between alice and bob.
{}
Now you need to generate a detailed question asking about alice, and the answer to this question is the chosen fact.
for example, 'what is the sport interest of alice' or 'how old is alice'.
the answer to this question, which is the chosen fact, should be unambiguous.

YOU MUST ONLY RETURN THE question.
"""

# filled with (bob's persona, charlie's persona)
GENERATE_CONVERSATION_BC_PROMPT = """
here is the persona of bob:

{}

here is the persona of charlie:

{}

now you need generate a conversation between bob and charlie.
the generated conversation should obey their persona.
you can add some details to the conversation to make it longer, at least 20 turns of dialogue
YOU SHOULD ONLY return the generated conversation between bob and charlie
    """

# try/finally guarantees the output file is closed (and flushed) even when a
# model call or a normalisation step raises part-way through the run.
try:
    # Two different conversations are needed per record; fail early with a
    # clear message rather than spinning forever in the re-draw loop below.
    if len({entry[-1] for entry in samples_list}) < 2:
        raise ValueError("need at least two distinct conversations to sample from")

    for idx in range(NUM_SAMPLES):
        # random sample conversations
        ab = random.choice(samples_list)
        cd = random.choice(samples_list)
        # Two independent draws can pick the same conversation. Re-draw
        # instead of asserting, which would abort the whole run (and is
        # removed entirely when Python runs with -O).
        while cd[-1] == ab[-1]:
            cd = random.choice(samples_list)

        persona, personb = ab[0], ab[1]
        personc, persond = cd[0], cd[1]

        chatab = norm_name(ab[-1], "alice", "bob")
        chatcd = norm_name(cd[-1], "charlie", "dave")

        print("norm conversations complete")
        print("Persona of Alice\n{}".format(persona))
        print("Persona of Bob\n{}".format(personb))
        print("Persona of Charlie\n{}".format(personc))
        print("Persona of Dave\n{}".format(persond))

        # step 1: pick a fact about alice that the alice/bob chat supports
        generate_fact_prompt = GENERATE_FACT_PROMPT.format(chatab, persona)
        print(generate_fact_prompt)
        print("-"*20)
        answer = query_func(generate_fact_prompt)
        print(answer)

        # step 2: generate a question whose answer is that fact
        generate_question_prompt = GENERATE_QUESTION_PROMPT.format(chatab, answer)
        print(generate_question_prompt)
        print("-"*20)
        question = query_func(generate_question_prompt)
        print(question)

        # step 3: generate conversation between bob and charlie
        generate_conversation_bc_prompt = GENERATE_CONVERSATION_BC_PROMPT.format(personb, personc)
        print(generate_conversation_bc_prompt)
        print("-"*10)
        generated_bob_charlie = query_func(generate_conversation_bc_prompt)
        print(generated_bob_charlie)

        sample = {
            'id': str(idx),
            'modified_alice_bob_conversation': chatab,
            'modified_charlie_dave_conversation': chatcd,
            'chat_bob_charlie': generated_bob_charlie,
            'task_prompt': question,
            'answer': answer,
        }
        fw.write(sample)
finally:
    fw.close()