import random
import sys
sys.path.append("../..")
import jsonlines
from datasets import load_dataset

from backend.gpt import query_gpt4

# How many samples a single run of this script appends to the output file.
NUM_SAMPLES = 30

# Backend used for every prompt below; it is called with a prompt string and
# its return value is used as text.
query_func = query_gpt4

# Load the training split and keep, for each row, the two persona fields and
# the conversation field as a 3-item list: [persona 1, persona 2, conversation].
dataset = load_dataset("google/Synthetic-Persona-Chat", cache_dir='../../downloads/')
samples = dataset['train']
samples_list = [
    [row['user 1 personas'], row['user 2 personas'], row['Best Generated Conversation']]
    for row in samples
]

print("load {} samples from persona".format(len(samples_list)))

# Opened in append mode, so records written by earlier runs are preserved.
fw = jsonlines.open("./dataset_2hop_supp.jsonl", "a")

# normalize speaker labels and name placeholders in a conversation
def norm_name(text, person_a_name, person_b_name):
    """Rewrite a raw conversation so both speakers carry concrete names.

    The text is lower-cased, "user 1" / "user 2" are replaced with
    ``person_a_name`` / ``person_b_name``, and the substrings "'s name" and
    " name" are deleted. Any line that still contains "name" afterwards is
    sent to ``query_func`` with a prompt asking for the placeholder to be
    filled in, and the reply replaces that line. Finally, parentheses or
    square brackets wrapped directly around either name are removed.

    Args:
        text: Conversation with one utterance per line.
        person_a_name: Name substituted for "user 1".
        person_b_name: Name substituted for "user 2".

    Returns:
        The normalized conversation, utterances joined with newlines.
    """
    lowered = text.lower()
    lowered = lowered.replace("user 1", person_a_name)
    lowered = lowered.replace("user 2", person_b_name)
    lowered = lowered.replace("'s name", "")
    lowered = lowered.replace(" name", "")

    fixed_lines = []
    for line in lowered.split("\n"):
        # "username" contains "name", so one substring test covers both cases.
        if "name" in line:
            print(line)
            norm_utterance_prompt = """
I will give a utterance in the conversation between {} and {}
please replace the name or username in the text with the correct name based on role and utterance.
You should identify whether each placeholder ([name]/[username]) should be filled with its own name or the other person's name
for example, if bob is talking with alice 
"bob: my name is [name]." should be replaced to "bob: my name is [bob].".
or
"bob: nice to meet you, username." should be replaced to "nice to meet you, alice".
here is the conversation:
{}
please only return the replaced text without adding anything else, just replace and return.
            """.format(person_a_name, person_b_name, line)
            line = query_func(norm_utterance_prompt)
            print(line)
        fixed_lines.append(line)

    result = "\n".join(fixed_lines)
    # Unwrap "(name)" first, then "[name]", each for speaker A before speaker B.
    for opener, closer in (("(", ")"), ("[", "]")):
        for speaker in (person_a_name, person_b_name):
            result = result.replace(opener + speaker + closer, speaker)
    return result

# ---------------------------------------------------------------------------
# Prompt templates. They are loop-invariant, so they are defined once here and
# filled with str.format for each sample.
# ---------------------------------------------------------------------------

# Args: alice persona, dave persona, seed persona fragment, "same"/"different".
ADD_PERSONA_PROMPT = """
Personas are aspects of the user’s character that provide insights into their personality, motivations, and behaviors.
A taxonomy of persona can be:
1. Demographics (Location, Employment, School, Family Status, Possession, Marital Status, Age, Gender)
2. Psychographics (Preference, Hobby, Personal Characteristics)
3. Wellness (Disease, Symptom)

here is the persona of alice:

{}

here is the persona of dave:

{}

now you need to try to create a new persona by adding some details on an existing persona, '{}'.
this new persona can be added to both alice and dave, but in either same or different ways.
1. If it was added in the same way, both alice and dave have this persona.
2. If it was added in different way, either alice or dave will have this persona, and the other will dislike/against/posses different kind of this persona.
Now you choose to add this persona in {} way.
The new persona should be
1. specific (It cannot be a description that is too common or too abstract, such as go outdoor)
2. full of imagination.
YOU SHOULD ONLY return 
1. the introduction of the persona with a short phase or short sentence.
2. and how it was added to alice and dave's personality.
    """

# Args: alice persona, dave persona, shared persona description.
# The example questions refer to alice and dave, the two people the shared
# persona is actually about.
EXPLAIN_PERSONA_PROMPT = """
here is the persona of alice:

{}

here is the persona of dave:

{}

here is the new persona of alice and dave and how it was added to alice and dave:

{}


try to describe the new persona with a simple and interesting question.
the question should be as short as possible, but at the same time this question is sufficient to identify this newly added persona.
for example,
if the new persona is "Alice and Dave often go biking", then the question may be "what is the common sport interest of alice and dave?"
if the new persona is "Alice love fish soup while Dave hate it", then the question may be "what is the thing that alice loves while dave hates?"
if the new persona is "Alice is a math teacher and Dave is a science teacher", then the question may be "What subject is Alice good at but Dave is not good at?"
YOU SHOULD ONLY RETURN THE QUESTION.
    """

# Args: alice/bob conversation, alice persona, bob persona, shared persona.
ADD_PERSONA_AB_PROMPT = """
here is a conversation between alice and bob:

{}

here is the persona of alice:

{}

here is the persona of bob:

{}

here is a new persona about alice and dave:

{}

modify only one or two utterance of the conversation to ONLY include the alice part of the new persona.
you should make the modified conversation natural, fluent and coherent.
you can not modify the original persona.
you can not delete or ignore the personas mentioned in the original conversation
you can add some details to the conversation to make it longer, at least 20 turns of dialogue
YOU SHOULD ONLY RETURN THE modified conversation without omitting any utterance
    """

# Args: charlie/dave conversation, charlie persona, dave persona, shared persona.
ADD_PERSONA_CD_PROMPT = """
here is a conversation between charlie and dave:

{}

here is the persona of charlie:

{}

here is the persona of dave:

{}

here is a new persona about alice and dave:

{}

modify only one or two utterance of the conversation to ONLY include the dave part of the new persona.
you should make the modified conversation natural, fluent and coherent.
you can not modify the original persona.
you can not delete or ignore the personas mentioned in the original conversation.
you can add some details to the conversation to make it longer, at least 20 turns of dialogue
YOU SHOULD ONLY RETURN THE modified conversation without omitting any utterance
    """

# Args: bob persona, charlie persona.
GENERATE_CONVERSATION_BC_PROMPT = """
here is the persona of bob:

{}

here is the persona of charlie:

{}

now you need generate a conversation between bob and charlie.
the generated conversation should obey their persona.
you can add some details to the conversation to make it longer, at least 20 turns of dialogue
YOU SHOULD ONLY return the generated conversation between bob and charlie
    """

# Args: shared persona description (context), question about it.
NORM_ANSWER_PROMPT = """
here is the context:

{}

here is the question:

{}

please return the answer to the question in one word or one phrase
    """


def _ask(prompt, rule_width=10):
    """Print ``prompt`` and a dashed rule, query the LLM, print and return the reply."""
    print(prompt)
    print("-" * rule_width)
    reply = query_func(prompt)
    print(reply)
    return reply


try:
    # The sampling below retries until two different conversations are drawn;
    # fail fast instead of looping forever when that can never happen.
    if len({row[-1] for row in samples_list}) < 2:
        raise ValueError("need at least two distinct conversations to build a sample")

    for idx in range(NUM_SAMPLES):
        # Draw two different source conversations: alice/bob and charlie/dave.
        # Resample on a collision rather than aborting the whole run.
        ab = random.choice(samples_list)
        cd = random.choice(samples_list)
        while cd[-1] == ab[-1]:
            cd = random.choice(samples_list)
        persona, personb = ab[0], ab[1]
        personc, persond = cd[0], cd[1]

        # Pick a seed persona fragment (one "."-separated piece of some
        # first-speaker persona) that neither alice nor dave already has.
        # Fragments are stripped so no stray newline ends up inside the
        # quoted seed in the prompt; empty fragments are rejected.
        while True:
            needle_source = random.choice(samples_list)
            insert_needle = random.choice(needle_source[0].split(".")).strip()
            if insert_needle and insert_needle not in persona and insert_needle not in persond:
                break

        chatab = norm_name(ab[-1], "alice", "bob")
        chatcd = norm_name(cd[-1], "charlie", "dave")

        print("norm conversations complete")
        print("Persona of Alice\n{}".format(persona))
        print("Persona of Bob\n{}".format(personb))
        print("Persona of Charlie\n{}".format(personc))
        print("Persona of Dave\n{}".format(persond))

        # Invent the persona shared by alice and dave; even-numbered samples
        # add it the "same" way to both, odd-numbered ones in "different" ways.
        mode = "same" if idx % 2 == 0 else "different"
        common_persona = _ask(
            ADD_PERSONA_PROMPT.format(persona, persond, insert_needle, mode),
            rule_width=20,
        )

        # Turn the shared persona into a short question that identifies it.
        common_persona_question = _ask(
            EXPLAIN_PERSONA_PROMPT.format(persona, persond, common_persona),
            rule_width=20,
        )

        # Weave alice's half into the alice/bob chat and dave's half into the
        # charlie/dave chat.
        modified_alice_bob_conversation = _ask(
            ADD_PERSONA_AB_PROMPT.format(chatab, persona, personb, common_persona)
        )
        modified_charlie_dave_conversation = _ask(
            ADD_PERSONA_CD_PROMPT.format(chatcd, personc, persond, common_persona)
        )

        # Generate a bob/charlie conversation from their personas alone.
        generated_bob_charlie = _ask(
            GENERATE_CONVERSATION_BC_PROMPT.format(personb, personc)
        )

        # Reduce the shared persona to a one-word / one-phrase answer.
        answer = _ask(
            NORM_ANSWER_PROMPT.format(common_persona, common_persona_question)
        )

        fw.write({
            'id': str(idx),
            'modified_alice_bob_conversation': modified_alice_bob_conversation,
            'modified_charlie_dave_conversation': modified_charlie_dave_conversation,
            'chat_bob_charlie': generated_bob_charlie,
            'task_prompt': common_persona_question,
            'answer': answer,
            'needle_detail': common_persona,
        })
finally:
    # Close the writer even when an LLM call or sampling step raises, so
    # records already written are not left in an unclosed file.
    fw.close()