### playing around with node2vec

import networkx as nx
from node2vec import Node2Vec
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score
import numpy.random as nprand
import pandas as pd

# Build a two-block planted-partition graph (a simple stochastic block model),
# embed its nodes with node2vec, cluster the embeddings with k-means and
# compare the recovered clusters to the planted blocks.
n = 100          # nodes per block
nodes = [n, n]   # block sizes, for the equivalent nx.stochastic_block_model call
p = 0.5          # within-block edge probability
q = 0.005        # between-block edge probability
probs = [[p, q], [q, p]]

# Equivalent general SBM formulation, kept for reference:
# graph = nx.stochastic_block_model(nodes, probs)

# Use p and q rather than repeating the literals so that changing the
# parameters above actually changes the generated graph.
graph = nx.planted_partition_graph(2, n, p, q)

# Ground-truth labels: n zeros followed by n ones.
# NOTE(review): assumes planted_partition_graph numbers nodes block by block
# (0..n-1 in the first block, n..2n-1 in the second) -- confirm against the
# networkx documentation.
a = [[0] * n, [1] * n]
truth = [label for block in a for label in block]

node2vec = Node2Vec(graph, dimensions=64, walk_length=30, num_walks=200, workers=4)  # Use temp_folder for big graphs

# Embed nodes
model = node2vec.fit(window=10, min_count=1, batch_words=4)

# One embedding row per node, in graph.nodes() order. The model is queried
# with the string form of each node id.
node_ids = list(graph.nodes())
emb_df = pd.DataFrame(
    [model.wv.get_vector(str(node)) for node in node_ids],
    index=node_ids,
)

X = emb_df.values

# Cluster the embeddings into as many groups as there are planted blocks.
kmeans = KMeans(n_clusters=2, n_init=10).fit(X)

# Compare the recovered clusters to the truth. Arguments follow scikit-learn's
# (labels_true, labels_pred) order; both scores are symmetric, so the values
# are the same as with the arguments swapped.
# The ARI used to be computed and thrown away; report it as well.
print(f"ARI: {adjusted_rand_score(truth, kmeans.labels_)}")
# The AMI line is left unlabelled so the script's existing output is unchanged.
print(adjusted_mutual_info_score(truth, kmeans.labels_))

