/*
 * Decompiled with CFR 0.152.
 */
package eval;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import utility.FuncUtils;

/**
 * Evaluates a document clustering against golden labels using purity and
 * normalized mutual information (NMI).
 *
 * <p>The golden-labels file holds one label per line; the line index (starting
 * at 0) is the document id. The document-to-topic file holds one line per
 * document with whitespace-separated numeric values; each document is assigned
 * to the column holding the largest value.
 */
public class ClusteringEval {
    /** Path to the file holding one row of topic values per document. */
    String pathDocTopicProsFile;
    /** Path to the file holding one golden label per document. */
    String pathGoldenLabelsFile;
    /** Golden label -> ids of the documents carrying that label. */
    HashMap<String, Set<Integer>> goldenClusers;
    /** Output label ("Topic_k") -> ids of the documents assigned to it. */
    HashMap<String, Set<Integer>> outputClusers;
    /** Number of documents, i.e. number of lines read from the golden-labels file. */
    int numDocs;

    /**
     * Loads both input files and builds the golden and output clusterings.
     *
     * @param inPathGoldenLabelsFile path to the golden-labels file
     * @param inPathDocTopicProsFile path to the document-to-topic file
     * @throws Exception if a file cannot be read, a value cannot be parsed, or
     *                   the two files describe a different number of documents
     */
    public ClusteringEval(String inPathGoldenLabelsFile, String inPathDocTopicProsFile) throws Exception {
        this.pathDocTopicProsFile = inPathDocTopicProsFile;
        this.pathGoldenLabelsFile = inPathGoldenLabelsFile;
        this.goldenClusers = new HashMap<String, Set<Integer>>();
        this.outputClusers = new HashMap<String, Set<Integer>>();
        this.readGoldenLabelsFile();
        this.readDocTopicProsFile();
    }

    /**
     * Reads the golden-labels file, grouping document ids by (trimmed) label,
     * and sets {@code numDocs} to the number of lines read.
     *
     * @throws Exception if the file cannot be opened or read
     */
    public void readGoldenLabelsFile() throws Exception {
        System.out.println("Reading golden labels file " + this.pathGoldenLabelsFile);
        int id = 0;
        // try-with-resources closes the reader on every path; read errors
        // propagate instead of silently leaving a truncated clustering behind.
        try (BufferedReader br = new BufferedReader(new FileReader(this.pathGoldenLabelsFile))) {
            String label;
            while ((label = br.readLine()) != null) {
                addToCluster(this.goldenClusers, label.trim(), id);
                ++id;
            }
        }
        this.numDocs = id;
    }

    /**
     * Reads the document-to-topic file and assigns every document to the
     * column with the largest value, producing labels of the form
     * {@code Topic_<columnIndex>}. A row in which no value is greater than
     * zero is labelled {@code Topic_-1}.
     *
     * @throws Exception if the file cannot be read, a value is not a valid
     *                   number, or the number of rows differs from the number
     *                   of golden labels
     */
    public void readDocTopicProsFile() throws Exception {
        System.out.println("Reading document-to-topic distribution file " + this.pathDocTopicProsFile);
        // Position in the list is the document id.
        ArrayList<String> docLabels = new ArrayList<String>();
        try (BufferedReader br = new BufferedReader(new FileReader(this.pathDocTopicProsFile))) {
            String docTopicProbs;
            while ((docTopicProbs = br.readLine()) != null) {
                String[] pros = docTopicProbs.trim().split("\\s+");
                double maxPro = 0.0;
                int index = -1;
                for (int topicIndex = 0; topicIndex < pros.length; ++topicIndex) {
                    double pro;
                    try {
                        pro = Double.parseDouble(pros[topicIndex]);
                    }
                    catch (NumberFormatException e) {
                        throw new IllegalArgumentException("Invalid value '" + pros[topicIndex] + "' on line "
                                + (docLabels.size() + 1) + " of " + this.pathDocTopicProsFile, e);
                    }
                    // Strict comparison: ties keep the earliest column.
                    if (pro > maxPro) {
                        maxPro = pro;
                        index = topicIndex;
                    }
                }
                docLabels.add("Topic_" + index);
            }
        }
        if (this.numDocs != docLabels.size()) {
            System.out.println("Error: the number of documents is different to the number of labels!");
            throw new Exception("Number of documents (" + docLabels.size() + ") in " + this.pathDocTopicProsFile
                    + " differs from the number of golden labels (" + this.numDocs + ")");
        }
        // Only populate the output clustering once the input is known to be consistent.
        for (int id = 0; id < docLabels.size(); ++id) {
            addToCluster(this.outputClusers, docLabels.get(id), id);
        }
    }

    /**
     * Computes purity: for every output cluster take the size of its largest
     * overlap with any golden cluster, sum these and divide by the number of
     * documents. The value is also printed to standard output.
     *
     * @return the purity; NaN when there are no documents
     */
    public double computePurity() {
        int count = 0;
        for (Set<Integer> docs : this.outputClusers.values()) {
            int correctAssignedDocNum = 0;
            for (Set<Integer> goldenDocs : this.goldenClusers.values()) {
                correctAssignedDocNum = Math.max(correctAssignedDocNum, intersectionSize(docs, goldenDocs));
            }
            count += correctAssignedDocNum;
        }
        double value = (double)count / (double)this.numDocs;
        System.out.println("\tPurity accuracy: " + value);
        return value;
    }

    /**
     * Computes NMI as {@code 2 * I(output; golden) / (H(output) + H(golden))}
     * using natural logarithms. The value is also printed to standard output.
     *
     * @return the NMI score; NaN when the entropy sum is zero (for example when
     *         both clusterings consist of a single cluster) or when there are
     *         no documents
     */
    public double computeNMIscore() {
        double miScore = 0.0;
        for (Set<Integer> docs : this.outputClusers.values()) {
            for (Set<Integer> goldenDocs : this.goldenClusers.values()) {
                double numCorrectAssignedDocs = intersectionSize(docs, goldenDocs);
                if (numCorrectAssignedDocs == 0.0) {
                    // An empty overlap contributes nothing (0 * log 0 is taken as 0).
                    continue;
                }
                // Multiply the cluster sizes as doubles so large clusters cannot overflow int.
                double sizeProduct = (double)docs.size() * (double)goldenDocs.size();
                miScore += numCorrectAssignedDocs / (double)this.numDocs
                        * Math.log(numCorrectAssignedDocs * (double)this.numDocs / sizeProduct);
            }
        }
        double entropy = addEntropy(0.0, this.outputClusers);
        entropy = addEntropy(entropy, this.goldenClusers);
        double value = 2.0 * miScore / entropy;
        System.out.println("\tNMI score: " + value);
        return value;
    }

    /**
     * Evaluates every file in a folder whose name ends with {@code suffix}
     * against the golden labels, and writes the per-file purity/NMI together
     * with their mean and standard deviation to
     * {@code <folder>/<suffix>.PurityNMI}. The summary is also printed to
     * standard output.
     *
     * @param pathGoldenLabelsFile            path to the golden-labels file
     * @param pathToFolderOfDocTopicProsFiles folder holding the document-to-topic files
     * @param suffix                          file-name suffix selecting the files to evaluate
     * @throws Exception if the folder cannot be listed, no file matches the
     *                   suffix, or any file cannot be read or written
     */
    public static void evaluate(String pathGoldenLabelsFile, String pathToFolderOfDocTopicProsFiles, String suffix) throws Exception {
        // try-with-resources guarantees the report file is flushed and closed even on failure.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(pathToFolderOfDocTopicProsFiles + "/" + suffix + ".PurityNMI"))) {
            writer.write("Golden-labels in: " + pathGoldenLabelsFile + "\n\n");
            File[] files = new File(pathToFolderOfDocTopicProsFiles).listFiles();
            if (files == null) {
                // listFiles() returns null when the path is not a directory or cannot be read.
                throw new Exception("Cannot list files in " + pathToFolderOfDocTopicProsFiles);
            }
            ArrayList<Double> purity = new ArrayList<Double>();
            ArrayList<Double> nmi = new ArrayList<Double>();
            for (File file : files) {
                if (!file.getName().endsWith(suffix)) {
                    continue;
                }
                writer.write("Results for: " + file.getAbsolutePath() + "\n");
                ClusteringEval dce = new ClusteringEval(pathGoldenLabelsFile, file.getAbsolutePath());
                double value = dce.computePurity();
                writer.write("\tPurity: " + value + "\n");
                purity.add(value);
                value = dce.computeNMIscore();
                writer.write("\tNMI: " + value + "\n");
                nmi.add(value);
            }
            if (purity.isEmpty() || nmi.isEmpty()) {
                System.out.println("Error: There is no file ending with " + suffix);
                throw new Exception("There is no file ending with " + suffix + " in " + pathToFolderOfDocTopicProsFiles);
            }
            double[] purityValues = new double[purity.size()];
            for (int i = 0; i < purityValues.length; ++i) {
                purityValues[i] = purity.get(i);
            }
            double[] nmiValues = new double[nmi.size()];
            for (int i = 0; i < nmiValues.length; ++i) {
                nmiValues[i] = nmi.get(i);
            }
            String puritySummary = "Mean purity: " + FuncUtils.mean(purityValues) + ", standard deviation: " + FuncUtils.stddev(purityValues);
            String nmiSummary = "Mean NMI: " + FuncUtils.mean(nmiValues) + ", standard deviation: " + FuncUtils.stddev(nmiValues);
            writer.write("\n---\n" + puritySummary);
            writer.write("\n" + nmiSummary);
            System.out.println("---\n" + puritySummary);
            System.out.println(nmiSummary);
        }
    }

    /** Runs the evaluation on the default dataset and results folder. */
    public static void main(String[] args) throws Exception {
        ClusteringEval.evaluate("dataset/Pascal_Flickr_LABEL.txt", "results", "theta");
    }

    /** Adds {@code docId} to the cluster stored under {@code label}, creating the cluster if absent. */
    private static void addToCluster(HashMap<String, Set<Integer>> clusters, String label, int docId) {
        Set<Integer> ids = clusters.get(label);
        if (ids == null) {
            ids = new HashSet<Integer>();
            clusters.put(label, ids);
        }
        ids.add(docId);
    }

    /** Counts the elements common to both sets without copying either of them. */
    private static int intersectionSize(Set<Integer> a, Set<Integer> b) {
        Set<Integer> smaller = a.size() <= b.size() ? a : b;
        Set<Integer> larger = smaller == a ? b : a;
        int common = 0;
        for (Integer id : smaller) {
            if (larger.contains(id)) {
                ++common;
            }
        }
        return common;
    }

    /** Adds the entropy (natural log) of the cluster-size distribution of {@code clusters} to {@code entropy}. */
    private double addEntropy(double entropy, HashMap<String, Set<Integer>> clusters) {
        for (Set<Integer> docs : clusters.values()) {
            entropy += -1.0 * (double)docs.size() / (double)this.numDocs * Math.log(1.0 * (double)docs.size() / (double)this.numDocs);
        }
        return entropy;
    }
}

