/*
 * Decompiled with CFR 0.152.
 */
package eval;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.HashMap;
import utility.FuncUtils;

/**
 * Scores topic-model output by the average pairwise PMI of each topic's top words.
 *
 * <p>Typical flow (see {@link #evaluate}): build a vocabulary from every top-word file,
 * count per-line occurrences and co-occurrences of vocabulary words in a reference
 * corpus, then compute one coherence value per top-word file.
 *
 * <p>The class keeps mutable state in its fields and is not written for concurrent use.
 */
public class CoherenceEval {
    /**
     * Co-occurrence counts indexed by word id. {@code ww[i][i]} is the number of corpus
     * lines containing word {@code i}; {@code ww[i][j]} is the number of lines containing
     * both {@code i} and {@code j}. Allocated by {@link #evaluate}.
     */
    int[][] ww;
    /** Not read or written anywhere in this class. */
    public int numDocuments;
    /** Maps a word to its id; ids are assigned in first-seen order starting at 0. */
    public HashMap<String, Integer> word2IdVocabulary = new HashMap<>();
    /** Inverse of {@link #word2IdVocabulary}. */
    public HashMap<Integer, String> id2WordVocabulary = new HashMap<>();
    /** Window length, in tokens, used by {@link #readWikipedia}. */
    public int windowSize = 100;
    /**
     * Number of counting units seen so far: sliding windows for {@link #readWikipedia},
     * non-blank lines for {@link #readWikipediaWhole}. Used as N in the PMI formula.
     */
    public int windowNum = 0;
    /** Next id to hand out when a new word is added to the vocabulary. */
    int indexWord = 0;

    /**
     * Splits a non-blank line into whitespace-separated tokens.
     *
     * <p>Shared by every reader so that top-word files and the corpus are tokenised the
     * same way (runs of spaces or tabs never yield an empty token).
     */
    private static String[] tokenize(String line) {
        return line.trim().split("\\s+");
    }

    /**
     * Adds every word found in a top-word file to the vocabulary.
     *
     * <p>Blank lines are skipped. Words already known keep their existing id. Any failure
     * while reading is printed to stderr and the words read so far are kept.
     *
     * @param pathTopTopicFile path of a file whose lines hold whitespace-separated words
     */
    public void readTopTopicWordForVocabulary(String pathTopTopicFile) {
        // try-with-resources: the reader is closed even if reading fails part-way.
        try (BufferedReader br = new BufferedReader(new FileReader(pathTopTopicFile))) {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                for (String word : tokenize(line)) {
                    if (this.word2IdVocabulary.containsKey(word)) {
                        continue;
                    }
                    this.word2IdVocabulary.put(word, this.indexWord);
                    this.id2WordVocabulary.put(this.indexWord, word);
                    ++this.indexWord;
                }
            }
        }
        catch (Exception e) {
            // Best-effort reader: report and carry on with whatever was read.
            e.printStackTrace();
        }
    }

    /**
     * Reads a top-word file and converts each non-blank line into a list of word ids.
     *
     * <p>A word that is not in {@link #word2IdVocabulary} is stored as {@code null};
     * call {@link #readTopTopicWordForVocabulary} on the same file first to avoid that.
     * Any failure while reading is printed to stderr and the topics read so far are
     * returned.
     *
     * @param pathTopTopicFile path of a file with one topic per line
     * @return one id list per non-blank line, in file order; never {@code null}
     */
    public ArrayList<ArrayList<Integer>> readTopTopicWord(String pathTopTopicFile) {
        ArrayList<ArrayList<Integer>> topTopicWordList = new ArrayList<>();
        try (BufferedReader br = new BufferedReader(new FileReader(pathTopTopicFile))) {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                ArrayList<Integer> oneTopic = new ArrayList<>();
                for (String word : tokenize(line)) {
                    oneTopic.add(this.word2IdVocabulary.get(word));
                }
                topTopicWordList.add(oneTopic);
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        return topTopicWordList;
    }

    /**
     * Walks the corpus with a sliding window of {@link #windowSize} tokens and counts the
     * windows in {@link #windowNum}.
     *
     * <p>A line no longer than the window counts as a single window. Progress (the line
     * index) is printed every 1000 non-blank lines.
     *
     * <p>NOTE(review): for every pair of vocabulary words inside a window this method only
     * prints an empty line; it does not update {@link #ww}. That looks like an unfinished
     * stub and is kept as-is. {@link #evaluate} uses {@link #readWikipediaWhole} instead.
     *
     * @param pathWikipediaFile path of the corpus, one document per line
     */
    public void readWikipedia(String pathWikipediaFile) {
        try (BufferedReader br = new BufferedReader(new FileReader(pathWikipediaFile))) {
            String doc;
            int senId = 0;
            while ((doc = br.readLine()) != null) {
                if (doc.trim().isEmpty()) {
                    continue;
                }
                if (senId % 1000 == 0) {
                    System.out.print(senId + " ");
                }
                ++senId;
                String[] words = tokenize(doc);
                int docSize = words.length;
                if (docSize <= this.windowSize) {
                    // Short line: the whole line is one window.
                    this.scanWindow(words, 0, docSize);
                    ++this.windowNum;
                    continue;
                }
                for (int start = 0; start + this.windowSize <= docSize; ++start) {
                    this.scanWindow(words, start, start + this.windowSize);
                    ++this.windowNum;
                }
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Visits every ordered pair of vocabulary words in {@code words[from..to)} and prints
     * an empty line for each pair.
     */
    private void scanWindow(String[] words, int from, int to) {
        for (int k = from; k < to; ++k) {
            if (!this.word2IdVocabulary.containsKey(words[k])) {
                continue;
            }
            for (int m = k + 1; m < to; ++m) {
                if (this.word2IdVocabulary.containsKey(words[m])) {
                    System.out.println();
                }
            }
        }
    }

    /**
     * Fills {@link #ww} with line-level occurrence and co-occurrence counts.
     *
     * <p>Each non-blank line of the corpus is one counting unit. For every distinct
     * vocabulary word on the line {@code ww[id][id]} is incremented once, and for every
     * unordered pair of distinct vocabulary words both {@code ww[a][b]} and
     * {@code ww[b][a]} are incremented once. Afterwards {@link #windowNum} is set to the
     * number of non-blank lines processed.
     *
     * <p>{@link #ww} must already be allocated with one row and column per vocabulary
     * word. Any failure is printed to stderr; counts gathered up to that point are kept.
     *
     * @param pathWikipediaFile path of the corpus, one document per line
     */
    public void readWikipediaWhole(String pathWikipediaFile) {
        int senId = 0;
        try (BufferedReader br = new BufferedReader(new FileReader(pathWikipediaFile))) {
            System.out.println("began to read file from wikepedia");
            int vocabSize = this.ww.length;
            // Reused across lines: marker per word id plus the ids present on the
            // current line, so duplicate detection is O(1) and allocation-free.
            boolean[] seen = new boolean[vocabSize];
            int[] present = new int[vocabSize];
            String doc;
            while ((doc = br.readLine()) != null) {
                if (doc.trim().isEmpty()) {
                    continue;
                }
                // Progress bar: a tick every 10k lines, a line break every 200k.
                if (senId % 10000 == 0) {
                    System.out.print("| ");
                }
                if (senId % 200000 == 0) {
                    System.out.println();
                }
                ++senId;
                int count = 0;
                for (String token : tokenize(doc)) {
                    Integer id = this.word2IdVocabulary.get(token);
                    if (id == null || seen[id]) {
                        continue;
                    }
                    seen[id] = true;
                    present[count++] = id;
                    ++this.ww[id][id];
                }
                for (int a = 0; a < count; ++a) {
                    int first = present[a];
                    for (int b = a + 1; b < count; ++b) {
                        int second = present[b];
                        ++this.ww[first][second];
                        ++this.ww[second][first];
                    }
                    // Clear the marker so the buffers are clean for the next line.
                    seen[first] = false;
                }
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        this.windowNum = senId;
        if (this.windowNum == 1000000) {
            System.out.println("finish!");
        } else {
            System.out.println("the number of wikipedia documents: " + this.windowNum);
        }
    }

    /**
     * Computes the mean pairwise PMI of one topic's top words.
     *
     * <p>For each unordered pair with a non-zero co-occurrence count the term
     * {@code log(ww[a][b] * windowNum / (ww[a][a] * ww[b][b]))} is added; pairs that never
     * co-occur contribute 0. The sum is divided by the number of pairs,
     * {@code n * (n - 1) / 2}.
     *
     * <p>A topic with fewer than two words has no pairs and yields 0.0 (previously this
     * divided by zero and produced NaN).
     *
     * @param topW word ids of the topic's top words; must not contain {@code null}
     * @return the topic's coherence score
     */
    public double computOneTopicPMI(ArrayList<Integer> topW) {
        int size = topW.size();
        if (size < 2) {
            return 0.0;
        }
        double coh = 0.0;
        for (int i = 0; i < size; ++i) {
            int id1 = topW.get(i);
            for (int j = i + 1; j < size; ++j) {
                int id2 = topW.get(j);
                int comC = this.ww[id1][id2];
                if (comC == 0) {
                    continue;
                }
                double comP = comC;
                double id1P = this.ww[id1][id1];
                double id2P = this.ww[id2][id2];
                if (id1P == 0.0 || id2P == 0.0) {
                    // Inconsistent counts: a pair co-occurs but one word never occurs.
                    System.out.println("function \"computOneTopicCoh\" Exception");
                }
                coh += Math.log(comP * (double)this.windowNum / (id1P * id2P));
            }
        }
        return coh * (2.0 / (double)(size * (size - 1)));
    }

    /**
     * Computes the mean coherence over all topics listed in a top-word file.
     *
     * @param path path of the top-word file, one topic per line
     * @return the average of {@link #computOneTopicPMI} over the file's topics; NaN when
     *         the file yields no topics (empty or unreadable)
     */
    public double computeCoherence(String path) {
        ArrayList<ArrayList<Integer>> topTopicWordList = this.readTopTopicWord(path);
        double coh = 0.0;
        for (ArrayList<Integer> topic : topTopicWordList) {
            coh += this.computOneTopicPMI(topic);
        }
        return coh / (double)topTopicWordList.size();
    }

    /**
     * Runs the full evaluation for every file in a directory whose name ends with
     * {@code suffix} and writes a report to {@code <dir>/<suffix>.Coherence}.
     *
     * <p>Steps: collect the vocabulary from all matching files, allocate {@link #ww},
     * count co-occurrences over the corpus, then score each matching file. The report and
     * stdout both receive the per-file values plus their mean and standard deviation.
     *
     * @param patToWikipediaFileFile path of the reference corpus, one document per line
     * @param pathToTopTopicFiles directory holding the top-word files
     * @param suffix file-name suffix selecting the top-word files
     * @throws Exception if the report cannot be written, the directory cannot be listed,
     *         or no file name ends with {@code suffix}
     */
    public void evaluate(String patToWikipediaFileFile, String pathToTopTopicFiles, String suffix) throws Exception {
        // try-with-resources: the report is flushed and closed on every exit path,
        // including the exceptions thrown below.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(pathToTopTopicFiles + "/" + suffix + ".Coherence"))) {
            writer.write("Wikipedia file in: " + patToWikipediaFileFile + "\n\n");
            File[] files = new File(pathToTopTopicFiles).listFiles();
            if (files == null) {
                // listFiles() returns null when the path is not a directory or on I/O error.
                throw new Exception("Cannot list files in: " + pathToTopTopicFiles);
            }
            for (File file : files) {
                if (!file.getName().endsWith(suffix)) {
                    continue;
                }
                this.readTopTopicWordForVocabulary(file.getAbsolutePath());
            }
            int vocabSize = this.word2IdVocabulary.size();
            this.ww = new int[vocabSize][vocabSize];
            this.readWikipediaWhole(patToWikipediaFileFile);
            ArrayList<Double> coherence = new ArrayList<>();
            for (File file : files) {
                if (!file.getName().endsWith(suffix)) {
                    continue;
                }
                writer.write("Results for: " + file.getAbsolutePath() + "\n");
                double value = this.computeCoherence(file.getAbsolutePath());
                writer.write("\tCoherence: " + value + "\n");
                coherence.add(value);
                System.out.println(file + "--coherence---" + value);
            }
            if (coherence.isEmpty()) {
                System.out.println("Error: There is no file ending with " + suffix);
                throw new Exception("There is no file ending with " + suffix);
            }
            double[] coherenceValues = new double[coherence.size()];
            for (int i = 0; i < coherenceValues.length; ++i) {
                coherenceValues[i] = coherence.get(i);
            }
            String summary = "---\nMean Coherence: " + FuncUtils.mean(coherenceValues)
                    + ", standard deviation: " + FuncUtils.stddev(coherenceValues);
            writer.write("\n" + summary);
            System.out.println(summary);
        }
    }

    /** Runs the evaluation with the hard-coded corpus path, results directory and suffix. */
    public static void main(String[] args) throws Exception {
        CoherenceEval ce = new CoherenceEval();
        ce.evaluate("dataset/wiki.en.text1000000", "results/", "topWords");
    }
}

