/*
 * Decompiled with CFR 0.152.
 */
package kea.main;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Vector;
import kea.filters.KEAFilter;
import kea.filters.KEAPhraseFilter;
import kea.stemmers.SremovalStemmer;
import kea.stemmers.Stemmer;
import kea.stopwords.Stopwords;
import kea.stopwords.StopwordsEnglish;
import kea.util.Counter;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;

public class KEAKeyphraseExtractor
implements OptionHandler {
    String m_dirName = null;
    String m_modelName = null;
    String m_vocabulary = null;
    String m_vocabularyFormat = null;
    String m_documentLanguage = "en";
    String m_encoding = "default";
    boolean m_debug = false;
    KEAFilter m_KEAFilter = null;
    int m_numPhrases = 10;
    private Stemmer m_Stemmer = new SremovalStemmer();
    private Stopwords m_Stopwords = new StopwordsEnglish();
    boolean m_AdditionalInfo = false;
    boolean m_buildGlobal = false;

    public boolean getAdditionalInfo() {
        return this.m_AdditionalInfo;
    }

    public void setAdditionalInfo(boolean newAdditionalInfo) {
        this.m_AdditionalInfo = newAdditionalInfo;
    }

    public boolean getBuildGlobal() {
        return this.m_buildGlobal;
    }

    public void setBuildGlobal(boolean newBuildGlobal) {
        this.m_buildGlobal = newBuildGlobal;
    }

    public int getNumPhrases() {
        return this.m_numPhrases;
    }

    public Stemmer getStemmer() {
        return this.m_Stemmer;
    }

    public void setStemmer(Stemmer newStemmer) {
        this.m_Stemmer = newStemmer;
    }

    public Stopwords getStopwords() {
        return this.m_Stopwords;
    }

    public void setStopwords(Stopwords newStopwords) {
        this.m_Stopwords = newStopwords;
    }

    public void setNumPhrases(int newnumPhrases) {
        this.m_numPhrases = newnumPhrases;
    }

    public boolean getDebug() {
        return this.m_debug;
    }

    public void setDebug(boolean newdebug) {
        this.m_debug = newdebug;
    }

    public String getEncoding() {
        return this.m_encoding;
    }

    public void setEncoding(String newencoding) {
        this.m_encoding = newencoding;
    }

    public String getVocabulary() {
        return this.m_vocabulary;
    }

    public void setVocabulary(String newvocabulary) {
        this.m_vocabulary = newvocabulary;
    }

    public String getVocabularyFormat() {
        return this.m_vocabularyFormat;
    }

    public void setVocabularyFormat(String newvocabularyFormat) {
        this.m_vocabularyFormat = newvocabularyFormat;
    }

    public String getDocumentLanguage() {
        return this.m_documentLanguage;
    }

    public void setDocumentLanguage(String newdocumentLanguage) {
        this.m_documentLanguage = newdocumentLanguage;
    }

    public String getModelName() {
        return this.m_modelName;
    }

    public void setModelName(String newmodelName) {
        this.m_modelName = newmodelName;
    }

    public String getDirName() {
        return this.m_dirName;
    }

    public void setDirName(String newdirName) {
        this.m_dirName = newdirName;
    }

    /*
     * Enabled aggressive block sorting
     */
    public void setOptions(String[] options) throws Exception {
        String stopwordsString;
        String dirName = Utils.getOption((char)'l', (String[])options);
        if (dirName.length() <= 0) {
            this.setDirName(null);
            throw new Exception("Name of directory required argument.");
        }
        this.setDirName(dirName);
        String modelName = Utils.getOption((char)'m', (String[])options);
        if (modelName.length() <= 0) {
            this.setModelName(null);
            throw new Exception("Name of model required argument.");
        }
        this.setModelName(modelName);
        String vocabularyName = Utils.getOption((char)'v', (String[])options);
        if (vocabularyName.length() <= 0) {
            this.setVocabulary(null);
            throw new Exception("Name of vocabulary required argument.");
        }
        this.setVocabulary(vocabularyName);
        String vocabularyFormat = Utils.getOption((char)'f', (String[])options);
        if (!this.getVocabulary().equals("none")) {
            if (vocabularyFormat.length() <= 0) {
                this.setVocabularyFormat(null);
                throw new Exception("If a controlled vocabulary is used, format of vocabulary required argument (skos or text).");
            }
            if (!vocabularyFormat.equals("skos") && !vocabularyFormat.equals("text")) {
                throw new Exception("Unsupported format of vocabulary. It should be either \"skos\" or \"text\".");
            }
            this.setVocabularyFormat(vocabularyFormat);
        } else {
            this.setVocabularyFormat(null);
        }
        String encoding = Utils.getOption((char)'e', (String[])options);
        if (encoding.length() > 0) {
            this.setEncoding(encoding);
        } else {
            this.setEncoding("default");
        }
        String documentLanguage = Utils.getOption((char)'i', (String[])options);
        if (documentLanguage.length() > 0) {
            this.setDocumentLanguage(documentLanguage);
        } else {
            this.setDocumentLanguage("en");
        }
        String numPhrases = Utils.getOption((char)'n', (String[])options);
        if (numPhrases.length() > 0) {
            this.setNumPhrases(Integer.parseInt(numPhrases));
        } else {
            this.setNumPhrases(5);
        }
        String stemmerString = Utils.getOption((char)'t', (String[])options);
        if (stemmerString.length() > 0) {
            stemmerString = "kea.stemmers.".concat(stemmerString);
            this.setStemmer((Stemmer)Class.forName(stemmerString).newInstance());
        }
        if ((stopwordsString = Utils.getOption((char)'s', (String[])options)).length() > 0) {
            stopwordsString = "kea.stopwords.".concat(stopwordsString);
            this.setStopwords((Stopwords)Class.forName(stopwordsString).newInstance());
        }
        this.setDebug(Utils.getFlag((char)'d', (String[])options));
        this.setBuildGlobal(Utils.getFlag((char)'b', (String[])options));
        this.setAdditionalInfo(Utils.getFlag((char)'a', (String[])options));
        Utils.checkForRemainingOptions((String[])options);
    }

    public String[] getOptions() {
        String[] options = new String[21];
        int current = 0;
        options[current++] = "-l";
        options[current++] = this.getDirName();
        options[current++] = "-m";
        options[current++] = this.getModelName();
        options[current++] = "-v";
        options[current++] = this.getVocabulary();
        options[current++] = "-f";
        options[current++] = this.getVocabularyFormat();
        options[current++] = "-e";
        options[current++] = this.getEncoding();
        options[current++] = "-i";
        options[current++] = this.getDocumentLanguage();
        options[current++] = "-n";
        options[current++] = "" + this.getNumPhrases();
        options[current++] = "-t";
        options[current++] = this.getStemmer().getClass().getName();
        options[current++] = "-s";
        options[current++] = this.getStopwords().getClass().getName();
        if (this.getDebug()) {
            options[current++] = "-d";
        }
        if (this.getBuildGlobal()) {
            options[current++] = "-b";
        }
        if (this.getAdditionalInfo()) {
            options[current++] = "-a";
        }
        while (current < options.length) {
            options[current++] = "";
        }
        return options;
    }

    public Enumeration listOptions() {
        Vector<Option> newVector = new Vector<Option>(13);
        newVector.addElement(new Option("\tSpecifies name of directory.", "l", 1, "-l <directory name>"));
        newVector.addElement(new Option("\tSpecifies name of model.", "m", 1, "-m <model name>"));
        newVector.addElement(new Option("\tSpecifies vocabulary name.", "v", 1, "-v <vocabulary name>"));
        newVector.addElement(new Option("\tSpecifies vocabulary format.", "f", 1, "-f <vocabulary format>"));
        newVector.addElement(new Option("\tSpecifies encoding.", "e", 1, "-e <encoding>"));
        newVector.addElement(new Option("\tSpecifies document language (en (default), es, de, fr).", "i", 1, "-i <document language>"));
        newVector.addElement(new Option("\tSpecifies number of phrases to be output (default: 5).", "n", 1, "-n"));
        newVector.addElement(new Option("\tSet the stemmer to use (default: SremovalStemmer).", "t", 1, "-t <name of stemmer class>"));
        newVector.addElement(new Option("\tSet the stopwords class to use (default: EnglishStopwords).", "s", 1, "-s <name of stopwords class>"));
        newVector.addElement(new Option("\tTurns debugging mode on.", "d", 0, "-d"));
        newVector.addElement(new Option("\tBuilds global dictionaries for computing TFIDF from the test collection.", "b", 0, "-b"));
        newVector.addElement(new Option("\tAlso write stemmed phrase and score into \".key\" file.", "a", 0, "-a"));
        return newVector.elements();
    }

    public Hashtable collectStems() throws Exception {
        Hashtable<String, Double> stems = new Hashtable<String, Double>();
        try {
            File dir = new File(this.m_dirName);
            String[] files = dir.list();
            int i = 0;
            while (i < files.length) {
                String stem;
                if (files[i].endsWith(".txt") && !stems.containsKey(stem = files[i].substring(0, files[i].length() - 4))) {
                    stems.put(stem, new Double(0.0));
                }
                ++i;
            }
        }
        catch (Exception e) {
            throw new Exception("Problem opening directory " + this.m_dirName);
        }
        return stems;
    }

    public void extractKeyphrases(Hashtable stems) throws Exception {
        Vector<Double> stats = new Vector<Double>();
        if (stems.size() == 0) {
            throw new Exception("Couldn't find any data!");
        }
        this.m_KEAFilter.setNumPhrases(this.m_numPhrases);
        this.m_KEAFilter.setVocabulary(this.m_vocabulary);
        this.m_KEAFilter.setVocabularyFormat(this.m_vocabularyFormat);
        this.m_KEAFilter.setDocumentLanguage(this.getDocumentLanguage());
        this.m_KEAFilter.setStemmer(this.m_Stemmer);
        this.m_KEAFilter.setStopwords(this.m_Stopwords);
        if (this.getVocabulary().equals("none")) {
            this.m_KEAFilter.m_NODEfeature = false;
        } else {
            this.m_KEAFilter.loadThesaurus(this.m_Stemmer, this.m_Stopwords);
        }
        FastVector atts = new FastVector(3);
        atts.addElement((Object)new Attribute("doc", null));
        atts.addElement((Object)new Attribute("keyphrases", null));
        atts.addElement((Object)new Attribute("filename", null));
        Instances data = new Instances("keyphrase_training_data", atts, 0);
        if (this.m_KEAFilter.m_Dictionary == null) {
            this.buildGlobalDictionaries(stems);
        }
        System.err.println("-- Extracting Keyphrases... ");
        Enumeration elem = stems.keys();
        while (elem.hasMoreElements()) {
            Instance inst;
            int c;
            InputStreamReader is;
            String str = (String)elem.nextElement();
            double[] newInst = new double[2];
            try {
                File txt = new File(String.valueOf(this.m_dirName) + "/" + str + ".txt");
                is = !this.m_encoding.equals("default") ? new InputStreamReader((InputStream)new FileInputStream(txt), this.m_encoding) : new InputStreamReader(new FileInputStream(txt));
                StringBuffer txtStr = new StringBuffer();
                while ((c = is.read()) != -1) {
                    txtStr.append((char)c);
                }
                newInst[0] = data.attribute(0).addStringValue(txtStr.toString());
            }
            catch (Exception e) {
                if (this.m_debug) {
                    System.err.println("Can't read document " + str + ".txt");
                }
                newInst[0] = Instance.missingValue();
            }
            try {
                File key = new File(String.valueOf(this.m_dirName) + "/" + str + ".key");
                is = !this.m_encoding.equals("default") ? new InputStreamReader((InputStream)new FileInputStream(key), this.m_encoding) : new InputStreamReader(new FileInputStream(key));
                StringBuffer keyStr = new StringBuffer();
                while ((c = is.read()) != -1) {
                    keyStr.append((char)c);
                }
                newInst[1] = data.attribute(1).addStringValue(keyStr.toString());
            }
            catch (Exception e) {
                if (this.m_debug) {
                    System.err.println("No existing keyphrases for stem " + str + ".");
                }
                newInst[1] = Instance.missingValue();
            }
            data.add(new Instance(1.0, newInst));
            this.m_KEAFilter.input(data.instance(0));
            data = data.stringFreeStructure();
            if (this.m_debug) {
                System.err.println("-- Document: " + str);
            }
            Instance[] topRankedInstances = new Instance[this.m_numPhrases];
            while ((inst = this.m_KEAFilter.output()) != null) {
                int index = (int)inst.value(this.m_KEAFilter.getRankIndex()) - 1;
                if (index >= this.m_numPhrases) continue;
                topRankedInstances[index] = inst;
            }
            if (this.m_debug) {
                System.err.println("-- Keyphrases and feature values:");
            }
            FileOutputStream out = null;
            PrintWriter printer = null;
            File key = new File(String.valueOf(this.m_dirName) + "/" + str + ".key");
            if (!key.exists()) {
                out = new FileOutputStream(String.valueOf(this.m_dirName) + "/" + str + ".key");
                printer = !this.m_encoding.equals("default") ? new PrintWriter(new OutputStreamWriter((OutputStream)out, this.m_encoding)) : new PrintWriter(out);
            }
            double numExtracted = 0.0;
            double numCorrect = 0.0;
            int i = 0;
            while (i < this.m_numPhrases) {
                if (topRankedInstances[i] != null) {
                    if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) {
                        numExtracted += 1.0;
                    }
                    if ((int)topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) {
                        numCorrect += 1.0;
                    }
                    if (printer != null) {
                        printer.print(topRankedInstances[i].stringValue(this.m_KEAFilter.getUnstemmedPhraseIndex()));
                        if (this.m_AdditionalInfo) {
                            printer.print("\t");
                            printer.print(topRankedInstances[i].stringValue(this.m_KEAFilter.getStemmedPhraseIndex()));
                            printer.print("\t");
                            printer.print(Utils.doubleToString((double)topRankedInstances[i].value(this.m_KEAFilter.getProbabilityIndex()), (int)4));
                        }
                        printer.println();
                    }
                    if (this.m_debug) {
                        System.err.println(topRankedInstances[i]);
                    }
                }
                ++i;
            }
            if (numExtracted > 0.0) {
                if (this.m_debug) {
                    System.err.println("-- " + numCorrect + " correct");
                }
                stats.addElement(new Double(numCorrect));
            }
            if (printer == null) continue;
            printer.flush();
            printer.close();
            out.close();
        }
        double[] st = new double[stats.size()];
        int i = 0;
        while (i < stats.size()) {
            st[i] = (Double)stats.elementAt(i);
            ++i;
        }
        double avg = Utils.mean((double[])st);
        double stdDev = Math.sqrt(Utils.variance((double[])st));
        System.err.println("Avg. number of matching keyphrases compared to existing ones : " + Utils.doubleToString((double)avg, (int)2) + " +/- " + Utils.doubleToString((double)stdDev, (int)2));
        System.err.println("Based on " + stats.size() + " documents");
    }

    private void buildGlobalDictionaries(Hashtable stems) throws Exception {
        System.err.println("--- Building global dictionaries from the test collection.. ");
        this.m_KEAFilter.m_Dictionary = new HashMap();
        Enumeration elem = stems.keys();
        while (elem.hasMoreElements()) {
            int c;
            String str = (String)elem.nextElement();
            File txt = new File(String.valueOf(this.m_dirName) + "/" + str + ".txt");
            InputStreamReader is = !this.m_encoding.equals("default") ? new InputStreamReader((InputStream)new FileInputStream(txt), this.m_encoding) : new InputStreamReader(new FileInputStream(txt));
            StringBuffer txtStr = new StringBuffer();
            while ((c = is.read()) != -1) {
                txtStr.append((char)c);
            }
            KEAPhraseFilter kpf = new KEAPhraseFilter();
            HashMap hash = this.m_KEAFilter.getPhrasesForDictionary(kpf.tokenize(txtStr.toString()));
            Iterator it = hash.keySet().iterator();
            while (it.hasNext()) {
                String phrase = (String)it.next();
                Counter counter = (Counter)this.m_KEAFilter.m_Dictionary.get(phrase);
                if (counter == null) {
                    this.m_KEAFilter.m_Dictionary.put(phrase, new Counter());
                    continue;
                }
                counter.increment();
            }
        }
    }

    public void loadModel() throws Exception {
        BufferedInputStream inStream = new BufferedInputStream(new FileInputStream(this.m_modelName));
        ObjectInputStream in = new ObjectInputStream(inStream);
        this.m_KEAFilter = (KEAFilter)((Object)in.readObject());
        if (this.m_buildGlobal) {
            if (this.m_debug) {
                System.err.println("-- The global dictionaries will be built from this test collection..");
            }
            this.m_KEAFilter.m_Dictionary = null;
        }
        in.close();
    }

    /*
     * Unable to fully structure code
     */
    public static void main(String[] ops) {
        block4: {
            kmb = new KEAKeyphraseExtractor();
            try {
                kmb.setOptions(ops);
                System.err.print("Extracting keyphrases with options: ");
                optionSettings = kmb.getOptions();
                i = 0;
                while (i < optionSettings.length) {
                    System.err.print(String.valueOf(optionSettings[i]) + " ");
                    ++i;
                }
                System.err.println();
                System.err.println("-- Loading the Model... ");
                kmb.loadModel();
                kmb.extractKeyphrases(kmb.collectStems());
                break block4;
            }
            catch (Exception e) {
                e.printStackTrace();
                System.err.println(e.getMessage());
                System.err.println("\nOptions:\n");
                en = kmb.listOptions();
                ** while (en.hasMoreElements())
            }
lbl-1000:
            // 1 sources

            {
                option = (Option)en.nextElement();
                System.err.println(option.synopsis());
                System.err.println(option.description());
                continue;
            }
        }
    }
}

