/*
 * Decompiled with CFR 0.152.
 */
package edu.mit.jmwe.detect.score;

import edu.mit.jmwe.data.IMWE;
import edu.mit.jmwe.data.IToken;
import edu.mit.jmwe.data.MWEPOS;
import edu.mit.jmwe.data.StopWords;
import edu.mit.jmwe.detect.score.AbstractScorer;
import edu.mit.jmwe.util.JWIPOS;
import edu.mit.jwi.IDictionary;
import edu.mit.jwi.item.IIndexWord;
import edu.mit.jwi.item.IWordID;
import edu.mit.jwi.item.POS;
import edu.mit.jwi.morph.IStemmer;
import edu.mit.jwi.morph.WordnetStemmer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Scores a multi-word expression by how many words its dictionary glosses
 * share with the sentence it was found in (a gloss-overlap measure, as the
 * class name suggests, in the style of the Lesk algorithm).
 *
 * <p>The sentence's tokens are lower-cased, stripped of punctuation, filtered
 * against the stop-word list and expanded with their stems once, at
 * construction time; the resulting set is immutable. Each call to
 * {@link #score(IMWE)} applies the same normalization to every gloss of the
 * MWE's entry and returns the size of the largest intersection.
 *
 * @param <T> the token type of the sentence and of the scored MWEs
 */
public class LeskScore<T extends IToken>
extends AbstractScorer<IMWE<T>> {
    /** Unmodifiable set of normalized content words (and their stems) of the sentence. */
    protected final Set<String> contextWords;
    /** Dictionary used to look up index words and glosses. */
    protected final IDictionary dict;
    /** Stemmer used to expand words with their stems. */
    protected final IStemmer stemmer;
    /** Matches runs of whitespace; used to split text into words. */
    protected static final Pattern whitespace = Pattern.compile("\\s+");
    /** Matches a single ASCII punctuation character; matches are deleted before splitting. */
    protected static final Pattern punctuation = Pattern.compile("\\p{Punct}");

    /**
     * Builds the context-word set for the given sentence.
     *
     * <p>Note: this constructor calls the overridable methods
     * {@link #getContentWords(String)}, {@link #getStemmedWords(Collection)}
     * and (through them) {@link #getStopWords()}; overrides run before any
     * subclass fields have been initialized.
     *
     * @param sentence the tokens that form the context; must not be {@code null}
     * @param dict the dictionary used for stemming and gloss lookup; must not be {@code null}
     * @throws NullPointerException if {@code sentence} or {@code dict} is {@code null}
     */
    public LeskScore(List<T> sentence, IDictionary dict) {
        if (dict == null) {
            throw new NullPointerException("dict");
        }
        if (sentence == null) {
            throw new NullPointerException("sentence");
        }
        this.dict = dict;
        this.stemmer = new WordnetStemmer(dict);

        // Join the surface forms into one string so the sentence goes through
        // exactly the same normalization as a gloss does.
        StringBuilder sb = new StringBuilder();
        for (T token : sentence) {
            sb.append(token.getForm()).append(' ');
        }
        List<String> contentWords = this.getContentWords(sb.toString());
        this.contextWords = Collections.unmodifiableSet(this.getStemmedWords(contentWords));
    }

    /**
     * Returns the largest overlap between the context words and any gloss
     * found for the MWE's entry.
     *
     * @param mwe the MWE to score
     * @return the maximum overlap count over all glosses, or {@code 0.0} if
     *         no gloss is found
     */
    @Override
    public double score(IMWE<T> mwe) {
        double bestScore = 0.0;
        for (String gloss : this.getGlosses(mwe.getEntry().getForm(), mwe.getEntry().getPOS())) {
            bestScore = Math.max(bestScore, this.overlap(gloss));
        }
        return bestScore;
    }

    /**
     * Lower-cases the string, deletes punctuation, splits it on whitespace
     * and drops stop words and empty tokens.
     *
     * @param str the text to tokenize; must not be {@code null}
     * @return a new, modifiable list of the remaining words in their original
     *         order (duplicates are kept)
     */
    protected List<String> getContentWords(String str) {
        // Use a fixed locale: with the default locale, e.g. Turkish would map
        // 'I' to dotless 'ı' and break stop-word and dictionary lookups.
        String cleaned = punctuation.matcher(str.toLowerCase(Locale.ENGLISH)).replaceAll("");
        Set<String> stopWords = this.getStopWords();
        List<String> wordList = new LinkedList<String>();
        for (String word : whitespace.split(cleaned)) {
            // split() yields an empty string when the text is empty or starts
            // with whitespace (e.g. a leading punctuation-only token); such a
            // token is not a word and must not count towards the overlap.
            if (word.length() == 0 || stopWords.contains(word)) {
                continue;
            }
            wordList.add(word);
        }
        return wordList;
    }

    /**
     * Returns the stop words to exclude from overlap computation. Subclasses
     * may override this to supply a different list.
     *
     * @return the set returned by {@link StopWords#get()}
     */
    protected Set<String> getStopWords() {
        return StopWords.get();
    }

    /**
     * Looks up the glosses of all senses the dictionary lists for the lemma.
     *
     * @param lemma the lemma to look up
     * @param pos the MWE part of speech; the part of speech whose identifier
     *        is {@code 'P'} is looked up as a noun, everything else is
     *        converted with {@link JWIPOS#toPOS(MWEPOS)}
     * @return one gloss per word ID of the index word, or an empty list if
     *         the part of speech cannot be mapped or the lemma is not in the
     *         dictionary
     */
    protected List<String> getGlosses(String lemma, MWEPOS pos) {
        POS p = pos.getIdentifier() == 'P' ? POS.NOUN : JWIPOS.toPOS(pos);
        if (p == null) {
            return Collections.emptyList();
        }
        IIndexWord word = this.dict.getIndexWord(lemma, p);
        if (word == null) {
            return Collections.emptyList();
        }
        List<String> glosses = new ArrayList<String>();
        for (IWordID id : word.getWordIDs()) {
            glosses.add(this.dict.getWord(id).getSynset().getGloss());
        }
        return glosses;
    }

    /**
     * Counts the distinct normalized words (including stems) that the gloss
     * shares with the context words.
     *
     * @param gloss the gloss text
     * @return the size of the intersection
     */
    protected int overlap(String gloss) {
        Set<String> wordSet = this.getStemmedWords(this.getContentWords(gloss));
        wordSet.retainAll(this.contextWords);
        return wordSet.size();
    }

    /**
     * Returns the given words minus stop words, plus every stem the stemmer
     * finds for each given word (looked up for all parts of speech).
     *
     * <p>Blank words are ignored: they are neither stemmed nor included in
     * the result.
     *
     * @param words the words to expand; must not be {@code null}
     * @return a new, modifiable set
     */
    protected Set<String> getStemmedWords(Collection<String> words) {
        Set<String> result = new HashSet<String>(words);
        result.removeAll(this.getStopWords());
        for (String word : words) {
            if (word.trim().length() == 0) {
                // The JWI stemmer is documented to reject empty/all-whitespace
                // words with IllegalArgumentException; a blank is not a word.
                result.remove(word);
                continue;
            }
            // NOTE(review): stems are added after stop-word removal, so a stem
            // that is itself a stop word ends up in the set — confirm whether
            // that is intended before changing it, since it affects scores.
            result.addAll(this.stemmer.findStems(word, null));
        }
        return result;
    }
}

