package edu.mit.jmwe.detect;

import edu.mit.jmwe.data.IMWE;
import edu.mit.jmwe.data.IRootMWEDesc;
import edu.mit.jmwe.data.IToken;
import edu.mit.jmwe.data.MWEComparator;
import edu.mit.jmwe.index.IMWEIndex;
import edu.mit.jmwe.index.MWEIndex;
import edu.mit.jmwe.util.ListComparator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:edu/mit/jmwe/detect/StopWords.class */
/**
 * An {@link IMWEDetector} that looks tokens up in an index of stop-word
 * multi-word expressions and returns every expression whose builder ends up
 * completely filled by the tokens of the given sentence.
 *
 * <p>By default the index is built from {@link #defaultIndex}; a caller may
 * supply any other {@link IMWEIndex} through {@link #StopWords(IMWEIndex)}.
 */
public class StopWords implements IMWEDetector {

    /**
     * Entries used to build the default stop-word index. The list itself is
     * unmodifiable. Several entries appear more than once; they are kept
     * exactly as they were so the contents of this public list do not change.
     *
     * <p>NOTE(review): each entry looks like underscore-joined words followed
     * by a part-of-speech letter -- confirm against the format expected by
     * {@code MWEIndex}.
     */
    public static List<String> defaultIndex = Collections.unmodifiableList(Arrays.asList("and_then_R", "by_and_by_R", "as_to_J", "as_to_O", "as_is_V", "such_as_J", "as_it_is_R", "as_such_R", "at_will_R", "be_on_V", "be_with_it_V", "but_then_R", "in_for_J", "if_not_R", "in_on_R", "in_that_R", "in_this_R", "that_is_R", "be_with_it_V", "on_it_R", "to_it_R", "be_on_V", "in_on_R", "on_that_R", "such_that_J", "to_that_R", "with_that_R", "and_then_R", "but_then_R", "at_will_R", "with_that_R", "of_this_R", "of_it_R", "with_it_R", "of_that_R", "at_a_R", "this_and_that_R", "such_and_such_J", "in_the_R", "on_the_R", "such_as_R", "is_there_V", "was_there_V", "as_it_R", "of_a_N", "to_the_R"));

    /** Index consulted for candidate expression descriptions; never {@code null}. */
    private final IMWEIndex index;

    /**
     * Creates a detector backed by an index built from {@link #defaultIndex}.
     *
     * @throws RuntimeException if opening the default index fails with an
     *         {@link IOException}
     */
    public StopWords() {
        this(getStopWordIndex());
    }

    /**
     * Creates a detector backed by the given index.
     *
     * @param iMWEIndex the index to look tokens up in; must not be {@code null}
     * @throws NullPointerException if {@code iMWEIndex} is {@code null}
     */
    public StopWords(IMWEIndex iMWEIndex) {
        if (iMWEIndex == null) {
            throw new NullPointerException("index must not be null");
        }
        this.index = iMWEIndex;
    }

    /**
     * Detects the indexed expressions present in the given token list.
     *
     * <p>For each token, candidate descriptions are fetched from the index for
     * every stem that differs from the lower-cased surface form, and then for
     * the lower-cased surface form itself. For each candidate a fresh builder
     * is added to that description's builder set and the token is offered to
     * the whole set via {@code MWEBuilder.fillSlots}. After all tokens are
     * processed, every full builder contributes its expression to the result
     * (duplicates, as judged by {@code equals}, are dropped), and the result
     * is sorted with an {@link MWEComparator} built from the token positions.
     *
     * @param list the tokens to search
     * @return the detected expressions, sorted; empty if none were found
     */
    @Override
    public <T extends IToken> List<IMWE<T>> detect(List<T> list) {
        Map<IRootMWEDesc, Set<MWEBuilder<T>>> inProgress = new HashMap<IRootMWEDesc, Set<MWEBuilder<T>>>();
        ListComparator<T> tokenOrder = new ListComparator<T>(list);
        List<IRootMWEDesc> candidates = new ArrayList<IRootMWEDesc>();

        for (T token : list) {
            candidates.clear();

            // Locale.ROOT keeps the lookup independent of the JVM default
            // locale; under e.g. a Turkish locale "I" would lower-case to a
            // dotless i and never match the ASCII index keys.
            String lowerForm = token.getForm().toLowerCase(Locale.ROOT);

            if (token.getStems() != null) {
                for (String stem : token.getStems()) {
                    // A stem equal to the surface form is covered by the
                    // surface-form lookup below; skip it to avoid duplicates.
                    if (!stem.equals(lowerForm)) {
                        candidates.addAll(this.index.get(stem));
                    }
                }
            }
            candidates.addAll(this.index.get(lowerForm));

            for (IRootMWEDesc desc : candidates) {
                Set<MWEBuilder<T>> builders = inProgress.get(desc);
                if (builders == null) {
                    builders = new HashSet<MWEBuilder<T>>();
                    inProgress.put(desc, builders);
                }
                // Order matters: the new builder must be in the set before the
                // token is offered, so it can be filled by this token too.
                builders.add(new MWEBuilder<T>(desc, tokenOrder));
                MWEBuilder.fillSlots(builders, token);
            }
        }

        List<IMWE<T>> results = new LinkedList<IMWE<T>>();
        for (Set<MWEBuilder<T>> builders : inProgress.values()) {
            for (MWEBuilder<T> builder : builders) {
                if (!builder.isFull()) {
                    continue;
                }
                IMWE<T> mwe = builder.toMWE();
                if (!results.contains(mwe)) {
                    results.add(mwe);
                }
            }
        }
        Collections.sort(results, new MWEComparator<T>(tokenOrder.getIndexMap()));
        return results;
    }

    /**
     * Builds and opens an index over {@link #defaultIndex}.
     *
     * @return the opened index
     * @throws RuntimeException wrapping the {@link IOException} if the index
     *         cannot be opened
     */
    protected static IMWEIndex getStopWordIndex() {
        MWEIndex stopWordIndex = new MWEIndex(defaultIndex);
        try {
            stopWordIndex.open();
            return stopWordIndex;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
