package com.oxygenxml.terminology.checker.pos;

import com.google.common.collect.ImmutableMap;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/* loaded from: input_file:oxygen-terminology-checker-addon-4.2.1/lib/oxygen-terminology-checker-addon-4.2.1.jar:com/oxygenxml/terminology/checker/pos/POSWhitespaceTokenizer.class */
/**
 * Splits text into word spans for part-of-speech tagging.
 *
 * <p>Segmentation is delegated to a locale-specific {@link BreakIterator} word instance; only
 * segments that begin with a letter or a digit are reported, so whitespace and punctuation
 * segments are dropped.
 */
public class POSWhitespaceTokenizer {
    /**
     * Locale used for word segmentation of each supported tagger language. Languages missing
     * from this map fall back to {@link Locale#ENGLISH} in {@link #tokenize}.
     */
    private static final Map<POSTaggerLangs, Locale> POS_TAGGING_LANG_TO_LOCALE =
            // Explicit type witness: without it the chained builder is inferred as
            // Builder<Object, Object> and the assignment does not type-check.
            ImmutableMap.<POSTaggerLangs, Locale>builder()
                    .put(POSTaggerLangs.EN, Locale.ENGLISH)
                    .put(POSTaggerLangs.DE, Locale.GERMAN)
                    .put(POSTaggerLangs.FR, Locale.FRENCH)
                    .put(POSTaggerLangs.IT, Locale.ITALIAN)
                    .put(POSTaggerLangs.NL, Locale.forLanguageTag("nl"))
                    .build();

    /**
     * Extracts the text covered by each span.
     *
     * @param list spans whose {@code getStart()}/{@code getEnd()} values index into
     *     {@code charSequence}
     * @param charSequence the text the spans refer to
     * @return one string per span, in the same order as {@code list}
     */
    public static String[] mapToWords(List<WordSpan> list, CharSequence charSequence) {
        int size = list.size();
        String[] words = new String[size];
        for (int i = 0; i < size; i++) {
            WordSpan wordSpan = list.get(i);
            words[i] = charSequence.subSequence(wordSpan.getStart(), wordSpan.getEnd()).toString();
        }
        return words;
    }

    /**
     * Finds the word-like segments of the given text.
     *
     * @param charSequence the text to segment
     * @param pOSTaggerLangs the language that selects the segmentation locale; languages without
     *     a mapped locale are segmented with {@link Locale#ENGLISH}
     * @return a span for every segment whose first code point is a letter or a digit, in text
     *     order; each span is created from the segment's start boundary and the boundary that
     *     follows it
     */
    public static List<WordSpan> tokenize(CharSequence charSequence, POSTaggerLangs pOSTaggerLangs) {
        Locale locale = POS_TAGGING_LANG_TO_LOCALE.getOrDefault(pOSTaggerLangs, Locale.ENGLISH);
        BreakIterator wordInstance = BreakIterator.getWordInstance(locale);
        wordInstance.setText(new CharacterIteratorOverCharSequence(charSequence));

        List<WordSpan> spans = new ArrayList<>();
        int start = wordInstance.first();
        while (start != BreakIterator.DONE) {
            int end = wordInstance.next();
            // Inspect the full code point rather than a single char: a segment that starts with
            // a supplementary-plane letter begins with a surrogate, which
            // Character.isLetterOrDigit(char) always rejects.
            if (end != BreakIterator.DONE
                    && Character.isLetterOrDigit(Character.codePointAt(charSequence, start))) {
                spans.add(new WordSpan(start, end));
            }
            start = end;
        }
        return spans;
    }
}
