package org.grobid.core.analyzers;

import java.util.ArrayList;
import java.util.List;
import org.grobid.core.lang.Language;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wipo.nlp.textboundaries.ReTokenizer;
import org.wipo.nlp.textboundaries.ReTokenizerFactory;

/* loaded from: input_file:WEB-INF/lib/grobid-core-0.3.4.jar:org/grobid/core/analyzers/GrobidAnalyzer.class */
/**
 * Language-aware tokenizer front end, exposed as a lazily created singleton.
 *
 * <p>Dispatch is driven by {@link Language#getLangId()}:
 * <ul>
 *   <li>{@code "ja"}, {@code "zh"} / {@code "zh-cn"} and {@code "kr"} use a dedicated
 *       {@link ReTokenizer}, created on first use and cached on this instance;</li>
 *   <li>{@code "ar"} uses {@link GrobidDefaultAnalyzer} and then maps every character of every
 *       token through {@code ArabicChars.arabicCharacters};</li>
 *   <li>any other id, a {@code null} language or a {@code null} id uses
 *       {@link GrobidDefaultAnalyzer} directly.</li>
 * </ul>
 *
 * <p>NOTE(review): the cached {@link ReTokenizer} fields are initialised lazily without
 * synchronisation; whether concurrent use of this singleton is safe depends on
 * {@link ReTokenizer} and should be confirmed against its documentation.
 */
public class GrobidAnalyzer {
    private static final Logger LOGGER = LoggerFactory.getLogger(GrobidAnalyzer.class);

    private static final String LANG_JAPANESE = "ja";
    private static final String LANG_CHINESE = "zh";
    private static final String LANG_CHINESE_CN = "zh-cn";
    private static final String LANG_KOREAN = "kr";
    private static final String LANG_ARABIC = "ar";

    private static volatile GrobidAnalyzer instance;

    private ReTokenizer jaAnalyzer = null;
    private ReTokenizer krAnalyzer = null;
    private ReTokenizer zhAnalyzer = null;

    /**
     * Returns the shared analyzer, creating it on first call.
     *
     * @return the single {@code GrobidAnalyzer} instance, never {@code null}
     */
    public static GrobidAnalyzer getInstance() {
        if (instance == null) {
            getNewInstance();
        }
        return instance;
    }

    /**
     * Creates the shared instance if it does not exist yet.
     *
     * <p>The null check is repeated under the class lock so that threads which raced past the
     * unsynchronised check in {@link #getInstance()} do not each install their own instance.
     */
    private static synchronized void getNewInstance() {
        if (instance == null) {
            LOGGER.debug("Get new instance of GrobidAnalyzer");
            instance = new GrobidAnalyzer();
        }
    }

    private GrobidAnalyzer() {
    }

    /**
     * Tokenizes {@code str} with the default analyzer (no language information).
     *
     * @param str text to tokenize; may be {@code null} or empty
     * @return the tokens, or an empty list when {@code str} is {@code null} or empty
     * @throws Exception propagated from the underlying analyzer
     */
    public List<String> tokenize(String str) throws Exception {
        return tokenize(null, str);
    }

    /**
     * Tokenizes {@code str} with the analyzer selected by {@code language}.
     *
     * @param language language of the text; {@code null} (or a {@code null} language id) selects
     *                 the default analyzer
     * @param str      text to tokenize; may be {@code null} or empty
     * @return the tokens, or an empty list when {@code str} is {@code null} or empty
     * @throws Exception propagated from the underlying analyzer or tokenizer factory
     */
    public List<String> tokenize(Language language, String str) throws Exception {
        if (str == null || str.length() == 0) {
            return new ArrayList<String>();
        }
        String langId = (language == null) ? null : language.getLangId();
        if (langId == null) {
            return GrobidDefaultAnalyzer.tokenize(str);
        }
        if (LANG_JAPANESE.equals(langId)) {
            if (this.jaAnalyzer == null) {
                this.jaAnalyzer = ReTokenizerFactory.create("ja_g");
            }
            return this.jaAnalyzer.tokensAsList(str);
        }
        if (LANG_CHINESE.equals(langId) || LANG_CHINESE_CN.equals(langId)) {
            if (this.zhAnalyzer == null) {
                this.zhAnalyzer = ReTokenizerFactory.create("zh_g");
            }
            return this.zhAnalyzer.tokensAsList(str);
        }
        if (LANG_KOREAN.equals(langId)) {
            if (this.krAnalyzer == null) {
                this.krAnalyzer = ReTokenizerFactory.create("kr_g");
            }
            return this.krAnalyzer.tokensAsList(str);
        }
        List<String> tokens = GrobidDefaultAnalyzer.tokenize(str);
        if (LANG_ARABIC.equals(langId)) {
            mapArabicCharacters(tokens);
        }
        return tokens;
    }

    /**
     * Re-tokenizes already segmented chunks with the default analyzer.
     *
     * @param list chunks to re-tokenize; may be {@code null} or empty
     * @return the resulting tokens, or an empty list when {@code list} is {@code null} or empty
     * @throws Exception propagated from the underlying analyzer
     */
    public List<String> retokenize(List<String> list) throws Exception {
        return retokenize(null, list);
    }

    /**
     * Re-tokenizes already segmented chunks with the analyzer selected by {@code language}.
     *
     * <p>Languages handled specially by {@link #tokenize(Language, String)} are processed by
     * tokenizing each chunk with that method and concatenating the results, so this method never
     * returns {@code null}. All other languages are delegated to
     * {@link GrobidDefaultAnalyzer#retokenize}.
     *
     * @param language language of the chunks; {@code null} (or a {@code null} language id)
     *                 selects the default analyzer
     * @param list     chunks to re-tokenize; may be {@code null} or empty
     * @return the resulting tokens, or an empty list when {@code list} is {@code null} or empty
     * @throws Exception propagated from the underlying analyzer or tokenizer factory
     */
    public List<String> retokenize(Language language, List<String> list) throws Exception {
        if (list == null || list.isEmpty()) {
            return new ArrayList<String>();
        }
        String langId = (language == null) ? null : language.getLangId();
        if (!hasDedicatedTokenization(langId)) {
            return GrobidDefaultAnalyzer.retokenize(list);
        }
        List<String> result = new ArrayList<String>();
        for (String chunk : list) {
            // tokenize() yields an empty list for null/empty chunks, so addAll is always safe.
            result.addAll(tokenize(language, chunk));
        }
        return result;
    }

    /** Tells whether {@code langId} is one of the ids that {@code tokenize} handles specially. */
    private static boolean hasDedicatedTokenization(String langId) {
        return LANG_JAPANESE.equals(langId)
                || LANG_CHINESE.equals(langId)
                || LANG_CHINESE_CN.equals(langId)
                || LANG_KOREAN.equals(langId)
                || LANG_ARABIC.equals(langId);
    }

    /** Replaces each token in place with its character-by-character {@code ArabicChars} mapping. */
    private static void mapArabicCharacters(List<String> tokens) {
        for (int index = 0; index < tokens.size(); index++) {
            String token = tokens.get(index);
            StringBuilder mapped = new StringBuilder();
            for (int pos = 0; pos < token.length(); pos++) {
                mapped.append(ArabicChars.arabicCharacters(token.charAt(pos)));
            }
            tokens.set(index, mapped.toString());
        }
    }
}
