package org.wipo.analyzers.wipokr;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.wipo.analyzers.wipokr.morph.AnalysisOutput;
import org.wipo.analyzers.wipokr.morph.MorphAnalyzer;
import org.wipo.analyzers.wipokr.morph.MorphException;
import org.wipo.analyzers.wipokr.morph.WordSpaceAnalyzer;

/* loaded from: input_file:WEB-INF/lib/wipo-analysers-0.0.1.jar:org/wipo/analyzers/wipokr/KoreanFilter.class */
/**
 * Token filter that post-processes the tokens of an upstream Korean token stream.
 *
 * <p>Tokens whose type equals {@code KoreanTokenizer.TOKEN_TYPES[16]} are run through the
 * morphological analyzer; every other token goes through {@link #analysisETC(Token)}, which
 * strips a trailing {@code 's} from apostrophe-typed tokens and removes dots from
 * acronym-typed tokens.
 *
 * <p>When the analyzer finds that a term is exactly {@code stem + josa} or
 * {@code stem + vsfx + eomi}, the term is emitted as two tokens: the stem, followed by the
 * suffix prefixed with a minus sign (U+2212). Pending tokens are buffered in internal queues
 * and drained on subsequent {@link #incrementToken()} calls.
 *
 * <p>NOTE(review): instances keep mutable queues and are presumably meant to be used by a
 * single thread at a time, like other Lucene token streams — confirm against callers.
 */
public final class KoreanFilter extends TokenFilter {
    private static final boolean DECOMPOUND = false;
    private static final String APOSTROPHE_TYPE = KoreanTokenizerImpl.TOKEN_TYPES[1];
    private static final String ACRONYM_TYPE = KoreanTokenizerImpl.TOKEN_TYPES[2];

    /** Tokens produced by the Korean analysis that still have to be emitted. */
    private final LinkedList<Token> koreanQueue;
    /** Tokens produced by {@link #analysisCJ(Token, int)} that still have to be emitted. */
    private final LinkedList<Token> cjQueue;
    private final MorphAnalyzer morph;
    final WordSpaceAnalyzer wsAnal;
    /** When true, {@link #extractKeyword} also registers bigrams of every stem. */
    private boolean bigrammable;
    /** When true, the original term (and its word-space segments) are kept as keywords. */
    private boolean hasOrigin;
    /**
     * When true (the default), a Korean token that cannot be split into stem + suffix is
     * returned unchanged and no keyword extraction takes place.
     */
    public boolean returnOnlyOne;

    private final CharTermAttribute termAttr;
    private final TypeAttribute typeAttr;
    private final PositionIncrementAttribute posAttr;
    private final OffsetAttribute offsetAttr;

    /**
     * Creates a filter with bigrams enabled, the original term kept, and
     * {@link #returnOnlyOne} set to {@code true}.
     *
     * @param tokenStream the upstream token stream
     */
    public KoreanFilter(TokenStream tokenStream) {
        super(tokenStream);
        this.bigrammable = true;
        this.hasOrigin = true;
        this.returnOnlyOne = true;
        this.termAttr = addAttribute(CharTermAttribute.class);
        this.typeAttr = addAttribute(TypeAttribute.class);
        this.posAttr = addAttribute(PositionIncrementAttribute.class);
        this.offsetAttr = addAttribute(OffsetAttribute.class);
        this.koreanQueue = new LinkedList<>();
        this.cjQueue = new LinkedList<>();
        this.morph = new MorphAnalyzer();
        this.wsAnal = new WordSpaceAnalyzer();
    }

    /**
     * @param tokenStream the upstream token stream
     * @param bigrammable whether bigrams of each stem are added during keyword extraction
     */
    public KoreanFilter(TokenStream tokenStream, boolean bigrammable) {
        this(tokenStream);
        this.bigrammable = bigrammable;
    }

    /**
     * @param tokenStream the upstream token stream
     * @param bigrammable whether bigrams of each stem are added during keyword extraction
     * @param hasOrigin   whether the original term is kept among the extracted keywords
     */
    public KoreanFilter(TokenStream tokenStream, boolean bigrammable, boolean hasOrigin) {
        this(tokenStream, bigrammable);
        this.hasOrigin = hasOrigin;
    }

    /**
     * Analyzes one Korean token.
     *
     * <p>Order of evaluation:
     * <ol>
     *   <li>no analysis results: the token is returned unchanged;</li>
     *   <li>the term equals {@code stem + josa} or {@code stem + vsfx + eomi} for some result:
     *       the stem is returned and the suffix token is queued;</li>
     *   <li>{@link #returnOnlyOne} is set: the token is returned unchanged;</li>
     *   <li>otherwise keywords are extracted, queued, and the first one is returned.</li>
     * </ol>
     *
     * @param token   the token to analyze
     * @param skipped number of preceding input tokens that produced no output; added to the
     *                position increment of the first extracted keyword
     * @return the next token to emit, or {@code null} when keyword extraction yields nothing
     * @throws MorphException if the morphological analyzer fails
     */
    private Token analysisKorean(Token token, int skipped) throws MorphException {
        String term = token.toString();
        List<AnalysisOutput> analyses = this.morph.analyze(term);
        if (analyses.isEmpty()) {
            return token;
        }

        HashMap<String, Integer> keywords = new HashMap<>();
        if (this.hasOrigin) {
            keywords.put(term, Integer.valueOf(1));
        }

        for (AnalysisOutput output : analyses) {
            String stem = output.getStem();
            if (term.equals(stem)) {
                continue;
            }
            if (term.equals(stem + output.getJosa())) {
                return splitStemAndSuffix(token, stem, output.getJosa(), KoreanTokenizer.TOKEN_TYPES[11]);
            }
            String verbSuffix = output.getVsfx() + output.getEomi();
            if (term.equals(stem + verbSuffix)) {
                return splitStemAndSuffix(token, stem, verbSuffix, KoreanTokenizer.TOKEN_TYPES[12]);
            }
        }

        if (this.returnOnlyOne) {
            return token;
        }

        if (analyses.get(0).getScore() == 100) {
            extractKeyword(analyses, keywords);
        } else {
            try {
                List<AnalysisOutput> segments = this.wsAnal.analyze(term);
                List<AnalysisOutput> segmentAnalyses = new ArrayList<>();
                if (segments.size() > 1) {
                    for (AnalysisOutput segment : segments) {
                        if (this.hasOrigin) {
                            keywords.put(segment.getSource(), Integer.valueOf(1));
                        }
                        segmentAnalyses.addAll(this.morph.analyze(segment.getSource()));
                    }
                } else {
                    segmentAnalyses.addAll(segments);
                }
                extractKeyword(segmentAnalyses, keywords);
            } catch (Exception ignored) {
                // Best effort: if word-space analysis fails, fall back to the plain
                // morphological results instead of failing the whole stream.
                extractKeyword(analyses, keywords);
            }
        }

        // NOTE(review): HashMap iteration order decides which keyword carries the position
        // increment; all others are stacked on the same position.
        boolean first = true;
        for (String keyword : keywords.keySet()) {
            int index = term.indexOf(keyword);
            int start = token.startOffset() + (index != -1 ? index : 0);
            int end = index != -1 ? token.startOffset() + index + keyword.length() : token.endOffset();
            Token keywordToken = new Token(keyword, start, end, KoreanTokenizer.TOKEN_TYPES[8]);
            keywordToken.setPositionIncrement(first ? token.getPositionIncrement() + skipped : 0);
            first = false;
            this.koreanQueue.add(keywordToken);
        }
        return this.koreanQueue.isEmpty() ? null : this.koreanQueue.removeFirst();
    }

    /**
     * Queues a stem token and a suffix token for {@code token} and returns the first queued
     * token.
     *
     * <p>The suffix text is prefixed with a minus sign (U+2212). Its offsets start exactly
     * where the stem ends: end offsets are exclusive, so starting one character later would
     * skip a character of the source text and give a one-character suffix zero width.
     *
     * @param token      the original token supplying the offsets
     * @param stem       the stem text
     * @param suffix     the suffix text without the minus-sign prefix
     * @param suffixType the token type assigned to the suffix token
     * @return the head of the Korean queue after both tokens were appended
     */
    private Token splitStemAndSuffix(Token token, String stem, String suffix, String suffixType) {
        int stemEnd = token.startOffset() + stem.length();
        this.koreanQueue.add(new Token(stem, token.startOffset(), stemEnd, KoreanTokenizer.TOKEN_TYPES[9]));
        this.koreanQueue.add(new Token("−" + suffix, stemEnd, token.endOffset(), suffixType));
        return this.koreanQueue.removeFirst();
    }

    /**
     * Adds the stem of every analysis result whose part of speech is not {@code 'V'} to
     * {@code hashMap}; when bigrams are enabled, bigrams of every stem are added as well.
     *
     * @param list    analysis results to harvest
     * @param hashMap keyword collector; only its keys are read by this class
     * @throws MorphException declared for callers; see the analyzer accessors
     */
    private void extractKeyword(List<AnalysisOutput> list, HashMap<String, Integer> hashMap) throws MorphException {
        for (AnalysisOutput output : list) {
            if (output.getPos() != 'V') {
                hashMap.put(output.getStem(), Integer.valueOf(1));
            }
            if (this.bigrammable) {
                addBiagramToMap(output.getStem(), hashMap);
            }
        }
    }

    /**
     * Registers overlapping two-character substrings of {@code str} as keys of
     * {@code hashMap}. A run of ASCII letters/digits is registered as a single key instead of
     * being cut into bigrams. The last character is never the start of an entry.
     *
     * @param str     the text to cut into bigrams
     * @param hashMap keyword collector
     */
    private void addBiagramToMap(String str, HashMap<String, Integer> hashMap) {
        int length = str.length();
        int pos = 0;
        while (pos < length - 1) {
            if (isAlphaNumChar(str.charAt(pos))) {
                String run = findAlphaNumeric(str.substring(pos));
                hashMap.put(run, Integer.valueOf(0));
                pos += run.length();
            } else {
                // pos < length - 1 guarantees pos + 2 <= length.
                hashMap.put(str.substring(pos, pos + 2), Integer.valueOf(0));
                pos++;
            }
        }
    }

    /**
     * Returns the longest prefix of {@code str} consisting only of ASCII letters and digits.
     *
     * @param str the text to scan
     * @return the leading alphanumeric run, possibly empty
     */
    private String findAlphaNumeric(String str) {
        int end = 0;
        while (end < str.length() && isAlphaNumChar(str.charAt(end))) {
            end++;
        }
        return str.substring(0, end);
    }

    /**
     * Wraps {@code token} in a new token typed {@code KoreanTokenizer.TOKEN_TYPES[7]} and
     * passes it through the CJ queue. Not called from within this class.
     *
     * @param token   the source token
     * @param skipped number of skipped positions to add to the position increment
     * @return the head of the CJ queue after the new token was appended
     */
    private Token analysisCJ(Token token, int skipped) {
        // The start offset must be the source token's own start; a constant 0 would make
        // offsets go backwards for every token but the first.
        Token cjToken = new Token(token.toString(), token.startOffset(), token.endOffset(), KoreanTokenizer.TOKEN_TYPES[7]);
        cjToken.setPositionIncrement(token.getPositionIncrement() + skipped);
        this.cjQueue.add(cjToken);
        return this.cjQueue.removeFirst();
    }

    /**
     * Normalizes non-Korean tokens in place: drops a trailing {@code 's}/{@code 'S} from
     * apostrophe-typed tokens and removes every {@code '.'} from acronym-typed tokens.
     *
     * @param token the token to normalize
     * @return the same token instance
     */
    private Token analysisETC(Token token) {
        char[] buffer = token.buffer();
        int length = token.length();
        String type = token.type();
        // Compare type names by value; the token is rebuilt from attribute values, so
        // reference equality with the constants is not guaranteed.
        if (APOSTROPHE_TYPE.equals(type)) {
            boolean possessive = length >= 2
                    && buffer[length - 2] == '\''
                    && (buffer[length - 1] == 's' || buffer[length - 1] == 'S');
            if (possessive) {
                token.setLength(length - 2);
            }
        } else if (ACRONYM_TYPE.equals(type)) {
            // Compact the buffer in place, skipping dots.
            int kept = 0;
            for (int i = 0; i < length; i++) {
                char c = buffer[i];
                if (c != '.') {
                    buffer[kept++] = c;
                }
            }
            token.setLength(kept);
        }
        return token;
    }

    /**
     * Tells whether {@code c} is an ASCII digit or an ASCII letter. Punctuation between
     * {@code 'Z'} and {@code 'a'} ({@code [ \ ] ^ _ `}) is not alphanumeric.
     *
     * @param c the character code to test
     * @return {@code true} for {@code 0-9}, {@code A-Z} and {@code a-z}
     */
    private boolean isAlphaNumChar(int c) {
        return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    /**
     * Sets whether the original term is kept among the extracted keywords.
     *
     * @param z the new value
     */
    public void setHasOrigin(boolean z) {
        this.hasOrigin = z;
    }

    /** Copies term text, type, position increment and offsets of {@code token} to the attributes. */
    private void emit(Token token) {
        this.termAttr.setEmpty();
        this.termAttr.append(token.toString());
        this.typeAttr.setType(token.type());
        this.posAttr.setPositionIncrement(token.getPositionIncrement());
        this.offsetAttr.setOffset(token.startOffset(), token.endOffset());
    }

    /**
     * Emits the next token: first anything pending in the Korean queue, then anything pending
     * in the CJ queue, then the result of analyzing the next upstream token.
     *
     * @return {@code true} if a token was emitted, {@code false} at end of stream
     * @throws IOException if the upstream fails or morphological analysis throws; the
     *                     {@link MorphException} is preserved as the cause
     */
    @Override // org.apache.lucene.analysis.TokenStream
    public boolean incrementToken() throws IOException {
        if (!this.koreanQueue.isEmpty()) {
            emit(this.koreanQueue.removeFirst());
            return true;
        }
        if (!this.cjQueue.isEmpty()) {
            emit(this.cjQueue.removeFirst());
            return true;
        }
        int skipped = 0;
        while (this.input.incrementToken()) {
            try {
                // NOTE(review): the upstream position increment is not copied into the
                // rebuilt token, so it starts from Token's default — confirm this is intended.
                Token token = new Token();
                token.reinit(this.termAttr.toString(), this.offsetAttr.startOffset(), this.offsetAttr.endOffset(), this.typeAttr.type());
                Token result = this.typeAttr.type().equals(KoreanTokenizer.TOKEN_TYPES[16])
                        ? analysisKorean(token, skipped)
                        : analysisETC(token);
                if (result != null) {
                    emit(result);
                    return true;
                }
                skipped++;
            } catch (MorphException e) {
                throw new IOException(e.getMessage(), e);
            }
        }
        return false;
    }

    /** Resets the upstream and discards all pending queued tokens. */
    @Override // org.apache.lucene.analysis.TokenFilter, org.apache.lucene.analysis.TokenStream
    public void reset() throws IOException {
        super.reset();
        this.cjQueue.clear();
        this.koreanQueue.clear();
    }
}
