package org.apache.mahout.vectorizer.encoders;

import java.io.IOException;
import java.io.Reader;
import java.nio.CharBuffer;
import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.mahout.common.lucene.TokenStreamIterator;

/* JADX WARN: Classes with same name are omitted:
  input_file:BOOT-INF/classes/libarx-3.7.1.jar:org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.class
 */
/* loaded from: input_file:BOOT-INF/lib/libarx-3.7.1.jar:org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.class */
/**
 * A {@link TextValueEncoder} that splits text into tokens using a Lucene {@link Analyzer}.
 *
 * <p>An analyzer must be supplied through {@link #setAnalyzer(Analyzer)} before any text is
 * tokenized.
 */
public class LuceneTextValueEncoder extends TextValueEncoder {
    /** Analyzer used to tokenize text; {@code null} until {@link #setAnalyzer(Analyzer)} is called. */
    private Analyzer analyzer;

    /**
     * A {@link Reader} over a private snapshot of a {@link CharSequence}.
     *
     * <p>The characters are copied at construction time, so later changes to the source
     * sequence do not affect what this reader returns.
     */
    private static final class CharSequenceReader extends Reader {
        /** Snapshot of the input; its position marks the next character to be read. */
        private final CharBuffer buf;

        /**
         * Copies {@code input} into an internal buffer positioned at its first character.
         *
         * @param input the characters to expose through this reader
         */
        private CharSequenceReader(CharSequence input) {
            int length = input.length();
            this.buf = CharBuffer.allocate(length);
            // Copy via charAt rather than toString() so only the CharSequence accessors are relied on.
            for (int i = 0; i < length; i++) {
                this.buf.put(input.charAt(i));
            }
            this.buf.rewind();
        }

        /**
         * Reads up to {@code len} characters into {@code cbuf} starting at {@code off}.
         *
         * @param cbuf destination array
         * @param off  index in {@code cbuf} at which to start storing characters
         * @param len  maximum number of characters to read
         * @return the number of characters read, {@code 0} if {@code len} is zero, or {@code -1}
         *         if no characters remain
         */
        @Override
        public int read(char[] cbuf, int off, int len) {
            // A zero-length request is not end-of-stream: the Reader contract asks for 0 here.
            if (len == 0) {
                return 0;
            }
            int count = Math.min(len, this.buf.remaining());
            if (count <= 0) {
                return -1;
            }
            this.buf.get(cbuf, off, count);
            return count;
        }

        /** Does nothing; this reader holds only an in-memory buffer. */
        @Override
        public void close() {
        }
    }

    /**
     * Exposes the tokens of a {@link TokenStream} as an {@link Iterable} of strings.
     *
     * <p>Every call to {@link #iterator()} returns an iterator over the same underlying stream.
     * The stream is reset before each iteration, except for the very first one when the
     * instance was created with {@code firstTime == true}.
     */
    private static final class LuceneTokenIterable implements Iterable<String> {
        /** When {@code true}, the next call to {@link #iterator()} skips the stream reset. */
        private boolean firstTime;
        private final TokenStream tokenStream;

        /**
         * @param tokenStream the stream to iterate over
         * @param firstTime   {@code true} to skip resetting the stream on the first
         *                    {@link #iterator()} call; {@code false} to reset on every call
         */
        private LuceneTokenIterable(TokenStream tokenStream, boolean firstTime) {
            this.tokenStream = tokenStream;
            this.firstTime = firstTime;
        }

        /**
         * Returns an iterator over the tokens of the underlying stream.
         *
         * @return a {@link TokenStreamIterator} wrapping the stream
         * @throws IllegalStateException if resetting the stream fails with an {@link IOException}
         */
        @Override
        public Iterator<String> iterator() {
            if (this.firstTime) {
                this.firstTime = false;
            } else {
                try {
                    this.tokenStream.reset();
                } catch (IOException e) {
                    // Chain the cause so the underlying I/O failure is not lost.
                    throw new IllegalStateException("This token stream can't be reset", e);
                }
            }
            return new TokenStreamIterator(this.tokenStream);
        }
    }

    /**
     * Creates an encoder with the given name.
     *
     * @param str the encoder name, passed to the {@link TextValueEncoder} constructor
     */
    public LuceneTextValueEncoder(String str) {
        super(str);
    }

    /**
     * Sets the analyzer used by {@link #tokenize(CharSequence)}.
     *
     * @param analyzer the Lucene analyzer to tokenize with
     */
    public void setAnalyzer(Analyzer analyzer) {
        this.analyzer = analyzer;
    }

    /**
     * Tokenizes the given text with the configured analyzer.
     *
     * <p>The analyzer is asked for a token stream using this encoder's name as the field name.
     * The returned iterable resets the stream at the start of every iteration, including the
     * first.
     *
     * @param charSequence the text to tokenize
     * @return an iterable over the tokens produced by the analyzer
     * @throws IllegalStateException if no analyzer has been set, or if the analyzer fails with
     *                               an {@link IOException} while creating the token stream
     */
    @Override
    protected Iterable<String> tokenize(CharSequence charSequence) {
        if (this.analyzer == null) {
            throw new IllegalStateException("No analyzer has been set; call setAnalyzer() before tokenizing");
        }
        try {
            TokenStream tokenStream = this.analyzer.tokenStream(getName(), new CharSequenceReader(charSequence));
            tokenStream.addAttribute(CharTermAttribute.class);
            // firstTime == false: the iterable resets the stream before its first iteration too.
            return new LuceneTokenIterable(tokenStream, false);
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }
}
