package co.cask.directives.nlp;

import co.cask.cdap.api.annotation.Description;
import co.cask.cdap.api.annotation.Name;
import co.cask.cdap.api.annotation.Plugin;
import co.cask.directives.nlp.internal.PorterStemmer;
import co.cask.wrangler.api.Arguments;
import co.cask.wrangler.api.Directive;
import co.cask.wrangler.api.DirectiveExecutionException;
import co.cask.wrangler.api.DirectiveParseException;
import co.cask.wrangler.api.ExecutorContext;
import co.cask.wrangler.api.Row;
import co.cask.wrangler.api.annotations.Categories;
import co.cask.wrangler.api.parser.ColumnName;
import co.cask.wrangler.api.parser.TokenType;
import co.cask.wrangler.api.parser.UsageDefinition;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.avro.file.DataFileConstants;
import org.apache.xmlbeans.impl.jam.xml.JamXmlElements;

@Name(Stemming.NAME)
@Plugin(type = Directive.Type)
@Categories(categories = {"nlp"})
@Description("Apply Porter Stemming on the column value.")
/* loaded from: input_file:co/cask/directives/nlp/Stemming.class */
public class Stemming implements Directive {
    public static final String NAME = "stemming";
    private String column;
    private PorterStemmer stemmer;

    @Override // co.cask.wrangler.api.Directive
    public UsageDefinition define() {
        UsageDefinition.Builder builder = UsageDefinition.builder(NAME);
        builder.define(JamXmlElements.COLUMN, TokenType.COLUMN_NAME);
        return builder.build();
    }

    @Override // co.cask.wrangler.api.Executor
    public void initialize(Arguments arguments) throws DirectiveParseException {
        this.column = ((ColumnName) arguments.value(JamXmlElements.COLUMN)).value();
        this.stemmer = new PorterStemmer();
    }

    @Override // co.cask.wrangler.api.Executor
    public void destroy() {
    }

    @Override // co.cask.wrangler.api.Executor
    public List<Row> execute(List<Row> list, ExecutorContext executorContext) throws DirectiveExecutionException {
        for (Row row : list) {
            ArrayList arrayList = new ArrayList();
            int find = row.find(this.column);
            if (find != -1) {
                Object value = row.getValue(find);
                if (value == null || !((value instanceof List) || (value instanceof String[]) || (value instanceof String))) {
                    Object[] objArr = new Object[3];
                    objArr[0] = toString();
                    objArr[1] = value != null ? value.getClass().getName() : DataFileConstants.NULL_CODEC;
                    objArr[2] = this.column;
                    throw new DirectiveExecutionException(String.format("%s : Invalid type '%s' of column '%s'. Should be of type String, String[] or List<String>.", objArr));
                }
                try {
                    row.add(String.format("%s_porter", this.column), this.stemmer.process(value instanceof String[] ? Arrays.asList((String[]) value) : value instanceof List ? (List) value : Arrays.asList(((String) value).split("\\W+"))));
                } catch (IOException e) {
                    throw new DirectiveExecutionException(String.format("%s : Unable to apply porter stemmer on column '%s'. %s", toString(), this.column, e.getMessage()));
                }
            } else {
                row.add(String.format("%s_porter", this.column), arrayList);
            }
        }
        return list;
    }
}
