package org.apache.mahout.cf.taste.hadoop.similarity.item;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob;
import org.apache.mahout.cf.taste.hadoop.preparation.PreparePreferenceMatrixJob;
import org.apache.mahout.cf.taste.similarity.precompute.SimilarItem;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.RowSimilarityJob;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.VectorSimilarityMeasures;
import org.apache.mahout.math.map.OpenIntLongHashMap;

/* JADX WARN: Classes with same name are omitted:
  input_file:BOOT-INF/classes/libarx-3.7.1.jar:org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.class
 */
/* loaded from: input_file:BOOT-INF/lib/libarx-3.7.1.jar:org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.class */
public final class ItemSimilarityJob extends AbstractJob {
    /** Configuration key under which {@code run} stores the path of the item-ID index; the mapper reads it in its setup. */
    public static final String ITEM_ID_INDEX_PATH_STR = ItemSimilarityJob.class.getName() + ".itemIDIndexPathStr";
    /** Configuration key under which {@code run} stores the per-item cap on emitted similarities; the mapper reads it in its setup. */
    public static final String MAX_SIMILARITIES_PER_ITEM = ItemSimilarityJob.class.getName() + ".maxSimilarItemsPerItem";
    // NOTE(review): the three defaults below are not referenced anywhere in the visible class;
    // run() registers option defaults with equal literal values (100, 500, 1). Presumably the
    // compiler inlined these constants at their use sites - confirm before changing a value here.
    private static final int DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM = 100;
    private static final int DEFAULT_MAX_PREFS = 500;
    private static final int DEFAULT_MIN_PREFS_PER_USER = 1;

    /* JADX WARN: Classes with same name are omitted:
      input_file:BOOT-INF/classes/libarx-3.7.1.jar:org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob$MostSimilarItemPairsMapper.class
     */
    /* loaded from: input_file:BOOT-INF/lib/libarx-3.7.1.jar:org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob$MostSimilarItemPairsMapper.class */
    public static class MostSimilarItemPairsMapper extends Mapper<IntWritable, VectorWritable, EntityEntityWritable, DoubleWritable> {
        private OpenIntLongHashMap indexItemIDMap;
        private int maxSimilarItemsPerItem;

        /* JADX INFO: Access modifiers changed from: protected */
        @Override // org.apache.hadoop.mapreduce.Mapper
        public void setup(Mapper<IntWritable, VectorWritable, EntityEntityWritable, DoubleWritable>.Context context) {
            Configuration configuration = context.getConfiguration();
            this.maxSimilarItemsPerItem = configuration.getInt(ItemSimilarityJob.MAX_SIMILARITIES_PER_ITEM, -1);
            this.indexItemIDMap = TasteHadoopUtils.readIDIndexMap(configuration.get(ItemSimilarityJob.ITEM_ID_INDEX_PATH_STR), configuration);
            Preconditions.checkArgument(this.maxSimilarItemsPerItem > 0, "maxSimilarItemsPerItem must be greater then 0!");
        }

        /* JADX INFO: Access modifiers changed from: protected */
        @Override // org.apache.hadoop.mapreduce.Mapper
        public void map(IntWritable intWritable, VectorWritable vectorWritable, Mapper<IntWritable, VectorWritable, EntityEntityWritable, DoubleWritable>.Context context) throws IOException, InterruptedException {
            int i = intWritable.get();
            TopSimilarItemsQueue topSimilarItemsQueue = new TopSimilarItemsQueue(this.maxSimilarItemsPerItem);
            for (Vector.Element element : vectorWritable.get().nonZeroes()) {
                SimilarItem similarItem = (SimilarItem) topSimilarItemsQueue.top();
                double d = element.get();
                if (d > similarItem.getSimilarity()) {
                    similarItem.set(this.indexItemIDMap.get(element.index()), d);
                    topSimilarItemsQueue.updateTop();
                }
            }
            long j = this.indexItemIDMap.get(i);
            for (SimilarItem similarItem2 : topSimilarItemsQueue.getTopItems()) {
                long itemID = similarItem2.getItemID();
                if (j < itemID) {
                    context.write(new EntityEntityWritable(j, itemID), new DoubleWritable(similarItem2.getSimilarity()));
                } else {
                    context.write(new EntityEntityWritable(itemID, j), new DoubleWritable(similarItem2.getSimilarity()));
                }
            }
        }
    }

    /* JADX WARN: Classes with same name are omitted:
      input_file:BOOT-INF/classes/libarx-3.7.1.jar:org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob$MostSimilarItemPairsReducer.class
     */
    /* loaded from: input_file:BOOT-INF/lib/libarx-3.7.1.jar:org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob$MostSimilarItemPairsReducer.class */
    /**
     * Writes every item pair exactly once, using the first similarity value delivered for it.
     * Any further values grouped under the same pair are not read.
     */
    public static class MostSimilarItemPairsReducer extends Reducer<EntityEntityWritable, DoubleWritable, EntityEntityWritable, DoubleWritable> {
        /**
         * @param entityEntityWritable the item pair acting as the key
         * @param iterable             similarity values grouped under that pair
         * @param context              sink for the single output record
         */
        @Override
        public void reduce(EntityEntityWritable entityEntityWritable, Iterable<DoubleWritable> iterable, Reducer<EntityEntityWritable, DoubleWritable, EntityEntityWritable, DoubleWritable>.Context context) throws IOException, InterruptedException {
            DoubleWritable firstSimilarity = iterable.iterator().next();
            context.write(entityEntityWritable, firstSimilarity);
        }
    }

    /**
     * Command-line entry point: hands the arguments to a fresh job instance via {@code ToolRunner}.
     * The exit code returned by {@code ToolRunner.run} is not used.
     *
     * @param strArr command-line arguments
     * @throws Exception whatever the job run throws
     */
    public static void main(String[] strArr) throws Exception {
        ItemSimilarityJob job = new ItemSimilarityJob();
        ToolRunner.run(job, strArr);
    }

    /**
     * Runs the item-similarity pipeline in up to three phases, each gated by
     * {@code shouldRunNextPhase}:
     * <ol>
     *   <li>{@code PreparePreferenceMatrixJob} turns the input into a rating matrix under the
     *       {@code prepareRatingMatrix} temp path;</li>
     *   <li>{@code RowSimilarityJob} computes the similarity matrix into the
     *       {@code similarityMatrix} temp path;</li>
     *   <li>a final MapReduce job writes the most similar item pairs to the output path as text.</li>
     * </ol>
     *
     * <p>A phase that reports a non-zero exit code stops the pipeline; previously those exit
     * codes were discarded and the following phase ran on the output of a failed one.
     *
     * @param strArr command-line arguments
     * @return 0 on success (or when the final phase is skipped); -1 if the arguments could not
     *         be parsed or any phase failed
     * @throws Exception propagated from option parsing or from the launched jobs
     */
    @Override // org.apache.hadoop.util.Tool
    public int run(String[] strArr) throws Exception {
        addInputOption();
        addOutputOption();
        addOption("similarityClassname", "s", "Name of distributed similarity measures class to instantiate, alternatively use one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')');
        addOption("maxSimilaritiesPerItem", "m", "try to cap the number of similar items per item to this number (default: 100)", String.valueOf(100));
        addOption("maxPrefs", "mppu", "max number of preferences to consider per user or item, users or items with more preferences will be sampled down (default: 500)", String.valueOf(500));
        addOption("minPrefsPerUser", "mp", "ignore users with less preferences than this (default: 1)", String.valueOf(1));
        addOption(RecommenderJob.BOOLEAN_DATA, "b", "Treat input as without pref values", String.valueOf(Boolean.FALSE));
        addOption(DefaultOptionCreator.THRESHOLD_OPTION, "tr", "discard item pairs with a similarity value below this", false);
        addOption("randomSeed", (String) null, "use this seed for sampling", false);

        Map<String, List<String>> parsedArgs = parseArguments(strArr);
        if (parsedArgs == null) {
            return -1;
        }

        String similarityClassName = getOption("similarityClassname");
        int maxSimilaritiesPerItem = Integer.parseInt(getOption("maxSimilaritiesPerItem"));
        int maxPrefs = Integer.parseInt(getOption("maxPrefs"));
        int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
        boolean booleanData = Boolean.parseBoolean(getOption(RecommenderJob.BOOLEAN_DATA));
        // NOTE(review): Double.MIN_VALUE / Long.MIN_VALUE are passed on when the option is absent;
        // presumably RowSimilarityJob treats them as "no threshold" / "no fixed seed" - confirm.
        double threshold = hasOption(DefaultOptionCreator.THRESHOLD_OPTION) ? Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION)) : Double.MIN_VALUE;
        long randomSeed = hasOption("randomSeed") ? Long.parseLong(getOption("randomSeed")) : Long.MIN_VALUE;

        Path similarityMatrixPath = getTempPath("similarityMatrix");
        Path prepPath = getTempPath("prepareRatingMatrix");
        AtomicInteger currentPhase = new AtomicInteger();

        // Phase 1: build the rating matrix from the raw preference input.
        if (shouldRunNextPhase(parsedArgs, currentPhase)) {
            int prepareExitCode = ToolRunner.run(getConf(), new PreparePreferenceMatrixJob(), new String[]{"--input", getInputPath().toString(), "--output", prepPath.toString(), "--minPrefsPerUser", String.valueOf(minPrefsPerUser), "--booleanData", String.valueOf(booleanData), "--tempDir", getTempPath().toString()});
            if (prepareExitCode != 0) {
                // The next phases read this job's output; do not continue on a failed run.
                return -1;
            }
        }

        // Phase 2: compute row (item-item) similarities on the rating matrix.
        if (shouldRunNextPhase(parsedArgs, currentPhase)) {
            int numberOfUsers = HadoopUtil.readInt(new Path(prepPath, PreparePreferenceMatrixJob.NUM_USERS), getConf());
            int similarityExitCode = ToolRunner.run(getConf(), new RowSimilarityJob(), new String[]{"--input", new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(), "--output", similarityMatrixPath.toString(), "--numberOfColumns", String.valueOf(numberOfUsers), "--similarityClassname", similarityClassName, "--maxObservationsPerRow", String.valueOf(maxPrefs), "--maxObservationsPerColumn", String.valueOf(maxPrefs), "--maxSimilaritiesPerRow", String.valueOf(maxSimilaritiesPerItem), "--excludeSelfSimilarity", String.valueOf(Boolean.TRUE), "--threshold", String.valueOf(threshold), "--randomSeed", String.valueOf(randomSeed), "--tempDir", getTempPath().toString()});
            if (similarityExitCode != 0) {
                return -1;
            }
        }

        // Phase 3: translate the similarity matrix into item-ID pairs and write them as text.
        if (!shouldRunNextPhase(parsedArgs, currentPhase)) {
            return 0;
        }
        Job mostSimilarItems = prepareJob(similarityMatrixPath, getOutputPath(), SequenceFileInputFormat.class, MostSimilarItemPairsMapper.class, EntityEntityWritable.class, DoubleWritable.class, MostSimilarItemPairsReducer.class, EntityEntityWritable.class, DoubleWritable.class, TextOutputFormat.class);
        Configuration mostSimilarItemsConf = mostSimilarItems.getConfiguration();
        mostSimilarItemsConf.set(ITEM_ID_INDEX_PATH_STR, new Path(prepPath, PreparePreferenceMatrixJob.ITEMID_INDEX).toString());
        mostSimilarItemsConf.setInt(MAX_SIMILARITIES_PER_ITEM, maxSimilaritiesPerItem);
        return mostSimilarItems.waitForCompletion(true) ? 0 : -1;
    }
}
