package org.apache.mahout.fpm.pfpgrowth;

import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.Parameters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.fpm.pfpgrowth.convertors.string.TopKStringPatterns;
import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth;
import org.apache.mahout.math.list.IntArrayList;

/* loaded from: input_file:libarx-3.7.1.jar:org/apache/mahout/fpm/pfpgrowth/PFPGrowth.class */
/**
 * Static driver for the parallel FP-Growth pipeline.
 *
 * <p>{@link #runPFPGrowth(Parameters, Configuration)} chains three MapReduce jobs
 * (parallel counting, parallel FP-Growth, aggregation) and, in between, builds the
 * frequency list ("fList") from the counting output and publishes it through the
 * Hadoop distributed cache. The remaining public methods are the individual steps and
 * the helpers used to read their results back.
 *
 * <p>All intermediate and final results live in sub-directories of the directory named
 * by the {@link #OUTPUT} parameter: {@link #PARALLEL_COUNTING}, {@link #F_LIST},
 * {@link #FP_GROWTH} and {@link #FREQUENT_PATTERNS}.
 */
public final class PFPGrowth {
    public static final String ENCODING = "encoding";
    /** Name of the frequency-list file written below the output directory. */
    public static final String F_LIST = "fList";
    /** Parameter key: number of groups the frequency list is divided into. */
    public static final String NUM_GROUPS = "numGroups";
    /** Value used for {@link #NUM_GROUPS} when the parameter is absent. */
    public static final int NUM_GROUPS_DEFAULT = 1000;
    /** Parameter key set by {@link #runPFPGrowth(Parameters, Configuration)}: group size. */
    public static final String MAX_PER_GROUP = "maxPerGroup";
    /** Parameter key: base output directory. */
    public static final String OUTPUT = "output";
    /** Parameter key: minimum count an item needs to enter the frequency list. */
    public static final String MIN_SUPPORT = "minSupport";
    public static final String MAX_HEAP_SIZE = "maxHeapSize";
    /** Parameter key: input path of the transactions. */
    public static final String INPUT = "input";
    /** Configuration key under which the serialized {@link Parameters} are stored. */
    public static final String PFP_PARAMETERS = "pfp.parameters";
    /** Glob matching the part files of a job output directory. */
    public static final String FILE_PATTERN = "part-*";
    /** Output sub-directory of the parallel FP-Growth job. */
    public static final String FP_GROWTH = "fpgrowth";
    /** Output sub-directory of the aggregation job. */
    public static final String FREQUENT_PATTERNS = "frequentpatterns";
    /** Output sub-directory of the parallel counting job. */
    public static final String PARALLEL_COUNTING = "parallelcounting";
    public static final String SPLIT_PATTERN = "splitPattern";
    public static final String USE_FPG2 = "use_fpg2";
    /** Separator pattern: a comma, pipe or tab, optionally surrounded by spaces, commas or tabs. */
    public static final Pattern SPLITTER = Pattern.compile("[ ,\t]*[,|\t][ ,\t]*");

    private PFPGrowth() {
    }

    /**
     * Reads the frequency list from the single file registered in the distributed cache.
     *
     * @param configuration configuration used to locate and read the cached file
     * @return the (item, count) pairs in the order they are stored in the file
     * @throws IOException if the cache does not hold exactly one file, or reading fails
     */
    public static List<Pair<String, Long>> readFList(Configuration configuration) throws IOException {
        List<Pair<String, Long>> fList = Lists.newArrayList();
        Path[] cachedFiles = HadoopUtil.getCachedFiles(configuration);
        if (cachedFiles.length != 1) {
            throw new IOException("Cannot read Frequency list from Distributed Cache (" + cachedFiles.length + ')');
        }
        for (Pair<Text, LongWritable> entry
                : new SequenceFileIterable<Text, LongWritable>(cachedFiles[0], true, configuration)) {
            // Copy out of the Writables into plain String/Long values before storing.
            fList.add(new Pair<String, Long>(entry.getFirst().toString(), Long.valueOf(entry.getSecond().get())));
        }
        return fList;
    }

    /**
     * Writes the frequency list to {@code <output>/fList} as a (Text, LongWritable)
     * sequence file and registers that file in the distributed cache.
     *
     * <p>Any existing file at the target path is deleted first. The cache entry is added
     * only after the writer has been closed successfully, so a failed write or close never
     * leaves a partially written file registered.
     *
     * @param iterable      the (item, count) pairs to store, written in iteration order
     * @param parameters    supplies the {@link #OUTPUT} directory
     * @param configuration configuration used for file-system access; receives the cache entry
     * @throws IOException if deleting, writing or closing the file fails
     */
    public static void saveFList(Iterable<Pair<String, Long>> iterable, Parameters parameters, Configuration configuration) throws IOException {
        Path unqualifiedPath = new Path(parameters.get(OUTPUT), F_LIST);
        FileSystem fileSystem = FileSystem.get(unqualifiedPath.toUri(), configuration);
        Path fListPath = fileSystem.makeQualified(unqualifiedPath);
        HadoopUtil.delete(configuration, fListPath);
        SequenceFile.Writer writer = new SequenceFile.Writer(fileSystem, configuration, fListPath, Text.class, LongWritable.class);
        try {
            for (Pair<String, Long> pair : iterable) {
                writer.append(new Text(pair.getFirst()), new LongWritable(pair.getSecond().longValue()));
            }
        } finally {
            writer.close();
        }
        DistributedCache.addCacheFile(fListPath.toUri(), configuration);
    }

    /**
     * Builds the frequency list from the output of the parallel counting job.
     *
     * <p>Reads every {@code part-*} file below {@code <output>/parallelcounting}, keeps the
     * items whose count is at least {@link #MIN_SUPPORT} (default 3) and returns them
     * ordered by descending count, ties broken by ascending item name.
     *
     * @param parameters supplies the {@link #OUTPUT} directory and {@link #MIN_SUPPORT}
     * @return the filtered, ordered (item, count) pairs
     */
    public static List<Pair<String, Long>> readFList(Parameters parameters) {
        int minSupport = Integer.parseInt(parameters.get(MIN_SUPPORT, "3"));
        Configuration configuration = new Configuration();
        Path countingPath = new Path(parameters.get(OUTPUT), PARALLEL_COUNTING);

        PriorityQueue<Pair<String, Long>> queue = new PriorityQueue<Pair<String, Long>>(11,
            new Comparator<Pair<String, Long>>() {
                @Override
                public int compare(Pair<String, Long> pair, Pair<String, Long> pair2) {
                    // Higher counts first; equal counts fall back to the item name.
                    int byCount = pair2.getSecond().compareTo(pair.getSecond());
                    return byCount != 0 ? byCount : pair.getFirst().compareTo(pair2.getFirst());
                }
            });

        for (Pair<Text, LongWritable> entry
                : new SequenceFileDirIterable<Text, LongWritable>(new Path(countingPath, FILE_PATTERN),
                    PathType.GLOB, null, null, true, configuration)) {
            long count = entry.getSecond().get();
            if (count >= minSupport) {
                queue.add(new Pair<String, Long>(entry.getFirst().toString(), Long.valueOf(count)));
            }
        }

        // Drain with poll(): iterating a PriorityQueue directly does not yield sorted order.
        List<Pair<String, Long>> fList = Lists.newArrayList();
        while (!queue.isEmpty()) {
            fList.add(queue.poll());
        }
        return fList;
    }

    /**
     * Returns {@code i / i2} using integer division.
     *
     * @param i  the dividend (used by this pipeline as an item index — see
     *           {@link #getGroupMembers(int, int, int)} for the inverse mapping)
     * @param i2 the divisor (group size); must not be zero
     * @return the quotient
     * @throws ArithmeticException if {@code i2} is zero
     */
    public static int getGroup(int i, int i2) {
        return i / i2;
    }

    /**
     * Lists the consecutive integers belonging to one group.
     *
     * <p>The range starts at {@code i * i2} and ends before {@code i * i2 + i2}, capped at
     * {@code i3}; it is empty when the start is not below the capped end.
     *
     * @param i  the group index
     * @param i2 the group size
     * @param i3 exclusive upper bound for the returned values
     * @return the members of the group in ascending order
     */
    public static IntArrayList getGroupMembers(int i, int i2, int i3) {
        int start = i * i2;
        int end = start + i2;
        if (end > i3) {
            end = i3;
        }
        IntArrayList members = new IntArrayList();
        for (int member = start; member < end; member++) {
            members.add(member);
        }
        return members;
    }

    /**
     * Reads the aggregated patterns from every {@code part-*} file below
     * {@code <output>/frequentpatterns}.
     *
     * @param parameters supplies the {@link #OUTPUT} directory
     * @return the concatenated contents of all part files; empty if the glob yields nothing
     * @throws IOException if the file system cannot be accessed or a file cannot be read
     */
    public static List<Pair<String, TopKStringPatterns>> readFrequentPattern(Parameters parameters) throws IOException {
        Configuration configuration = new Configuration();
        Path patternsPath = new Path(parameters.get(OUTPUT), FREQUENT_PATTERNS);
        FileSystem fileSystem = FileSystem.get(patternsPath.toUri(), configuration);
        FileStatus[] partFiles = fileSystem.globStatus(new Path(patternsPath, FILE_PATTERN));

        List<Pair<String, TopKStringPatterns>> patterns = Lists.newArrayList();
        // globStatus is documented to return null in some "nothing found" cases;
        // treat that the same as an empty match instead of failing with an NPE.
        if (partFiles == null) {
            return patterns;
        }
        for (FileStatus partFile : partFiles) {
            patterns.addAll(FPGrowth.readFrequentPattern(configuration, partFile.getPath()));
        }
        return patterns;
    }

    /**
     * Runs the complete pipeline: parallel counting, frequency-list construction,
     * parallel FP-Growth and aggregation.
     *
     * <p>Side effects on the arguments: {@code io.serializations} is overwritten on the
     * configuration, and {@link #MAX_PER_GROUP} is set on the parameters to the frequency
     * list size divided by {@link #NUM_GROUPS}, rounded up.
     *
     * @param parameters    job parameters; {@link #NUM_GROUPS} defaults to {@link #NUM_GROUPS_DEFAULT}
     * @param configuration base configuration shared by all three jobs
     * @throws IllegalArgumentException if {@link #NUM_GROUPS} is not positive
     * @throws IllegalStateException    if any of the jobs fails
     */
    public static void runPFPGrowth(Parameters parameters, Configuration configuration) throws IOException, InterruptedException, ClassNotFoundException {
        // Validate before launching any job: zero would otherwise surface as a
        // division by zero only after the counting job has already run.
        int numGroups = parameters.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
        if (numGroups <= 0) {
            throw new IllegalArgumentException(NUM_GROUPS + " must be positive, but was " + numGroups);
        }

        configuration.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
        startParallelCounting(parameters, configuration);

        List<Pair<String, Long>> fList = readFList(parameters);
        saveFList(fList, parameters, configuration);

        // Ceiling division: the last group may be smaller than the others.
        int maxPerGroup = fList.size() / numGroups;
        if (fList.size() % numGroups != 0) {
            maxPerGroup++;
        }
        parameters.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));

        startParallelFPGrowth(parameters, configuration);
        startAggregating(parameters, configuration);
    }

    /**
     * Runs the complete pipeline with a freshly created {@link Configuration}.
     *
     * @param parameters job parameters
     * @see #runPFPGrowth(Parameters, Configuration)
     */
    public static void runPFPGrowth(Parameters parameters) throws IOException, InterruptedException, ClassNotFoundException {
        runPFPGrowth(parameters, new Configuration());
    }

    /**
     * Runs the aggregation job, reading {@code <output>/fpgrowth} and writing
     * {@code <output>/frequentpatterns} (deleted first if present).
     *
     * @param parameters    job parameters, serialized into the configuration
     * @param configuration configuration the job is created from; modified in place
     * @throws IllegalStateException if the job does not complete successfully
     */
    public static void startAggregating(Parameters parameters, Configuration configuration) throws IOException, InterruptedException, ClassNotFoundException {
        prepareConfiguration(parameters, configuration);

        Path input = new Path(parameters.get(OUTPUT), FP_GROWTH);
        Job job = new Job(configuration, "PFP Aggregator Driver running over input: " + input);
        job.setJarByClass(PFPGrowth.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TopKStringPatterns.class);

        FileInputFormat.addInputPath(job, input);
        Path outPath = new Path(parameters.get(OUTPUT), FREQUENT_PATTERNS);
        FileOutputFormat.setOutputPath(job, outPath);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(AggregatorMapper.class);
        job.setCombinerClass(AggregatorReducer.class);
        job.setReducerClass(AggregatorReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        HadoopUtil.delete(configuration, outPath);
        runJob(job);
    }

    /**
     * Runs the counting job, reading the text {@link #INPUT} and writing
     * {@code <output>/parallelcounting} (deleted first if present).
     *
     * @param parameters    job parameters, serialized into the configuration
     * @param configuration configuration the job is created from; modified in place
     * @throws IllegalStateException if the job does not complete successfully
     */
    public static void startParallelCounting(Parameters parameters, Configuration configuration) throws IOException, InterruptedException, ClassNotFoundException {
        prepareConfiguration(parameters, configuration);

        String input = parameters.get(INPUT);
        Job job = new Job(configuration, "Parallel Counting Driver running over input: " + input);
        job.setJarByClass(PFPGrowth.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.addInputPath(job, new Path(input));
        Path outPath = new Path(parameters.get(OUTPUT), PARALLEL_COUNTING);
        FileOutputFormat.setOutputPath(job, outPath);
        HadoopUtil.delete(configuration, outPath);

        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(ParallelCountingMapper.class);
        job.setCombinerClass(ParallelCountingReducer.class);
        job.setReducerClass(ParallelCountingReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        runJob(job);
    }

    /**
     * Runs the FP-Growth job, reading the text {@link #INPUT} and writing
     * {@code <output>/fpgrowth} (deleted first if present).
     *
     * @param parameters    job parameters, serialized into the configuration
     * @param configuration configuration the job is created from; modified in place
     * @throws IllegalStateException if the job does not complete successfully
     */
    public static void startParallelFPGrowth(Parameters parameters, Configuration configuration) throws IOException, InterruptedException, ClassNotFoundException {
        prepareConfiguration(parameters, configuration);

        Path input = new Path(parameters.get(INPUT));
        // The job name is reproduced exactly as before (no separator before the path).
        Job job = new Job(configuration, "PFP Growth Driver running over input" + input);
        job.setJarByClass(PFPGrowth.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(TransactionTree.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TopKStringPatterns.class);

        FileInputFormat.addInputPath(job, input);
        Path outPath = new Path(parameters.get(OUTPUT), FP_GROWTH);
        FileOutputFormat.setOutputPath(job, outPath);
        HadoopUtil.delete(configuration, outPath);

        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(ParallelFPGrowthMapper.class);
        job.setCombinerClass(ParallelFPGrowthCombiner.class);
        job.setReducerClass(ParallelFPGrowthReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        runJob(job);
    }

    /**
     * Stores the serialized parameters and the compression settings shared by all jobs.
     * Must be called before the {@link Job} is created from the configuration.
     */
    private static void prepareConfiguration(Parameters parameters, Configuration configuration) {
        configuration.set(PFP_PARAMETERS, parameters.toString());
        configuration.set("mapred.compress.map.output", "true");
        configuration.set("mapred.output.compression.type", "BLOCK");
    }

    /** Runs the job to completion and raises {@link IllegalStateException} if it reports failure. */
    private static void runJob(Job job) throws IOException, InterruptedException, ClassNotFoundException {
        if (!job.waitForCompletion(true)) {
            throw new IllegalStateException("Job failed!");
        }
    }
}
