package it.unimi.dsi.law.warc.tool;

import com.google.common.collect.Iterables;
import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import it.unimi.dsi.law.warc.filters.Filter;
import it.unimi.dsi.law.warc.filters.parser.FilterParser;
import it.unimi.dsi.law.warc.io.GZWarcRecord;
import it.unimi.dsi.law.warc.io.HttpResponseFilteredIterator;
import it.unimi.dsi.law.warc.io.WarcRecord;
import it.unimi.dsi.law.warc.parser.HTMLParser;
import it.unimi.dsi.law.warc.util.HttpResponse;
import it.unimi.dsi.law.warc.util.WarcHttpResponse;
import it.unimi.dsi.logging.ProgressLogger;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.security.NoSuchAlgorithmException;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:it/unimi/dsi/law/warc/tool/CountLinks.class */
/**
 * Command-line tool that reads HTTP responses from a WARC file and prints one integer per
 * non-duplicate response: the size of the link collection that {@link HTMLParser} fills in
 * for that response.
 */
public class CountLinks {
    private static final Logger LOGGER = LoggerFactory.getLogger(CountLinks.class);

    /** Default value of the {@code --buffer-size} option, as a string parsed by {@link JSAP#INTSIZE_PARSER}. */
    public static final String DEFAULT_BUFFER_SIZE = "64Ki";

    /**
     * Scans the responses available from the given stream and writes one line per non-duplicate
     * response, containing the size of the link receiver after parsing that response.
     *
     * <p>Parsing failures are logged as warnings and do not stop the scan; a line is still
     * printed for the offending response.
     *
     * @param fastBufferedInputStream the stream the WARC records are read from; it is not closed here.
     * @param z if {@code true} a {@link GZWarcRecord} is used to read records, otherwise a plain {@link WarcRecord}.
     * @param filter the filter handed to the {@link HttpResponseFilteredIterator}.
     * @param printWriter where the counts are printed, one per line; it is not closed here.
     * @throws NoSuchAlgorithmException declared by the original signature (propagated from the underlying parser/iterator).
     * @throws IOException if reading from the stream fails.
     */
    public static void run(FastBufferedInputStream fastBufferedInputStream, boolean z, Filter<HttpResponse> filter, PrintWriter printWriter) throws NoSuchAlgorithmException, IOException {
        final WarcRecord record = z ? new GZWarcRecord() : new WarcRecord();
        // The iterator refills this same response object on every call to next().
        final WarcHttpResponse response = new WarcHttpResponse();
        final HttpResponseFilteredIterator responses = new HttpResponseFilteredIterator(fastBufferedInputStream, record, response, filter);
        final HTMLParser parser = new HTMLParser();
        // NOTE(review): a single receiver is shared by all pages; presumably the parser resets it
        // at the start of each parse() — confirm against HTMLParser.
        final HTMLParser.SetLinkReceiver links = new HTMLParser.SetLinkReceiver();
        final ProgressLogger progress = new ProgressLogger(LOGGER, 1L, TimeUnit.MINUTES, "pages");

        progress.start("Counting...");
        while (responses.hasNext()) {
            responses.next();
            if (response.isDuplicate()) {
                continue;
            }
            try {
                parser.parse(response, links);
            } catch (Exception e) {
                // Best effort: a page that cannot be parsed must not abort the whole scan.
                LOGGER.warn(e.getMessage(), e);
            }
            printWriter.println(Iterables.size(links));
            progress.lightUpdate();
        }
        progress.done();
    }

    /**
     * Opens the raw input for the given WARC basename.
     *
     * @param basename the basename of the WARC file, or {@code "-"} for standard input.
     * @param gzip whether the {@code .gz} suffix must be appended after {@code .warc}.
     * @return {@link System#in} if {@code basename} is {@code "-"}, otherwise a stream over
     *     {@code basename + ".warc"} (plus {@code ".gz"} when {@code gzip} is set).
     * @throws IOException if the file cannot be opened.
     */
    private static InputStream openInput(String basename, boolean gzip) throws IOException {
        if (basename.equals("-")) {
            return System.in;
        }
        return new FileInputStream(new File(basename + ".warc" + (gzip ? ".gz" : "")));
    }

    /**
     * Parses the command line, opens the input and prints the counts on standard output
     * (encoded as ASCII).
     *
     * @param strArr the command-line arguments.
     * @throws Exception if option parsing, filter parsing or I/O fails.
     */
    public static void main(String[] strArr) throws Exception {
        final SimpleJSAP jsap = new SimpleJSAP(
                CountLinks.class.getName(),
                "Count links in pages from a WARC file.",
                new Parameter[] {
                    new FlaggedOption("bufferSize", JSAP.INTSIZE_PARSER, DEFAULT_BUFFER_SIZE, false, 'b', "buffer-size", "The size of an input I/O buffer."),
                    new Switch("gzip", 'z', "gzip", "Tells if the warc is compressed."),
                    new FlaggedOption("filter", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'f', "filter", "The filter."),
                    new UnflaggedOption("warcFile", JSAP.STRING_PARSER, "-", true, false, "The WARC file basename (if not present, or -, stdin will be used).")
                });
        final JSAPResult options = jsap.parse(strArr);
        if (jsap.messagePrinted()) {
            System.exit(1);
        }

        final boolean gzip = options.getBoolean("gzip");
        final String requestedFilter = options.getString("filter");
        // With no explicit filter, fall back to the expression "TRUE".
        final String filterSpec = requestedFilter == null ? "TRUE" : requestedFilter;
        final String warcFile = options.getString("warcFile");
        final int bufferSize = options.getInt("bufferSize");

        // The parser is used through its raw type, exactly as before; the unchecked conversion
        // is confined to this one declaration.
        @SuppressWarnings({"rawtypes", "unchecked"})
        final Filter<HttpResponse> filter = new FilterParser(HttpResponse.class).parse(filterSpec);

        // try-with-resources guarantees that the writer is flushed/closed and the input is closed
        // even if run() or one of the close() calls fails, and keeps the primary exception
        // (close failures are attached as suppressed exceptions).
        try (FastBufferedInputStream input = new FastBufferedInputStream(openInput(warcFile, gzip), bufferSize);
                PrintWriter output = new PrintWriter(new OutputStreamWriter(new FastBufferedOutputStream(System.out), "ASCII"))) {
            run(input, gzip, filter, output);
        }
    }
}
