package org.lockss.crawler;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.Vector;
import junit.textui.TestRunner;
import org.apache.commons.collections.set.ListOrderedSet;
import org.apache.oro.text.regex.MalformedPatternException;
import org.apache.oro.text.regex.Pattern;
import org.lockss.config.ConfigManager;
import org.lockss.config.Configuration;
import org.lockss.config.CurrentConfig;
import org.lockss.crawler.FollowLinkCrawler;
import org.lockss.daemon.ConfigParamDescr;
import org.lockss.daemon.CrawlWindow;
import org.lockss.daemon.Crawler;
import org.lockss.daemon.LoginPageChecker;
import org.lockss.daemon.PermissionChecker;
import org.lockss.daemon.PluginException;
import org.lockss.plugin.ArchivalUnit;
import org.lockss.plugin.AuTestUtil;
import org.lockss.plugin.AuUtil;
import org.lockss.plugin.ContentValidationException;
import org.lockss.plugin.FilterFactory;
import org.lockss.plugin.UrlFetcher;
import org.lockss.state.AuState;
import org.lockss.state.MockAuState;
import org.lockss.state.SubstanceChecker;
import org.lockss.test.ConfigurationUtil;
import org.lockss.test.DelimitedLinkExtractor;
import org.lockss.test.LockssTestCase;
import org.lockss.test.MockArchivalUnit;
import org.lockss.test.MockCachedUrlSet;
import org.lockss.test.MockCrawlRule;
import org.lockss.test.MockCrawler;
import org.lockss.test.MockLinkExtractor;
import org.lockss.test.MockLockssDaemon;
import org.lockss.test.MockPlugin;
import org.lockss.test.MockUrlFetcher;
import org.lockss.test.StringInputStream;
import org.lockss.util.CIProperties;
import org.lockss.util.ListUtil;
import org.lockss.util.MapUtil;
import org.lockss.util.RegexpUtil;
import org.lockss.util.SetUtil;
import org.lockss.util.StringUtil;
import org.lockss.util.UrlUtil;
import org.lockss.util.time.TimeBase;
import org.lockss.util.urlconn.CacheException;
import org.lockss.util.urlconn.CacheSuccess;

/* loaded from: input_file:org/lockss/crawler/TestFollowLinkCrawler.class */
public class TestFollowLinkCrawler extends LockssTestCase {
    // Fixtures created fresh for every test by setUp().
    private MockLockssDaemon theDaemon;
    private CrawlManagerImpl crawlMgr;
    private MockPlugin plug;
    private MockAuState aus;
    // NOTE(review): not referenced in the visible part of this file — confirm it is still needed.
    private static List testUrlList = ListUtil.list(new String[]{"http://example.com"});
    // Holds startUrl; setUp() installs this list on the AU as both start URLs and permission URLs.
    private List startUrls;
    // NOTE(review): presumably the retry-count config key and its default — confirm against the crawler.
    private static final String PARAM_RETRY_TIMES = "org.lockss.crawler.retryCount";
    private static final int DEFAULT_RETRY_TIMES = 3;
    // NOTE(review): presumably URLs for crawl-window tests (judging by the CW_ prefix) — confirm.
    private static final String CW_URL1 = "http://www.example.com/link1.html";
    private static final String CW_URL2 = "http://www.example.com/link2.html";
    private static final String CW_URL3 = "http://www.example.com/link3.html";
    private MyMockArchivalUnit mau = null;
    private MockCachedUrlSet mcus = null;
    private MockCrawlRule crawlRule = null;
    private String startUrl = "http://www.example.com/index.html";
    // The crawler under test; built by setUp() via makeTestableCrawler().
    private TestableFollowLinkCrawler crawler = null;
    // Registered by setUp() as the AU's link extractor for "text/html".
    private MockLinkExtractor extractor = new MockLinkExtractor();
    protected String permissionUrl = "http://www.example.com/permission.html";
    // URLs used by the testNoSubstance(...) helpers.
    String nsurl1 = "http://www.example.com/one.html";
    String nsurl2 = "http://www.example.com/two.xml";
    String nsurl3 = "http://www.example.com/three.xml";
    String nsurl4 = "http://www.example.com/four.html";
    String nsurl5 = "http://www.example.com/redir1.html";
    /* loaded from: input_file:org/lockss/crawler/TestFollowLinkCrawler$MyBaseCrawlSeed.class */
    /**
     * Crawl seed whose start URLs can be overridden by a test. Until
     * {@link #setStartUrls(List)} is called it defers to the superclass.
     */
    class MyBaseCrawlSeed extends BaseCrawlSeed {
        /** Test-supplied start URLs; {@code null} means "use the superclass's". */
        List<String> startUrls;

        MyBaseCrawlSeed(ArchivalUnit archivalUnit) {
            super(archivalUnit);
        }

        /**
         * Returns the overriding start URLs if set, otherwise the superclass's.
         * The chosen collection is logged and then returned, so the superclass
         * method is invoked at most once per call and the logged value is
         * exactly the returned value.
         */
        public Collection<String> doGetStartUrls() throws ArchivalUnit.ConfigurationException, PluginException, IOException {
            Collection<String> urls;
            if (this.startUrls == null) {
                urls = super.doGetStartUrls();
            } else {
                urls = this.startUrls;
            }
            TestFollowLinkCrawler.log.critical("doGetStartUrls: " + urls);
            return urls;
        }

        /** Installs the start URLs that {@link #doGetStartUrls()} should return. */
        void setStartUrls(List<String> list) {
            this.startUrls = list;
        }
    }

    /* loaded from: input_file:org/lockss/crawler/TestFollowLinkCrawler$MyFiltFact.class */
    /**
     * Filter factory that drops the first {@code skip} pieces of the content,
     * where the content is broken at spaces.
     */
    private static class MyFiltFact implements FilterFactory {
        /** Number of leading space-separated pieces to discard. */
        int skip;

        MyFiltFact(int i) {
            skip = i;
        }

        /**
         * Reads the whole stream, removes up to {@code skip} leading pieces and
         * returns the remainder re-joined with single spaces.
         *
         * @throws RuntimeException wrapping any IOException raised while reading
         */
        public InputStream createFilteredInputStream(ArchivalUnit archivalUnit, InputStream inputStream, String str) throws PluginException {
            try {
                String unfiltered = StringUtil.fromInputStream(inputStream);
                Vector pieces = StringUtil.breakAt(unfiltered, ' ');
                int dropped = 0;
                // Stop early if the content has fewer pieces than we were asked to skip.
                while (dropped < skip && !pieces.isEmpty()) {
                    pieces.remove(0);
                    dropped++;
                }
                String filtered = StringUtil.separatedString(pieces, " ");
                TestFollowLinkCrawler.log.debug2("before: " + unfiltered + ", after: " + filtered);
                return new StringInputStream(filtered);
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
        }
    }

    /* loaded from: input_file:org/lockss/crawler/TestFollowLinkCrawler$MyLoginPageChecker.class */
    /** Stateless login-page checker that never recognizes a login page. */
    static class MyLoginPageChecker implements LoginPageChecker {
        MyLoginPageChecker() {
            super();
        }

        /** Always answers {@code false}; neither argument is inspected. */
        public boolean isLoginPage(Properties properties, Reader reader) {
            return false;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/lockss/crawler/TestFollowLinkCrawler$MyMockArchivalUnit.class */
    /** Mock AU that hands out {@link MyMockUrlFetcher}s and remembers the most recent one. */
    public class MyMockArchivalUnit extends MockArchivalUnit {
        /** The fetcher most recently created by {@code makeMockUrlFetcher}. */
        MyMockUrlFetcher lastMmuf;

        private MyMockArchivalUnit() {
        }

        /** Creates a {@link MyMockUrlFetcher}, records it in {@code lastMmuf} and returns it. */
        @Override
        protected MockUrlFetcher makeMockUrlFetcher(MockCrawler.MockCrawlerFacade mockCrawlerFacade, String str) {
            MyMockUrlFetcher created = new MyMockUrlFetcher(mockCrawlerFacade, str);
            this.lastMmuf = created;
            return created;
        }
    }

    /* loaded from: input_file:org/lockss/crawler/TestFollowLinkCrawler$MyMockCacheException.class */
    /** CacheException whose attribute bit 1 can be switched on by a test. */
    private class MyMockCacheException extends CacheException {
        public MyMockCacheException(String str) {
            super(str);
        }

        /** Sets bit 1 of the inherited attribute bits. */
        public void setFailing() {
            // NOTE(review): bit 1 is presumably CacheException's "fail" attribute — confirm.
            final int failBit = 1;
            this.attributeBits.set(failBit);
        }
    }

    /* loaded from: input_file:org/lockss/crawler/TestFollowLinkCrawler$MyMockCrawlWindow.class */
    /** Crawl window that reports "open" a fixed number of times and "closed" thereafter. */
    private class MyMockCrawlWindow implements CrawlWindow {
        /** Remaining number of {@link #canCrawl()} calls that will answer {@code true}. */
        int numTimesToReturnTrue;

        public MyMockCrawlWindow(int i) {
            numTimesToReturnTrue = i;
        }

        /** Answers {@code true} while the budget is positive, consuming one unit per such call. */
        public boolean canCrawl() {
            boolean open = numTimesToReturnTrue > 0;
            if (open) {
                numTimesToReturnTrue--;
            }
            return open;
        }

        /** Not supported by this mock. */
        public boolean canCrawl(Date date) {
            throw new UnsupportedOperationException("not implemented");
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/lockss/crawler/TestFollowLinkCrawler$MyMockPermissionChecker.class */
    /** Permission checker that grants permission for a configurable number of checks. */
    public class MyMockPermissionChecker implements PermissionChecker {
        /** Grants left; decremented on every check, including checks that are refused. */
        int numPermissionGranted;

        MyMockPermissionChecker(int i) {
            numPermissionGranted = i;
        }

        public void setNumPermissionGranted(int i) {
            numPermissionGranted = i;
        }

        /**
         * Grants permission iff the counter was positive before this call;
         * the counter is decremented either way. All arguments are ignored.
         */
        public boolean checkPermission(Crawler.CrawlerFacade crawlerFacade, Reader reader, String str) {
            return numPermissionGranted-- > 0;
        }
    }

    /* loaded from: input_file:org/lockss/crawler/TestFollowLinkCrawler$MyMockRetryableCacheException.class */
    /** Retryable cache exception with a test-controlled retry count and retry delay. */
    private class MyMockRetryableCacheException extends CacheException.RetryableException {
        /** Reported retry count; -1 unless supplied through the four-argument constructor. */
        private int retryCount = -1;
        /** Reported retry delay; -1 unless supplied through the four-argument constructor. */
        private long retryDelay = -1L;

        public MyMockRetryableCacheException(String str) {
            super(str);
        }

        /**
         * Creates an exception with an explicit retry count and delay.
         * The first parameter is not used by the body.
         */
        public MyMockRetryableCacheException(TestFollowLinkCrawler testFollowLinkCrawler, String str, int i, long j) {
            this(str);
            retryCount = i;
            retryDelay = j;
        }

        public int getRetryCount() {
            return retryCount;
        }

        public long getRetryDelay() {
            return retryDelay;
        }
    }

    /* loaded from: input_file:org/lockss/crawler/TestFollowLinkCrawler$MyMockUnretryableCacheException.class */
    /** Unretryable cache exception for tests; adds nothing beyond its superclass. */
    private class MyMockUnretryableCacheException extends CacheException.UnretryableException {
        /** Passes the message straight to the superclass constructor. */
        public MyMockUnretryableCacheException(String str) {
            super(str);
        }
    }

    /* loaded from: input_file:org/lockss/crawler/TestFollowLinkCrawler$MyMockUrlFetcher.class */
    /**
     * Mock fetcher that records the proxy it was given and can abort the
     * enclosing test's crawler when it is asked to fetch.
     */
    public class MyMockUrlFetcher extends MockUrlFetcher {
        /** When true, every fetch/stream request first aborts the crawl. */
        private boolean abortCrawl = false;
        /** Proxy host passed to {@link #setProxy(String, int)}; null until then. */
        String proxyHost = null;
        /** Proxy port passed to {@link #setProxy(String, int)}. */
        int proxyPort;

        public MyMockUrlFetcher(Crawler.CrawlerFacade crawlerFacade, String str) {
            super(crawlerFacade, str);
        }

        @Override
        public InputStream getUncachedInputStream() throws IOException {
            checkAbort();
            return super.getUncachedInputStream();
        }

        @Override
        public UrlFetcher.FetchResult fetch() throws CacheException {
            checkAbort();
            return super.fetch();
        }

        /** Aborts the outer test's crawler if this fetcher has been told to. */
        private void checkAbort() {
            if (!abortCrawl) {
                return;
            }
            TestFollowLinkCrawler.this.crawler.abortCrawl();
        }

        /** Records the proxy settings instead of applying them. */
        @Override
        public void setProxy(String str, int i) {
            proxyHost = str;
            proxyPort = i;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/lockss/crawler/TestFollowLinkCrawler$TestableFollowLinkCrawler.class */
    /**
     * FollowLinkCrawler subclass for tests: it can be seeded with extra URLs to
     * enqueue, records every URL handed to {@code fetch}, and lets the test
     * replace the daemon permission checkers and the fail-on-start-URL-error flag.
     */
    public class TestableFollowLinkCrawler extends FollowLinkCrawler {
        Set<String> urlsToFollow = new HashSet<String>();
        /** Extra URLs enqueued after the start URLs; replaced by {@link #setUrlsToFollow(List)}. */
        Set<String> nonStartUrlsToFollow = new HashSet<String>();
        /** URLs passed to {@link #fetch(CrawlUrlData)}. */
        Set fetched = new HashSet();
        boolean isFailOnStartUrlError = true;
        /** Test-supplied checkers; when null the superclass's checkers are used. */
        List<PermissionChecker> daemonPermissionCheckers;

        protected TestableFollowLinkCrawler(ArchivalUnit archivalUnit, AuState auState) {
            super(archivalUnit, auState);
            this.crawlStatus = new CrawlerStatus(archivalUnit, archivalUnit.getStartUrls(), (String) null);
            setCrawlManager(TestFollowLinkCrawler.this.crawlMgr);
        }

        FollowLinkCrawler.MyLinkExtractorCallback newFoundUrlCallback(ArchivalUnit archivalUnit, CrawlUrlData crawlUrlData, CrawlQueue crawlQueue, Map<String, CrawlUrlData> map, Map<String, CrawlUrlData> map2) {
            return new FollowLinkCrawler.MyLinkExtractorCallback(this, archivalUnit, crawlUrlData, crawlQueue, map, map2);
        }

        protected boolean shouldFollowLink() {
            return true;
        }

        /** Intentionally does nothing. */
        protected void doCrawlEndActions() {
        }

        /** Replaces the extra URLs with the given list, kept in the list's order. */
        protected void setUrlsToFollow(List list) {
            Set ordered = new ListOrderedSet();
            this.nonStartUrlsToFollow = ordered;
            this.nonStartUrlsToFollow.addAll(list);
        }

        /** Enqueues the normal start URLs, then each extra URL onto the fetch queue. */
        protected void enqueueStartUrls() throws ArchivalUnit.ConfigurationException, PluginException, IOException {
            super.enqueueStartUrls();
            if (this.nonStartUrlsToFollow == null) {
                return;
            }
            for (String url : this.nonStartUrlsToFollow) {
                // NOTE(review): the literal 2 is presumably the link depth — confirm.
                addToQueue(newCrawlUrlData(url, 2), this.fetchQueue, this.crawlStatus);
            }
        }

        /** Not supported by this test crawler. */
        public Crawler.Type getType() {
            throw new UnsupportedOperationException("not implemented");
        }

        public String getTypeString() {
            return "Follow Link";
        }

        public boolean isWholeAU() {
            return false;
        }

        /** Records the URL in {@code fetched}, then delegates to the real fetch. */
        protected boolean fetch(CrawlUrlData crawlUrlData) {
            String url = crawlUrlData.getUrl();
            this.fetched.add(url);
            return super.fetch(crawlUrlData);
        }

        void setFailOnStartUrlError(boolean z) {
            this.isFailOnStartUrlError = z;
        }

        protected boolean isFailOnStartUrlError() {
            return this.isFailOnStartUrlError;
        }

        /** Returns the test-supplied checkers if any, otherwise the superclass's. */
        List<PermissionChecker> getDaemonPermissionCheckers() {
            if (this.daemonPermissionCheckers != null) {
                return this.daemonPermissionCheckers;
            }
            return super.getDaemonPermissionCheckers();
        }

        public void setDaemonPermissionCheckers(List<PermissionChecker> list) {
            this.daemonPermissionCheckers = list;
        }
    }

    /**
     * Builds the per-test fixture: a mock daemon with a NoPauseCrawlManagerImpl,
     * a mock plugin and AU whose only start/permission URL is {@code startUrl},
     * the crawler under test, and a configuration with zero retry delays.
     */
    @Override // org.lockss.test.LockssTestCase
    public void setUp() throws Exception {
        super.setUp();
        TimeBase.setSimulated(10L);
        // Daemon and crawl manager.
        this.theDaemon = getMockLockssDaemon();
        this.crawlMgr = new NoPauseCrawlManagerImpl();
        this.theDaemon.setCrawlManager(this.crawlMgr);
        this.crawlMgr.initService(this.theDaemon);
        this.theDaemon.getAlertManager();
        // Plugin, AU and AU state.
        this.plug = new MockPlugin(getMockLockssDaemon());
        this.plug.initPlugin(getMockLockssDaemon());
        this.mau = new MyMockArchivalUnit();
        this.mau.setPlugin(this.plug);
        this.mau.setAuId("MyMockTestAu");
        this.aus = AuTestUtil.setUpMockAus(this.mau);
        this.startUrls = ListUtil.list(new String[]{this.startUrl});
        this.mcus = (MockCachedUrlSet) this.mau.getAuCachedUrlSet();
        // Crawl rule that admits the start URL.
        this.crawlRule = new MockCrawlRule();
        this.crawlRule.addUrlToCrawl(this.startUrl);
        this.mau.setCrawlRule(this.crawlRule);
        // The same list object serves as both the start URLs and the permission URLs.
        this.mau.setStartUrls(this.startUrls);
        this.mau.setPermissionUrls(this.startUrls);
        this.mau.setRefetchDepth(1);
        this.crawlMgr.newCrawlRateLimiter(this.mau);
        this.crawler = makeTestableCrawler();
        this.mau.setLinkExtractor("text/html", this.extractor);
        // Zero retry delays; presumably so retries do not slow the tests — confirm.
        Properties properties = new Properties();
        properties.setProperty("org.lockss.crawler.retryDelay", "0");
        properties.setProperty("org.lockss.crawler.minRetryDelay", "0");
        ConfigurationUtil.setCurrentConfigFromProps(properties);
    }

    /**
     * Creates a crawler over the current mock AU and AU state, with a single
     * daemon permission checker that grants permission once.
     */
    TestableFollowLinkCrawler makeTestableCrawler() {
        PermissionChecker grantOnce = new MyMockPermissionChecker(1);
        TestableFollowLinkCrawler made = new TestableFollowLinkCrawler(this.mau, this.aus);
        made.setDaemonPermissionCheckers(ListUtil.list(new PermissionChecker[]{grantOnce}));
        return made;
    }

    /** Constructing the crawler with a null AU must throw IllegalArgumentException. */
    public void testFlcThrowsForNullAu() {
        boolean rejected = false;
        try {
            this.crawler = new TestableFollowLinkCrawler(null, null);
        } catch (IllegalArgumentException expected) {
            rejected = true;
        }
        if (!rejected) {
            fail("Constructing a FollowLinkCrawler with a null ArchivalUnit should throw an IllegalArgumentException");
        }
    }

    /** Constructing the crawler with a null AuState must throw IllegalArgumentException. */
    public void testFlcThrowsForNullAuState() {
        boolean rejected = false;
        try {
            this.crawler = new TestableFollowLinkCrawler(this.mau, null);
        } catch (IllegalArgumentException expected) {
            rejected = true;
        }
        if (!rejected) {
            fail("Calling makeTestableFollowLinkCrawler with a null AuState should throw an IllegalArgumentException");
        }
    }

    /** The test crawler's getType() is expected to throw UnsupportedOperationException. */
    public void testReturnsProperType() {
        boolean unsupported = false;
        try {
            this.crawler.getType();
        } catch (UnsupportedOperationException expected) {
            unsupported = true;
        }
        if (!unsupported) {
            fail("Calling getType() , which should not be implemented in FollowLinkCrawler");
        }
    }

    /**
     * Start URLs supplied by the crawl seed that fall outside the crawl rules
     * must be reported as per-URL errors when the start URLs are enqueued.
     */
    public void testCrawlSeedStartUrlsNotInCrawlSpec() throws ArchivalUnit.ConfigurationException, PluginException, IOException {
        this.mau.addUrlToBeCached(this.startUrl);
        this.mau.addUrlToBeCached(this.permissionUrl);
        // Declared with the concrete type: setStartUrls(...) is defined on
        // MyBaseCrawlSeed, so the variable must not be narrowed to CrawlSeed.
        MyBaseCrawlSeed myBaseCrawlSeed = new MyBaseCrawlSeed(this.mau);
        // Two extra start URLs that the crawl rule set up in setUp() does not admit.
        this.startUrls.addAll(ListUtil.list(new String[]{"http://www.example2.com/index2.html", "http://www.example3.com/index3.html"}));
        myBaseCrawlSeed.setStartUrls(this.startUrls);
        this.mau.setCrawlSeed(myBaseCrawlSeed);
        assertEquals(this.startUrls, myBaseCrawlSeed.getStartUrls());
        this.crawler.enqueueStartUrls();
        CrawlerStatus crawlerStatus = this.crawler.getCrawlerStatus();
        // NOTE(review): 9 is presumably a Crawler.STATUS_* constant inlined by the decompiler — confirm which one.
        assertEquals(9, crawlerStatus.getCrawlStatus());
        assertEquals(MapUtil.map(new Object[]{"http://www.example3.com/index3.html", "Start URL from CrawlSeed not within crawl rules", "http://www.example2.com/index2.html", "Start URL from CrawlSeed not within crawl rules"}), crawlerStatus.getUrlsWithErrors());
    }

    /** With the configuration left by setUp(), the created fetcher has no proxy host. */
    public void testNoProxy() {
        this.crawler.setCrawlConfig(ConfigManager.getCurrentConfig());
        this.mau.addUrl(this.startUrl);
        this.crawler.makeUrlFetcher(this.startUrl);
        MyMockUrlFetcher fetcher = this.mau.lastMmuf;
        assertEquals((String) null, fetcher.proxyHost);
    }

    /** A globally configured crawl proxy (pr.wub:27) is passed to the created fetcher. */
    public void testGlobalProxy() {
        Properties proxyProps = new Properties();
        proxyProps.setProperty("org.lockss.crawler.proxy.enabled", "true");
        proxyProps.setProperty("org.lockss.crawler.proxy.host", "pr.wub");
        proxyProps.setProperty("org.lockss.crawler.proxy.port", "27");
        ConfigurationUtil.addFromProps(proxyProps);

        this.crawler.setCrawlConfig(ConfigManager.getCurrentConfig());
        this.mau.addUrl(this.startUrl);
        this.crawler.makeUrlFetcher(this.startUrl);

        assertEquals("pr.wub", this.mau.lastMmuf.proxyHost);
        assertEquals(27, this.mau.lastMmuf.proxyPort);
    }

    /** An AU-level crawl proxy (proxy.host:8086) wins over the global proxy (pr.wub:27). */
    public void testAuProxyOverride() throws Exception {
        Properties globalProxy = new Properties();
        globalProxy.setProperty("org.lockss.crawler.proxy.enabled", "true");
        globalProxy.setProperty("org.lockss.crawler.proxy.host", "pr.wub");
        globalProxy.setProperty("org.lockss.crawler.proxy.port", "27");
        ConfigurationUtil.addFromProps(globalProxy);

        String auProxyKey = ConfigParamDescr.CRAWL_PROXY.getKey();
        this.mau.setConfiguration(ConfigurationUtil.fromArgs(auProxyKey, "proxy.host:8086"));
        this.crawler.setCrawlConfig(ConfigManager.getCurrentConfig());
        this.mau.addUrl(this.startUrl);
        this.crawler.makeUrlFetcher(this.startUrl);

        assertEquals("proxy.host", this.mau.lastMmuf.proxyHost);
        assertEquals(8086, this.mau.lastMmuf.proxyPort);
    }

    /** An AU-level crawl proxy (proxy.host:8087) is used when no proxy properties are added to the config. */
    public void testAuProxyOnly() throws Exception {
        String auProxyKey = ConfigParamDescr.CRAWL_PROXY.getKey();
        this.mau.setConfiguration(ConfigurationUtil.fromArgs(auProxyKey, "proxy.host:8087"));
        this.crawler.setCrawlConfig(ConfigManager.getCurrentConfig());
        this.mau.addUrl(this.startUrl);
        this.crawler.makeUrlFetcher(this.startUrl);

        assertEquals("proxy.host", this.mau.lastMmuf.proxyHost);
        assertEquals(8087, this.mau.lastMmuf.proxyPort);
    }

    /** An AU-level crawl proxy value of "direct" suppresses the global proxy. */
    public void testAuProxyDisable() throws Exception {
        Properties globalProxy = new Properties();
        globalProxy.setProperty("org.lockss.crawler.proxy.enabled", "true");
        globalProxy.setProperty("org.lockss.crawler.proxy.host", "pr.wub");
        globalProxy.setProperty("org.lockss.crawler.proxy.port", "27");
        ConfigurationUtil.addFromProps(globalProxy);

        String auProxyKey = ConfigParamDescr.CRAWL_PROXY.getKey();
        this.mau.setConfiguration(ConfigurationUtil.fromArgs(auProxyKey, "direct"));
        this.crawler.setCrawlConfig(ConfigManager.getCurrentConfig());
        this.mau.addUrl(this.startUrl);
        this.crawler.makeUrlFetcher(this.startUrl);

        MyMockUrlFetcher fetcher = this.mau.lastMmuf;
        assertEquals((String) null, fetcher.proxyHost);
    }

    /**
     * With the AU proxy spec "proxy.host:8086:foo" (presumably malformed) and no
     * override of the abort-on-invalid-proxy setting, the crawl fails and is aborted.
     */
    public void testIllAuProxyAbort() throws Exception {
        final String followUrl = "http://www.example.com/blah.html";

        Properties globalProxy = new Properties();
        globalProxy.setProperty("org.lockss.crawler.proxy.enabled", "true");
        globalProxy.setProperty("org.lockss.crawler.proxy.host", "pr.wub");
        globalProxy.setProperty("org.lockss.crawler.proxy.port", "27");
        ConfigurationUtil.addFromProps(globalProxy);

        String auProxyKey = ConfigParamDescr.CRAWL_PROXY.getKey();
        this.mau.setConfiguration(ConfigurationUtil.fromArgs(auProxyKey, "proxy.host:8086:foo"));
        this.mau.addUrl(this.startUrl, false, true);
        this.crawler.setUrlsToFollow(ListUtil.list(new String[]{followUrl}));
        this.mau.addUrl(followUrl, false, true);

        assertFalse(this.crawler.doCrawl());
        assertTrue(this.crawler.isAborted());
    }

    /**
     * Same AU proxy spec as the abort case, but with
     * org.lockss.crawler.abortOnInvalidProxy set to false: the crawl succeeds
     * and is not aborted.
     */
    public void testIllAuProxyContinue() throws Exception {
        final String followUrl = "http://www.example.com/blah.html";

        Properties config = new Properties();
        config.setProperty("org.lockss.crawler.abortOnInvalidProxy", "false");
        config.setProperty("org.lockss.crawler.proxy.enabled", "true");
        config.setProperty("org.lockss.crawler.proxy.host", "pr.wub");
        config.setProperty("org.lockss.crawler.proxy.port", "27");
        ConfigurationUtil.addFromProps(config);

        String auProxyKey = ConfigParamDescr.CRAWL_PROXY.getKey();
        this.mau.setConfiguration(ConfigurationUtil.fromArgs(auProxyKey, "proxy.host:8086:foo"));
        this.mau.addUrl(this.startUrl, false, true);
        this.crawler.setUrlsToFollow(ListUtil.list(new String[]{followUrl}));
        this.mau.addUrl(followUrl, false, true);

        assertTrue(this.crawler.doCrawl());
        assertFalse(this.crawler.isAborted());
    }

    /** makeUrlFetcher() without any proxy configuration leaves the fetcher's proxy host null. */
    public void testMakeUrlFetcher() {
        this.mau.addUrl(this.startUrl);
        this.crawler.makeUrlFetcher(this.startUrl);
        MyMockUrlFetcher fetcher = this.mau.lastMmuf;
        assertNull(fetcher.proxyHost);
    }

    /** makeUrlFetcher() hands the configured global proxy (pr.wub:27) to the fetcher. */
    public void testMakeUrlFetcherProxy() {
        Properties proxyProps = new Properties();
        proxyProps.setProperty("org.lockss.crawler.proxy.enabled", "true");
        proxyProps.setProperty("org.lockss.crawler.proxy.host", "pr.wub");
        proxyProps.setProperty("org.lockss.crawler.proxy.port", "27");
        ConfigurationUtil.addFromProps(proxyProps);

        this.crawler.setCrawlConfig(ConfigManager.getCurrentConfig());
        this.mau.addUrl(this.startUrl);
        this.crawler.makeUrlFetcher(this.startUrl);

        assertEquals("pr.wub", this.mau.lastMmuf.proxyHost);
        assertEquals(27, this.mau.lastMmuf.proxyPort);
    }

    /** A crawl over the start URL plus one followed URL reports success. */
    public void testReturnsTrueWhenCrawlSuccessful() {
        final String followUrl = "http://www.example.com/blah.html";
        this.mau.addUrl(this.startUrl, false, true);
        this.crawler.setUrlsToFollow(ListUtil.list(new String[]{followUrl}));
        this.mau.addUrl(followUrl, false, true);
        boolean succeeded = doCrawl0(this.crawler);
        assertTrue(succeeded);
    }

    /** After the crawl only the start URL is in the cached set; the followed URL is not. */
    public void testDoesNotCacheExistingFile() {
        final String followUrl = "http://www.example.com/blah.html";
        this.crawler.setUrlsToFollow(ListUtil.list(new String[]{followUrl}));
        MockCachedUrlSet cus = (MockCachedUrlSet) this.mau.getAuCachedUrlSet();
        this.mau.addUrl(this.startUrl, true, true);
        this.mau.addUrl(followUrl, true, true);
        assertTrue(this.crawler.doCrawl());
        assertEquals(SetUtil.set(new String[]{this.startUrl}), cus.getCachedUrls());
    }

    /** Without the refetchEmptyFiles setting, only the start URL ends up in the cached set. */
    public void testRefetchEmptyFileFalse() {
        final String nonEmptyUrl = "http://www.example.com/blah.html";
        final String otherUrl = "http://www.example.com/halb.html";
        this.crawler.setUrlsToFollow(ListUtil.list(new String[]{nonEmptyUrl, otherUrl}));
        MockCachedUrlSet cus = (MockCachedUrlSet) this.mau.getAuCachedUrlSet();
        this.mau.addUrl(this.startUrl, true, true);
        this.mau.addUrl(nonEmptyUrl, "non-empty");
        this.mau.addUrl(otherUrl, true, true);
        assertTrue(this.crawler.doCrawl());
        assertEquals(SetUtil.set(new String[]{this.startUrl}), cus.getCachedUrls());
    }

    /**
     * With org.lockss.crawler.refetchEmptyFiles=true, halb.html is cached in
     * addition to the start URL, while the URL added with content "non-empty" is not.
     */
    public void testRefetchEmptyFileTrue() {
        final String nonEmptyUrl = "http://www.example.com/blah.html";
        final String otherUrl = "http://www.example.com/halb.html";
        ConfigurationUtil.addFromArgs("org.lockss.crawler.refetchEmptyFiles", "true");
        this.crawler.setUrlsToFollow(ListUtil.list(new String[]{nonEmptyUrl, otherUrl}));
        MockCachedUrlSet cus = (MockCachedUrlSet) this.mau.getAuCachedUrlSet();
        this.mau.addUrl(this.startUrl, true, true);
        this.mau.addUrl(nonEmptyUrl, "non-empty");
        this.mau.addUrl(otherUrl, true, true);
        assertTrue(this.crawler.doCrawl());
        assertEquals(SetUtil.set(new String[]{this.startUrl, otherUrl}), cus.getCachedUrls());
        // Result is unused; the call is kept from the original.
        this.crawler.getCrawlerStatus();
    }

    /**
     * refetchEmptyFiles=true, but the plugin maps ContentValidationException.EmptyFile
     * to CacheSuccess: only the start URL ends up in the cached set.
     */
    public void testRefetchEmptyFileTruePluginIgnores() {
        final String nonEmptyUrl = "http://www.example.com/blah.html";
        final String otherUrl = "http://www.example.com/halb.html";
        ConfigurationUtil.addFromArgs("org.lockss.crawler.refetchEmptyFiles", "true");
        this.plug.getCacheResultMap().storeMapEntry(ContentValidationException.EmptyFile.class, CacheSuccess.class);
        this.crawler.setUrlsToFollow(ListUtil.list(new String[]{nonEmptyUrl, otherUrl}));
        MockCachedUrlSet cus = (MockCachedUrlSet) this.mau.getAuCachedUrlSet();
        this.mau.addUrl(this.startUrl, true, true);
        this.mau.addUrl(nonEmptyUrl, "non-empty");
        this.mau.addUrl(otherUrl, true, true);
        assertTrue(this.crawler.doCrawl());
        assertEquals(SetUtil.set(new String[]{this.startUrl}), cus.getCachedUrls());
    }

    /**
     * When blah.html carries an X-Lockss-content-url of .../extra_level/, the link
     * extractor sees that URL (and the start URL) as its source URLs.
     */
    public void testHandlesRedirects() {
        final String followUrl = "http://www.example.com/blah.html";
        final String redirectedTo = "http://www.example.com/extra_level/";
        this.crawler.setUrlsToFollow(ListUtil.list(new String[]{followUrl}));
        this.mau.addUrl(this.startUrl, false, true);
        CIProperties headers = new CIProperties();
        headers.put("X-Lockss-content-type", "text/html");
        headers.put("X-Lockss-content-url", redirectedTo);
        this.mau.addUrl(followUrl, false, true, headers);
        assertTrue(this.crawler.doCrawl());
        assertEquals(SetUtil.set(new String[]{redirectedTo, this.startUrl}), this.extractor.getSrcUrls());
    }

    /**
     * With foo.com listed as a globally permitted host, a URL on that host is
     * fetched and its URL prefix is recorded as a CDN stem in the AU state.
     */
    public void testCdnHost() throws Exception {
        final String oneUrl = "http://www.example.com/one.html";
        final String cdnUrl = "http://foo.com/two";
        final String threeUrl = "http://www.example.com/three";

        ConfigurationUtil.addFromArgs("org.lockss.crawler.globallyPermittedHosts", "foo\\.com");
        this.mau.setUrlStems(ListUtil.list(new String[]{"http://www.example.com/"}));
        this.mau.setStartUrls(ListUtil.list(new String[]{oneUrl}));
        this.crawler = makeTestableCrawler();
        this.mau.addUrl(this.startUrl, false, true);
        this.mau.addUrl(oneUrl);
        this.mau.addUrl(cdnUrl);
        this.mau.addUrl(threeUrl);
        this.crawler.setUrlsToFollow(ListUtil.list(new String[]{cdnUrl, threeUrl}));

        assertEmpty(this.aus.getCdnStems());
        assertTrue(this.crawler.doCrawl());
        assertEquals(SetUtil.set(new String[]{oneUrl, cdnUrl, threeUrl}), this.crawler.fetched);
        assertEquals(ListUtil.list(new String[]{UrlUtil.getUrlPrefix(cdnUrl)}), this.aus.getCdnStems());
    }

    /**
     * First crawl: the cdn.host URL yields no CDN stem. After setting
     * org.lockss.crawler.refindCdnStems=true, a second crawl records its prefix.
     */
    public void testRefindCdnHost() throws Exception {
        final String startPage = "http://www.example.com/blah.html";
        final String cdnUrl = "http://cdn.host/halb.html";

        ConfigurationUtil.addFromArgs("org.lockss.crawler.globallyPermittedHosts", "cdn\\.host");
        this.mau.setUrlStems(ListUtil.list(new String[]{"http://www.example.com/"}));
        this.mau.setStartUrls(ListUtil.list(new String[]{startPage}));
        this.crawler = makeTestableCrawler();
        this.mau.addUrl(this.startUrl, false, true);
        this.mau.addUrl(startPage);
        this.mau.addUrl(cdnUrl, "foo");
        this.crawler.setUrlsToFollow(ListUtil.list(new String[]{cdnUrl}));

        // Pass 1: no stem is recorded.
        assertEmpty(this.aus.getCdnStems());
        assertTrue(this.crawler.doCrawl());
        assertEmpty(this.aus.getCdnStems());

        // Pass 2: a fresh crawler with refindCdnStems enabled records the stem.
        ConfigurationUtil.addFromArgs("org.lockss.crawler.refindCdnStems", "true");
        this.crawler = makeTestableCrawler();
        this.crawler.setUrlsToFollow(ListUtil.list(new String[]{cdnUrl}));
        assertTrue(this.crawler.doCrawl());
        assertEquals(ListUtil.list(new String[]{UrlUtil.getUrlPrefix(cdnUrl)}), this.aus.getCdnStems());
    }

    /** Builds a CIProperties holding exactly one entry: {@code str} mapped to {@code str2}. */
    CIProperties fromArgs(String str, String str2) {
        CIProperties single = new CIProperties();
        single.put(str, str2);
        return single;
    }

    /**
     * Checks the arguments each link extractor receives: "ISO-8859-1" where the
     * content type names no charset, "utf-8" where it does, and routing of
     * "audio/inaudible" content to the extractor registered for that type.
     */
    public void testParseCharset() {
        final String ctypeKey = "X-Lockss-content-type";
        final String oneUrl = "http://www.example.com/one.html";
        final String twoUrl = "http://www.example.com/two.html";
        final String threeUrl = "http://www.example.com/three.html";

        MockLinkExtractor audioExtractor = new MockLinkExtractor();
        this.mau.setLinkExtractor("audio/inaudible", audioExtractor);
        this.mau.setStartUrls(ListUtil.list(new String[]{oneUrl, twoUrl, threeUrl}));
        this.mau.addUrl(this.startUrl, false, true);
        this.mau.addUrl(oneUrl, false, true, fromArgs(ctypeKey, "text/html"));
        this.mau.addUrl(twoUrl, false, true, fromArgs(ctypeKey, "text/html;charset=utf-8"));
        this.mau.addUrl(threeUrl, false, true, fromArgs(ctypeKey, "audio/inaudible"));

        assertTrue(this.crawler.doCrawl());
        assertSameElements(SetUtil.set(new List[]{ListUtil.list(new Object[]{this.mau, null, "ISO-8859-1", oneUrl}), ListUtil.list(new Object[]{this.mau, null, "utf-8", twoUrl})}), this.extractor.getArgs());
        assertSameElements(SetUtil.set(new List[]{ListUtil.list(new Object[]{this.mau, null, "ISO-8859-1", threeUrl})}), audioExtractor.getArgs());
    }

    /**
     * one.html lists three space-separated links; the crawl filter MyFiltFact(1)
     * drops the first, so only "three" and "four" are fetched besides one.html.
     */
    public void testCrawlFilter() {
        final String oneUrl = "http://www.example.com/one.html";
        final String twoUrl = "http://www.example.com/two";
        final String threeUrl = "http://www.example.com/three";
        final String fourUrl = "http://www.example.com/four";

        this.mau.setLinkExtractor("text/html", new DelimitedLinkExtractor());
        this.mau.setCrawlFilterFactory(new MyFiltFact(1));
        this.mau.setStartUrls(ListUtil.list(new String[]{oneUrl}));
        this.mau.addUrl(this.startUrl, false, true);
        this.mau.addUrl(oneUrl, StringUtil.separatedString(ListUtil.list(new String[]{twoUrl, threeUrl, fourUrl}), " "));
        this.mau.addUrl(twoUrl, TestBaseCrawler.EMPTY_PAGE);
        this.mau.addUrl(threeUrl, TestBaseCrawler.EMPTY_PAGE);
        this.mau.addUrl(fourUrl, TestBaseCrawler.EMPTY_PAGE);

        assertTrue(this.crawler.doCrawl());
        assertEquals(SetUtil.set(new String[]{oneUrl, threeUrl, fourUrl}), this.crawler.fetched);
    }

    /** Compiles each regexp string via {@code RegexpUtil.compileRegexps}. */
    List<Pattern> compileRegexps(List<String> list) throws MalformedPatternException {
        List<Pattern> compiled = RegexpUtil.compileRegexps(list);
        return compiled;
    }

    /**
     * Convenience overload of the four-argument helper that uses the default
     * URLs to follow. Takes arguments, so it is a helper rather than a
     * standalone test case.
     */
    public void testNoSubstance(SubstanceChecker.State state, List<String> list, List<String> list2) throws Exception {
        final List<String> defaultUrlsToFollow = null;
        testNoSubstance(state, list, list2, defaultUrlsToFollow);
    }

    /**
     * Shared body of the substance tests: installs the given substance and
     * non-substance URL patterns on the AU, crawls, and asserts the resulting
     * substance state.
     *
     * @param state expected substance state after the crawl
     * @param list substance regexps, or null to install none
     * @param list2 non-substance regexps, or null to install none
     * @param list3 URLs to follow, or null for nsurl1, nsurl2 and nsurl4
     */
    public void testNoSubstance(SubstanceChecker.State state, List<String> list, List<String> list2, List<String> list3) throws Exception {
        if (list != null) {
            this.mau.setSubstanceUrlPatterns(compileRegexps(list));
        }
        if (list2 != null) {
            this.mau.setNonSubstanceUrlPatterns(compileRegexps(list2));
        }
        List<String> followUrls = list3;
        if (followUrls == null) {
            followUrls = ListUtil.list(new String[]{this.nsurl1, this.nsurl2, this.nsurl4});
        }
        this.crawler.setUrlsToFollow(followUrls);

        this.mau.addUrl(this.startUrl, false, true);
        this.mau.addUrl(this.nsurl1, false, true);
        this.mau.addUrl(this.nsurl2, false, true);
        // nsurl3 is registered with a content URL of nsurl5.
        CIProperties redirectProps = new CIProperties();
        redirectProps.put("X-Lockss-content-url", this.nsurl5);
        this.mau.addUrl(this.nsurl3, false, true, redirectProps);
        this.mau.addUrl(this.nsurl4, true, true);
        this.mau.populateAuCachedUrlSet();

        assertTrue(this.crawler.doCrawl());
        assertEquals(state, AuUtil.getAuState(this.mau).getSubstanceState());
    }

    /**
     * Adds the {@code detectNoSubstanceMode} substance-checker parameter to the
     * current configuration with the given value.
     *
     * @param str the mode value to configure (the tests here use "Crawl" and "None")
     */
    void setSubstanceMode(String str) {
        final String modeParam = "org.lockss.substanceChecker.detectNoSubstanceMode";
        ConfigurationUtil.addFromArgs(modeParam, str);
    }

    /** With no substance or non-substance patterns set, the state is expected to be Unknown. */
    public void testNoSubstanceNoPats() throws Exception {
        final List<String> noPatterns = null;
        setSubstanceMode("Crawl");
        testNoSubstance(SubstanceChecker.State.Unknown, noPatterns, noPatterns);
    }

    /** With the single substance pattern "important", the state is expected to be No. */
    public void testNoSubstanceNoUrlsMatchSubstancePatterns() throws Exception {
        setSubstanceMode("Crawl");
        List<String> substancePatterns = ListUtil.list(new String[]{"important"});
        testNoSubstance(SubstanceChecker.State.No, substancePatterns, null);
    }

    /** With substance patterns "important" and "two", the state is expected to be Yes. */
    public void testNoSubstanceSomeUrlsMatchSubstancePatterns() throws Exception {
        setSubstanceMode("Crawl");
        List<String> substancePatterns = ListUtil.list(new String[]{"important", "two"});
        testNoSubstance(SubstanceChecker.State.Yes, substancePatterns, null);
    }

    /**
     * With {@code doFullSubstanceCheck} set to "false" and substance patterns
     * "important" and "redir", the state is expected to be No.
     */
    public void testNoSubstanceNoCrawledUrlsMatchSubstancePatterns() throws Exception {
        final String fullCheckParam = "org.lockss.crawler.doFullSubstanceCheck";
        ConfigurationUtil.addFromArgs(fullCheckParam, "false");
        setSubstanceMode("Crawl");
        List<String> substancePatterns = ListUtil.list(new String[]{"important", "redir"});
        testNoSubstance(SubstanceChecker.State.No, substancePatterns, null);
    }

    /**
     * Same patterns as the non-full variant ("important", "redir"), but with
     * {@code doFullSubstanceCheck} set to "true"; the state is expected to be Yes.
     */
    public void testNoSubstanceFullWhenNoCrawledUrlsMatchSubstancePatterns() throws Exception {
        final String fullCheckParam = "org.lockss.crawler.doFullSubstanceCheck";
        ConfigurationUtil.addFromArgs(fullCheckParam, "true");
        setSubstanceMode("Crawl");
        List<String> substancePatterns = ListUtil.list(new String[]{"important", "redir"});
        testNoSubstance(SubstanceChecker.State.Yes, substancePatterns, null);
    }

    /**
     * With the substance mode set to "None", the state is expected to stay
     * Unknown even though substance patterns "important" and "two" are supplied.
     */
    public void testNoSubstanceDisabled() throws Exception {
        setSubstanceMode("None");
        List<String> substancePatterns = ListUtil.list(new String[]{"important", "two"});
        testNoSubstance(SubstanceChecker.State.Unknown, substancePatterns, null);
    }

    /** With the single substance pattern "four", the state is expected to be Yes. */
    public void testNoSubstanceSomeAlreadyCachedUrlsMatchSubstancePatterns() throws Exception {
        setSubstanceMode("Crawl");
        List<String> substancePatterns = ListUtil.list(new String[]{"four"});
        testNoSubstance(SubstanceChecker.State.Yes, substancePatterns, null);
    }

    /**
     * Follows {@code nsurl1}, {@code nsurl2} and {@code nsurl3} (instead of the
     * default list) with substance patterns "important" and "redir"; the state
     * is expected to be Yes.
     */
    public void testNoSubstanceRedirUrlMatchesSubstancePatterns() throws Exception {
        setSubstanceMode("Crawl");
        List<String> substancePatterns = ListUtil.list(new String[]{"important", "redir"});
        List<String> urlsToFollow = ListUtil.list(new String[]{nsurl1, nsurl2, nsurl3});
        testNoSubstance(SubstanceChecker.State.Yes, substancePatterns, null, urlsToFollow);
    }

    /**
     * With non-substance patterns "important" and "two" and no substance
     * patterns, the state is expected to be Yes.
     */
    public void testNoSubstanceSumeUrlsMatchNonSubstancePatterns() throws Exception {
        setSubstanceMode("Crawl");
        List<String> nonSubstancePatterns = ListUtil.list(new String[]{"important", "two"});
        testNoSubstance(SubstanceChecker.State.Yes, null, nonSubstancePatterns);
    }

    /**
     * With {@code doFullSubstanceCheck} set to "false" and non-substance
     * patterns "one", "two", "three" and "four", the state is expected to be No.
     */
    public void testNoSubstanceAllUrlsMatchNonSubstancePatterns() throws Exception {
        final String fullCheckParam = "org.lockss.crawler.doFullSubstanceCheck";
        setSubstanceMode("Crawl");
        ConfigurationUtil.addFromArgs(fullCheckParam, "false");
        List<String> nonSubstancePatterns = ListUtil.list(new String[]{"one", "two", "three", "four"});
        testNoSubstance(SubstanceChecker.State.No, null, nonSubstancePatterns);
    }

    /**
     * Same non-substance patterns as the non-full variant, but with
     * {@code doFullSubstanceCheck} set to "true"; the state is expected to be Yes.
     */
    public void testNoSubstanceFullUrlsDontAllMatchNonSubstancePatterns() throws Exception {
        final String fullCheckParam = "org.lockss.crawler.doFullSubstanceCheck";
        setSubstanceMode("Crawl");
        ConfigurationUtil.addFromArgs(fullCheckParam, "true");
        List<String> nonSubstancePatterns = ListUtil.list(new String[]{"one", "two", "three", "four"});
        testNoSubstance(SubstanceChecker.State.Yes, null, nonSubstancePatterns);
    }

    /**
     * With non-substance patterns "one", "tow", "three" and "four", the state
     * is expected to be Yes.
     */
    public void testNoSubstanceMostUrlsMatchNonSubstancePatterns() throws Exception {
        setSubstanceMode("Crawl");
        // NOTE(review): "tow" looks like a deliberate misspelling so that one
        // pattern fails to match; confirm before "correcting" it.
        List<String> nonSubstancePatterns = ListUtil.list(new String[]{"one", "tow", "three", "four"});
        testNoSubstance(SubstanceChecker.State.Yes, null, nonSubstancePatterns);
    }

    /**
     * Checks the retry count reported by the crawler facade for retryable
     * exceptions, before and after the {@code maxRetryCount} and
     * {@code PARAM_RETRY_TIMES} parameters are configured.
     */
    public void testGetRetryCount() {
        Crawler.CrawlerFacade facade = crawler.getCrawlerFacade();

        // Before any max is configured: an exception built with 15 yields 10.
        MyMockRetryableCacheException aboveInitialMax = new MyMockRetryableCacheException(this, "Test exception", 10 + 5, 0L);
        assertEquals(10, facade.getRetryCount(aboveInitialMax));

        // Configure a max of 57: 50 is returned unchanged, 62 comes back as 57.
        ConfigurationUtil.addFromArgs("org.lockss.crawler.maxRetryCount", String.valueOf(57));
        crawler.setCrawlConfig(ConfigManager.getCurrentConfig());
        MyMockRetryableCacheException belowMax = new MyMockRetryableCacheException(this, "Test exception", 57 - 7, 0L);
        assertEquals(57 - 7, facade.getRetryCount(belowMax));
        MyMockRetryableCacheException aboveMax = new MyMockRetryableCacheException(this, "Test exception", 57 + 5, 0L);
        assertEquals(57, facade.getRetryCount(aboveMax));

        // An exception built from a message only yields 3 ...
        MyMockRetryableCacheException messageOnly = new MyMockRetryableCacheException("Test exception");
        assertEquals(3, facade.getRetryCount(messageOnly));

        // ... and 7 once PARAM_RETRY_TIMES is set to "7".
        ConfigurationUtil.addFromArgs(PARAM_RETRY_TIMES, "7");
        crawler.setCrawlConfig(ConfigManager.getCurrentConfig());
        MyMockRetryableCacheException messageOnlyAfterConfig = new MyMockRetryableCacheException("Test exception");
        assertEquals(7, facade.getRetryCount(messageOnlyAfterConfig));
    }

    /**
     * Checks the retry delay reported by the crawler facade for retryable
     * exceptions, before and after the {@code minRetryDelay} and
     * {@code retryDelay} parameters are configured.
     */
    public void testGetRetryDelay() {
        Crawler.CrawlerFacade facade = crawler.getCrawlerFacade();

        // Before any minimum is configured: an exception built with 995 yields 1000.
        MyMockRetryableCacheException belowInitialMin = new MyMockRetryableCacheException(this, "Test exception", 0, 1000 - 5);
        assertEquals(1000L, facade.getRetryDelay(belowInitialMin));

        // Configure a minimum of 993: 996 is returned unchanged, 991 comes back as 993.
        ConfigurationUtil.addFromArgs("org.lockss.crawler.minRetryDelay", String.valueOf(993L));
        crawler.setCrawlConfig(ConfigManager.getCurrentConfig());
        MyMockRetryableCacheException aboveMin = new MyMockRetryableCacheException(this, "Test exception", 0, 993 + 3);
        assertEquals(993 + 3, facade.getRetryDelay(aboveMin));
        MyMockRetryableCacheException belowMin = new MyMockRetryableCacheException(this, "Test exception", 0, 993 - 2);
        assertEquals(993L, facade.getRetryDelay(belowMin));

        // Replace the configuration with an empty one: a message-only exception yields 10000.
        ConfigurationUtil.setCurrentConfigFromProps(new Properties());
        crawler.setCrawlConfig(ConfigManager.getCurrentConfig());
        MyMockRetryableCacheException messageOnly = new MyMockRetryableCacheException("Test exception");
        assertEquals(10000L, facade.getRetryDelay(messageOnly));

        // After retryDelay is set to "765432", that value is returned instead.
        ConfigurationUtil.addFromArgs("org.lockss.crawler.retryDelay", "765432");
        crawler.setCrawlConfig(ConfigManager.getCurrentConfig());
        MyMockRetryableCacheException messageOnlyAfterConfig = new MyMockRetryableCacheException("Test exception");
        assertEquals(765432L, facade.getRetryDelay(messageOnlyAfterConfig));
    }

    /**
     * Installs {@code crawlWindow} on the AU, builds a fresh crawler, registers
     * the start URL and the three {@code CW_URL*} links, installs a
     * {@code MyMockPermissionChecker(100)}, and runs the crawl. The crawl
     * result is ignored; callers inspect the AU and crawler status afterwards.
     *
     * @param crawlWindow the crawl window to install before crawling
     */
    private void setUpCrawlWindowTest(CrawlWindow crawlWindow) {
        mau.setCrawlWindow(crawlWindow);
        crawler = makeTestableCrawler();
        mau.addUrl(startUrl);

        // Two separate lists on purpose: one handed to the crawler, one to addUrls.
        List<String> linksToFollow = ListUtil.list(new String[]{CW_URL1, CW_URL2, CW_URL3});
        crawler.setUrlsToFollow(linksToFollow);
        List<String> linksToRegister = ListUtil.list(new String[]{CW_URL1, CW_URL2, CW_URL3});
        addUrls(linksToRegister);

        PermissionChecker checker = new MyMockPermissionChecker(100);
        crawler.setDaemonPermissionCheckers(ListUtil.list(new PermissionChecker[]{checker}));
        crawler.doCrawl();
    }

    /**
     * With a {@code MyMockCrawlWindow(3)}, only the start URL and
     * {@code CW_URL1} are expected to be cached, and the crawl status is
     * expected to be 6.
     */
    public void testCrawlWindow() {
        setUpCrawlWindowTest(new MyMockCrawlWindow(3));
        MockCachedUrlSet cus = (MockCachedUrlSet) mau.getAuCachedUrlSet();
        assertEquals(SetUtil.set(new String[]{startUrl, CW_URL1}), cus.getCachedUrls());
        // NOTE(review): 6 is presumably the "window closed" status code; confirm.
        assertEquals(6, crawler.getCrawlerStatus().getCrawlStatus());
    }

    /**
     * With a {@code MyMockCrawlWindow(0)}, nothing is expected to be cached and
     * the crawl status is expected to be 6.
     */
    public void testCrawlWindowFetchNothing() {
        setUpCrawlWindowTest(new MyMockCrawlWindow(0));
        MockCachedUrlSet cus = (MockCachedUrlSet) mau.getAuCachedUrlSet();
        assertEquals(new HashSet(), cus.getCachedUrls());
        // NOTE(review): 6 is presumably the "window closed" status code; confirm.
        assertEquals(6, crawler.getCrawlerStatus().getCrawlStatus());
    }

    /**
     * With a {@code MyMockCrawlWindow(1)}, nothing is expected to be cached and
     * the crawl status is expected to be 6.
     */
    public void testCrawlWindowFetchOnePermissionPage() {
        setUpCrawlWindowTest(new MyMockCrawlWindow(1));
        MockCachedUrlSet cus = (MockCachedUrlSet) mau.getAuCachedUrlSet();
        assertEquals(new HashSet(), cus.getCachedUrls());
        // NOTE(review): 6 is presumably the "window closed" status code; confirm.
        assertEquals(6, crawler.getCrawlerStatus().getCrawlStatus());
    }

    /**
     * With a {@code MyMockCrawlWindow(0)} installed and one link to follow, the
     * crawl is expected to report failure.
     */
    public void testOutsideOfWindowAfterGetUrlsToFollow() {
        mau.setCrawlWindow(new MyMockCrawlWindow(0));
        List<String> linksToFollow = ListUtil.list(new String[]{CW_URL1});
        crawler.setUrlsToFollow(linksToFollow);
        mau.addUrl(startUrl, false, true);
        mau.addUrl(CW_URL1);

        boolean crawlSucceeded = crawler.doCrawl();
        assertFalse(crawlSucceeded);
    }

    /**
     * Aborts the crawler before it is started; the crawl is expected to report
     * failure and {@code mcus} is expected to hold no cached URLs.
     */
    public void testAborted1() {
        List<String> linksToFollow = ListUtil.list(new String[]{CW_URL1});
        crawler.setUrlsToFollow(linksToFollow);
        mau.addUrl(startUrl, true, true);

        crawler.abortCrawl();

        boolean crawlSucceeded = crawler.doCrawl();
        assertFalse(crawlSucceeded);
        assertEmpty(mcus.getCachedUrls());
    }

    /**
     * Sets the {@code abortCrawl} flag on the URL fetchers for both followed
     * links; the crawl is expected to report failure with exactly two URLs
     * cached, one of which is the start URL.
     */
    public void testAborted2() {
        final String link4 = "http://www.example.com/link4.html";

        crawler.setUrlsToFollow(ListUtil.list(new String[]{CW_URL1, link4}));
        MockCachedUrlSet cus = (MockCachedUrlSet) mau.getAuCachedUrlSet();
        mau.addUrl(startUrl, true, true);
        mau.addUrl(CW_URL1);
        mau.addUrl(link4);

        MyMockUrlFetcher firstFetcher = (MyMockUrlFetcher) mau.makeUrlFetcher(crawler.getCrawlerFacade(), CW_URL1);
        firstFetcher.abortCrawl = true;
        MyMockUrlFetcher secondFetcher = (MyMockUrlFetcher) mau.makeUrlFetcher(crawler.getCrawlerFacade(), link4);
        secondFetcher.abortCrawl = true;

        assertFalse(crawler.doCrawl());

        Set cached = cus.getCachedUrls();
        assertEquals(2, cached.size());
        assertTrue(cached.contains(startUrl));
    }

    /**
     * Registers the start URL and every URL in {@code list}, runs a crawl
     * (ignoring its result), and returns the AU's cached URLs.
     *
     * @param list URLs to register via {@code addUrls}
     * @return the cached URLs reported by the AU's cached URL set after the crawl
     */
    private Set crawlUrls(List list) {
        MockCachedUrlSet cus = (MockCachedUrlSet) mau.getAuCachedUrlSet();
        mau.addUrl(startUrl);
        addUrls(list);
        crawler.doCrawl();
        Set cachedAfterCrawl = cus.getCachedUrls();
        return cachedAfterCrawl;
    }

    /**
     * A page that links to an {@code https} URL: the https link is expected to
     * be cached along with the start URL, {@code CW_URL1} and {@code CW_URL3}.
     */
    public void testDoesCollectHttps() {
        final String httpsLink = "https://www.example.com/link2.html";

        crawler.setUrlsToFollow(ListUtil.list(new String[]{CW_URL1}));
        extractor.addUrlsToReturn(CW_URL1, SetUtil.set(new String[]{CW_URL1, httpsLink, CW_URL3}));

        assertEquals(
                SetUtil.set(new String[]{startUrl, CW_URL1, httpsLink, CW_URL3}),
                crawlUrls(ListUtil.list(new String[]{CW_URL1, httpsLink, CW_URL3})));
    }

    /**
     * A page that links to {@code ftp} and {@code gopher} URLs. Despite the
     * method name, the assertion expects only the start URL and
     * {@code CW_URL1} to be cached; the ftp and gopher links are not in the
     * expected set.
     */
    public void testDoesCollectFtpAndGopher() {
        final String ftpLink = "ftp://www.example.com/link2.html";
        final String gopherLink = "gopher://www.example.com/link3.html";

        crawler.setUrlsToFollow(ListUtil.list(new String[]{CW_URL1}));
        extractor.addUrlsToReturn(CW_URL1, SetUtil.set(new String[]{CW_URL1, ftpLink, gopherLink}));

        assertEquals(
                SetUtil.set(new String[]{startUrl, CW_URL1}),
                crawlUrls(ListUtil.list(new String[]{CW_URL1, ftpLink, gopherLink})));
    }

    /**
     * {@code CW_URL1} lists itself among its own links; the crawl is expected
     * to finish with the start URL and all three {@code CW_URL*} links cached.
     */
    public void testDoesNotLoopOnSelfReferentialPage() {
        crawler.setUrlsToFollow(ListUtil.list(new String[]{CW_URL1}));
        // CW_URL1's extracted links include CW_URL1 itself.
        extractor.addUrlsToReturn(CW_URL1, SetUtil.set(new String[]{CW_URL1, CW_URL2, CW_URL3}));

        assertEquals(
                SetUtil.set(new String[]{startUrl, CW_URL1, CW_URL2, CW_URL3}),
                crawlUrls(ListUtil.list(new String[]{CW_URL1, CW_URL2, CW_URL3})));
    }

    /**
     * {@code CW_URL1} links back to the start URL; the crawl is expected to
     * finish with the start URL and all three {@code CW_URL*} links cached.
     */
    public void testDoesNotLoopOnSelfReferentialLoop() {
        crawler.setUrlsToFollow(ListUtil.list(new String[]{CW_URL1, CW_URL2, CW_URL3}));
        // CW_URL1's only extracted link points back at the start URL.
        extractor.addUrlsToReturn(CW_URL1, SetUtil.set(new String[]{startUrl}));

        assertEquals(
                SetUtil.set(new String[]{startUrl, CW_URL1, CW_URL2, CW_URL3}),
                crawlUrls(ListUtil.list(new String[]{CW_URL1, CW_URL2, CW_URL3})));
    }

    /**
     * Shared driver for the abbreviated crawl-test cases: enables "Crawl"
     * substance mode, stores the substance threshold in the AU configuration,
     * optionally installs substance and non-substance URL patterns, registers
     * the start URL and {@code nsurl1}..{@code nsurl4}, runs a crawl that must
     * report {@code false}, and then checks the crawl status, the AU's
     * substance state, and the set of fetched URLs.
     *
     * @param i expected numeric crawl status
     * @param state expected substance state of the AU after the crawl
     * @param collection expected contents of the crawler's {@code fetched} collection
     * @param i2 value stored under the {@code CRAWL_TEST_SUBSTANCE_THRESHOLD} AU config key
     * @param list substance URL pattern strings, or {@code null} to set none
     * @param list2 non-substance URL pattern strings, or {@code null} to set none
     * @param list3 start URLs to install on the AU
     */
    public void testAbbreviatedCrawlTest(int i, SubstanceChecker.State state, Collection<String> collection, int i2, List<String> list, List<String> list2, List<String> list3) throws Exception {
        setSubstanceMode("Crawl");

        Configuration auConfig = this.mau.getConfiguration().copy();
        // Convert the threshold explicitly rather than concatenating it onto the
        // unrelated TestBaseCrawler.EMPTY_PAGE constant; the stored value no
        // longer depends on that constant's contents.
        auConfig.put(ConfigParamDescr.CRAWL_TEST_SUBSTANCE_THRESHOLD.getKey(), String.valueOf(i2));
        this.mau.setConfiguration(auConfig);

        if (list != null) {
            this.mau.setSubstanceUrlPatterns(compileRegexps(list));
        }
        if (list2 != null) {
            this.mau.setNonSubstanceUrlPatterns(compileRegexps(list2));
        }

        this.mau.setStartUrls(list3);
        this.mau.addUrl(this.startUrl, false, true);
        this.mau.addUrl(this.nsurl1, false, true);
        this.mau.addUrl(this.nsurl2, false, true);
        this.mau.addUrl(this.nsurl3, false, true);
        this.mau.addUrl(this.nsurl4, true, true);

        // The crawl is asserted to return false in every scenario that uses this driver.
        assertFalse(this.crawler.doCrawl());

        SubstanceChecker.State actualState = AuUtil.getAuState(this.mau).getSubstanceState();
        assertEquals(i, this.crawler.getCrawlerStatus().getCrawlStatus());
        assertEquals(state, actualState);
        assertEquals(collection, this.crawler.fetched);
    }

    /**
     * Threshold 0 with substance pattern "html": expects crawl status 13,
     * substance state Yes, and only {@code nsurl1} fetched.
     */
    public void testCrawlTestPassZero() throws Exception {
        Collection<String> expectedFetched = SetUtil.set(new String[]{nsurl1});
        List<String> substancePatterns = ListUtil.list(new String[]{"html"});
        List<String> startPages = ListUtil.list(new String[]{nsurl1, nsurl2, nsurl3, nsurl4});
        testAbbreviatedCrawlTest(13, SubstanceChecker.State.Yes, expectedFetched, 0, substancePatterns, null, startPages);
    }

    /**
     * Threshold 1 with substance pattern "html": expects crawl status 13,
     * substance state Yes, and only {@code nsurl1} fetched.
     */
    public void testCrawlTestPass1() throws Exception {
        Collection<String> expectedFetched = SetUtil.set(new String[]{nsurl1});
        List<String> substancePatterns = ListUtil.list(new String[]{"html"});
        List<String> startPages = ListUtil.list(new String[]{nsurl1, nsurl2, nsurl3, nsurl4});
        testAbbreviatedCrawlTest(13, SubstanceChecker.State.Yes, expectedFetched, 1, substancePatterns, null, startPages);
    }

    /**
     * Threshold 2 with substance pattern "html": expects crawl status 13,
     * substance state Yes, and all four {@code nsurl*} start pages fetched.
     */
    public void testCrawlTestPass2() throws Exception {
        Collection<String> expectedFetched = SetUtil.set(new String[]{nsurl1, nsurl2, nsurl3, nsurl4});
        List<String> substancePatterns = ListUtil.list(new String[]{"html"});
        List<String> startPages = ListUtil.list(new String[]{nsurl1, nsurl2, nsurl3, nsurl4});
        testAbbreviatedCrawlTest(13, SubstanceChecker.State.Yes, expectedFetched, 2, substancePatterns, null, startPages);
    }

    /**
     * Threshold 3 with substance pattern "html": expects crawl status 14,
     * substance state Yes, and all four {@code nsurl*} start pages fetched.
     */
    public void testCrawlTestFail() throws Exception {
        Collection<String> expectedFetched = SetUtil.set(new String[]{nsurl1, nsurl2, nsurl3, nsurl4});
        List<String> substancePatterns = ListUtil.list(new String[]{"html"});
        List<String> startPages = ListUtil.list(new String[]{nsurl1, nsurl2, nsurl3, nsurl4});
        testAbbreviatedCrawlTest(14, SubstanceChecker.State.Yes, expectedFetched, 3, substancePatterns, null, startPages);
    }

    /**
     * Installs a {@code MyMockPermissionChecker(0)} first, then runs the
     * abbreviated crawl test with threshold 1: expects crawl status 8,
     * substance state Unknown, and nothing fetched.
     */
    public void testCrawlTestCrawlFail() throws Exception {
        PermissionChecker checker = new MyMockPermissionChecker(0);
        crawler.setDaemonPermissionCheckers(ListUtil.list(new PermissionChecker[]{checker}));

        List<String> substancePatterns = ListUtil.list(new String[]{"html"});
        List<String> startPages = ListUtil.list(new String[]{nsurl1, nsurl2, nsurl3, nsurl4});
        testAbbreviatedCrawlTest(8, SubstanceChecker.State.Unknown, Collections.EMPTY_SET, 1, substancePatterns, null, startPages);
    }

    /**
     * Drains {@code crawlQueue} (via {@code queueList}) and returns the URL of
     * each removed entry, in removal order. The queue is empty afterwards.
     *
     * @param crawlQueue the queue to drain
     * @return the URLs of the removed entries
     */
    List<String> queueUrlList(CrawlQueue crawlQueue) {
        // Typed list and for-each replace the raw ArrayList and manual iterator.
        List<String> urls = new ArrayList<String>();
        for (CrawlUrl entry : queueList(crawlQueue)) {
            urls.add(entry.getUrl());
        }
        return urls;
    }

    /**
     * Removes every element from {@code crawlQueue} and returns them in
     * removal order. The queue is empty when this method returns.
     *
     * @param crawlQueue the queue to drain
     * @return the removed entries
     */
    List<CrawlUrl> queueList(CrawlQueue crawlQueue) {
        // Typed list replaces the raw ArrayList so the element type is checked.
        List<CrawlUrl> drained = new ArrayList<CrawlUrl>();
        while (!crawlQueue.isEmpty()) {
            drained.add(crawlQueue.remove());
        }
        return drained;
    }

    /**
     * Exercises the found-link callback created by
     * {@code crawler.newFoundUrlCallback}: feeds it three spellings of the
     * same link, checks the first queued entry's URL, referrer and depth, then
     * feeds it a path-traversal link and checks the queue contents according
     * to the configured {@code org.lockss.UrlUtil.pathTraversalAction}.
     */
    public void testMyLinkExtractorCallback() {
        // AU stub: caches anything under http://www.example.com/ (case-insensitive
        // prefix match) and normalizes URLs by replacing "SESSION/" with
        // TestBaseCrawler.EMPTY_PAGE.
        MockArchivalUnit mockArchivalUnit = new MockArchivalUnit() { // from class: org.lockss.crawler.TestFollowLinkCrawler.1
            @Override // org.lockss.test.MockArchivalUnit
            public boolean shouldBeCached(String str) {
                return StringUtil.startsWithIgnoreCase(str, "http://www.example.com/");
            }

            @Override // org.lockss.test.MockArchivalUnit
            public String siteNormalizeUrl(String str) {
                return StringUtil.replaceString(str, "SESSION/", TestBaseCrawler.EMPTY_PAGE);
            }
        };
        // The referring URL is created at depth 0; the found link is asserted at depth 1 below.
        CrawlUrlData crawlUrlData = new CrawlUrlData("referring.url", 0);
        CrawlQueue crawlQueue = new CrawlQueue((Comparator) null);
        FollowLinkCrawler.MyLinkExtractorCallback newFoundUrlCallback = this.crawler.newFoundUrlCallback(mockArchivalUnit, crawlUrlData, crawlQueue, new HashMap(), new HashMap());
        // Three variants of the same link: plain, with a SESSION/ segment, and with an upper-case scheme.
        newFoundUrlCallback.foundLink("http://www.example.com/foo.bar");
        newFoundUrlCallback.foundLink("http://www.example.com/SESSION/foo.bar");
        newFoundUrlCallback.foundLink("HTTP://www.example.com/SESSION/foo.bar");
        // queueList drains the queue, so it is empty again before the next foundLink call.
        CrawlUrlData crawlUrlData2 = queueList(crawlQueue).get(0);
        assertEquals("http://www.example.com/foo.bar", crawlUrlData2.getUrl());
        assertEquals("referring.url", crawlUrlData2.getReferrer());
        assertEquals(1, crawlUrlData2.getDepth());
        newFoundUrlCallback.foundLink("http://www.example.com/foo/../..");
        // The expected queue contents depend on the configured action (default 2).
        // NOTE(review): the meaning of codes 1/2/3 is not visible here; confirm against UrlUtil.
        switch (CurrentConfig.getIntParam("org.lockss.UrlUtil.pathTraversalAction", 2)) {
            case 1:
                assertEquals(ListUtil.list(new String[]{"http://www.example.com/../"}), queueUrlList(crawlQueue));
                return;
            case 2:
                assertEquals(ListUtil.list(new String[]{"http://www.example.com/"}), queueUrlList(crawlQueue));
                return;
            case 3:
                assertTrue(crawlQueue.isEmpty());
                return;
            default:
                // Any other configured value: nothing further is asserted.
                return;
        }
    }

    /**
     * Registers the start URL with {@code addUrl(startUrl, false, false)} and
     * crawls; no URLs are expected to be cached afterwards.
     */
    public void testDoesNotCacheFileWhichShouldNotBeCached() {
        MockCachedUrlSet cus = (MockCachedUrlSet) mau.getAuCachedUrlSet();
        mau.addUrl(startUrl, false, false);

        crawler.doCrawl();

        assertEquals(SetUtil.set(new Object[0]), cus.getCachedUrls());
    }

    /**
     * Registers every URL in {@code list} with the mock AU and adds it to the
     * crawl rule via {@code addUrlToCrawl}.
     *
     * @param list the URLs to register
     */
    private void addUrls(List<String> list) {
        Iterator<String> urls = list.iterator();
        while (urls.hasNext()) {
            String url = urls.next();
            mau.addUrl(url);
            crawlRule.addUrlToCrawl(url);
        }
    }

    /**
     * Prepares {@code mockArchivalUnit} for a permission-page test: gives it a
     * plugin and AU id, a fresh crawl rule, start and permission URLs, a
     * refetch depth of 1 and the shared link extractor, and replaces
     * {@code crawler} with a new testable crawler.
     *
     * @param list URLs used as both start and permission URLs; when
     *     {@code null}, {@code list2} is used instead
     * @param i not used by this method
     * @param list2 fallback URLs used when {@code list} is {@code null}
     * @param mockArchivalUnit the AU to configure
     * @return the AU's cached URL set
     */
    private MockCachedUrlSet permissionPageTestSetup(List<String> list, int i, List<String> list2, MockArchivalUnit mockArchivalUnit) {
        mockArchivalUnit.setPlugin(new MockPlugin(getMockLockssDaemon()));
        mockArchivalUnit.setAuId("permissionPage au");
        MockCachedUrlSet cus = (MockCachedUrlSet) mockArchivalUnit.getAuCachedUrlSet();
        crawlRule = new MockCrawlRule();

        // The same list serves as both the start URLs and the permission URLs.
        List<String> entryUrls;
        if (list != null) {
            entryUrls = list;
        } else {
            entryUrls = list2;
        }
        mockArchivalUnit.setStartUrls(entryUrls);
        mockArchivalUnit.setPermissionUrls(entryUrls);

        mockArchivalUnit.setCrawlRule(crawlRule);
        mockArchivalUnit.setRefetchDepth(1);
        crawler = makeTestableCrawler();
        mockArchivalUnit.setLinkExtractor("text/html", extractor);
        return cus;
    }

    /**
     * Two hosts, each with its own page used as both start and permission URL,
     * and a {@code MyMockPermissionChecker(2)}: the crawl is expected to
     * succeed and cache every registered URL.
     */
    public void testMultiPermissionPageShouldPass() {
        List<String> entryPages = ListUtil.list(new String[]{startUrl, "http://www.foo.com/index.html"});
        List<String> linkedPages = ListUtil.list(new String[]{CW_URL1, CW_URL2, "http://www.foo.com/link3.html", "http://www.foo.com/link4.html"});
        MockCachedUrlSet cus = (MockCachedUrlSet) mau.getAuCachedUrlSet();

        addUrls(entryPages);
        addUrls(linkedPages);
        mau.setStartUrls(entryPages);
        mau.setPermissionUrls(entryPages);

        PermissionChecker checker = new MyMockPermissionChecker(2);
        crawler.setDaemonPermissionCheckers(ListUtil.list(new PermissionChecker[]{checker}));
        crawler.setUrlsToFollow(linkedPages);

        assertTrue(crawler.doCrawl());
        assertEquals(SetUtil.fromList(ListUtil.append(new List[]{linkedPages, entryPages})), cus.getCachedUrls());
    }

    /**
     * Uses all six URLs (two index pages plus four links) as both start and
     * permission URLs with a {@code MyMockPermissionChecker(2)}: the crawl is
     * expected to succeed and cache all of them.
     */
    public void testMultiStartPageShouldPassPermission() {
        List<String> indexPages = ListUtil.list(new String[]{startUrl, "http://www.foo.com/index.html"});
        List<String> linkedPages = ListUtil.list(new String[]{CW_URL1, CW_URL2, "http://www.foo.com/link3.html", "http://www.foo.com/link4.html"});
        List<String> allUrls = ListUtil.append(new List[]{indexPages, linkedPages});
        MockCachedUrlSet cus = (MockCachedUrlSet) mau.getAuCachedUrlSet();

        addUrls(allUrls);
        mau.setStartUrls(allUrls);
        mau.setPermissionUrls(allUrls);

        PermissionChecker checker = new MyMockPermissionChecker(2);
        crawler.setDaemonPermissionCheckers(ListUtil.list(new PermissionChecker[]{checker}));

        assertTrue(crawler.doCrawl());
        assertEquals(SetUtil.fromList(allUrls), cus.getCachedUrls());
    }

    /**
     * Only {@code www.example.com} has a permission URL, while one of the
     * followed links is on {@code www.foo.com}: the crawl is expected to
     * report failure with only the example.com index page and {@code CW_URL1}
     * cached.
     */
    public void testPermissionPageMissing() {
        final String exampleIndex = "http://www.example.com/index.html";

        List<String> permissionPages = ListUtil.list(new String[]{exampleIndex});
        List<String> linkedPages = ListUtil.list(new String[]{CW_URL1, "http://www.foo.com/link3.html"});
        MockCachedUrlSet cus = (MockCachedUrlSet) mau.getAuCachedUrlSet();

        addUrls(linkedPages);
        addUrls(permissionPages);
        mau.setStartUrls(this.startUrls);
        mau.setPermissionUrls(permissionPages);

        PermissionChecker checker = new MyMockPermissionChecker(1);
        crawler.setDaemonPermissionCheckers(ListUtil.list(new PermissionChecker[]{checker}));
        crawler.setUrlsToFollow(linkedPages);

        assertFalse(crawler.doCrawl());
        assertEquals(SetUtil.set(new String[]{exampleIndex, CW_URL1}), cus.getCachedUrls());
    }

    /**
     * Adds a single key/value pair to the current configuration through
     * {@code ConfigurationUtil.addFromArgs}.
     *
     * @param str the configuration key
     * @param str2 the configuration value
     */
    private static void setProperty(String str, String str2) {
        final String key = str;
        final String value = str2;
        ConfigurationUtil.addFromArgs(key, value);
    }

    /**
     * Hands the current configuration to {@code baseCrawler} and then invokes
     * its {@code doCrawl0()}.
     *
     * @param baseCrawler the crawler to configure and run
     * @return whatever {@code baseCrawler.doCrawl0()} returns
     */
    boolean doCrawl0(BaseCrawler baseCrawler) {
        baseCrawler.setCrawlConfig(ConfigManager.getCurrentConfig());
        boolean crawlResult = baseCrawler.doCrawl0();
        return crawlResult;
    }

    /**
     * Runs this test class with the JUnit text-UI runner. The incoming
     * command-line arguments are ignored; the runner always receives this
     * class's name as its only argument.
     */
    public static void main(String[] strArr) {
        String[] runnerArgs = {TestFollowLinkCrawler.class.getName()};
        TestRunner.main(runnerArgs);
    }
}
