package org.lockss.filter.html;

import java.io.InputStream;
import java.util.List;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Tag;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.nodes.RemarkNode;
import org.htmlparser.tags.CompositeTag;
import org.htmlparser.tags.Div;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.MetaTag;
import org.htmlparser.tags.OptionTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserUtils;
import org.htmlparser.util.SimpleNodeIterator;
import org.lockss.crawler.TestBaseCrawler;
import org.lockss.filter.html.HtmlNodeFilters;
import org.lockss.servlet.ServletUtil;
import org.lockss.test.LockssTestCase;
import org.lockss.test.MockArchivalUnit;
import org.lockss.test.MockLinkRewriterFactory;
import org.lockss.test.StringInputStream;
import org.lockss.util.ListUtil;
import org.lockss.util.Logger;
import org.lockss.util.StringUtil;

/**
 * Unit tests for the node filters and transforms provided by {@link HtmlNodeFilters}.
 * (Decompiled from org/lockss/filter/html/TestHtmlNodeFilters.class.)
 */
public class TestHtmlNodeFilters extends LockssTestCase {
    // Minimal HTML fragment: text, one anchor pointing at http://www.example.com/index.html, text.
    private static final String page = "foo<a href=\"http://www.example.com/index.html\">bar</a>baz";
    // URL that appears in the fixtures; not referenced by name in the code visible in this class.
    private static final String origUrl = "http://www.example.com/index.html";
    // Value the link/meta tests expect once the www.example.com prefix has been rewritten to foo.lockss.org.
    private static final String finalUrl = "http://foo.lockss.org/index.html";
    // Three newline-separated meta tags: name "abc" (example.com), "def" (example42.com) and "def" (example.com).
    private static final String metapage = "<meta name=\"abc\" content=\"http://www.example.com/index.html\">\n<meta name=\"def\" content=\"http://www.example42.com/index.html\">\n<meta name=\"def\" content=\"http://www.example.com/index.html\">\n";
    // Logger shared by the tests; debug3 output is emitted by parse() and some link tests.
    static Logger log = Logger.getLogger();
    // Attribute names handed to the linkRegex*Xforms factories.
    private static final String[] attrs = {"href", "src"};

    /**
     * Pins down the parser behaviour the other tests build on: an {@code <option>} element parses
     * to a {@link CompositeTag} exposing its inner text and its tag text, while bare text does not
     * parse to a composite tag.
     */
    public void testAssumptions() throws Exception {
        Node optionNode = parse("<option value=\"val1\">blue 13</option>").elementAt(0);
        assertTrue(optionNode instanceof CompositeTag);
        // elementAt() is typed as Node, so narrow only once the instanceof assertion has passed;
        // a wrong node type then fails the assertion instead of raising ClassCastException.
        CompositeTag option = (CompositeTag) optionNode;
        assertEquals("blue 13", option.getStringText());
        assertEquals("option value=\"val1\"", option.getText());

        Node textNode = parse("some text").elementAt(0);
        assertFalse(textNode instanceof CompositeTag);
        assertEquals("some text", textNode.getText());
    }

    /**
     * A null tag name or a null attribute name passed to
     * {@code HtmlNodeFilters.tagWithAttribute} must be rejected with a NullPointerException.
     */
    public void testIll() {
        // Case 1: null tag name.
        try {
            HtmlNodeFilters.tagWithAttribute((String) null, "attr", "aval");
            fail("null filter should throw");
        } catch (NullPointerException expected) {
            // This is the outcome the test is looking for.
        }

        // Case 2: null attribute name.
        try {
            HtmlNodeFilters.tagWithAttribute("atag", (String) null, "aval");
            fail("null filter should throw");
        } catch (NullPointerException expected) {
            // This is the outcome the test is looking for.
        }
    }

    /**
     * The three-argument {@code tagWithAttribute} filter accepts a div only when both the
     * attribute name and the attribute value are the requested ones.
     */
    public void testTagWithAttributeWithValue() throws Exception {
        NodeFilter filter = HtmlNodeFilters.tagWithAttribute("div", "attr", "aval");

        // Wrong name and value, right name with wrong value, wrong name with right value.
        String[][] rejected = {
            {"foo", "bar"},
            {"attr", "bar"},
            {"btag", "aval"},
        };
        for (String[] nameAndValue : rejected) {
            assertFalse(filter.accept(divWithAttr(nameAndValue[0], nameAndValue[1])));
        }

        assertTrue(filter.accept(divWithAttr("attr", "aval")));
    }

    /**
     * The two-argument {@code tagWithAttribute} filter accepts a div whenever the named attribute
     * is present, whatever its value.
     */
    public void testTagWithAttributeWithoutValue() throws Exception {
        NodeFilter filter = HtmlNodeFilters.tagWithAttribute("div", "attr");

        // Divs carrying a differently named attribute are rejected.
        String[][] rejected = {
            {"foo", "bar"},
            {"btag", "aval"},
        };
        for (String[] nameAndValue : rejected) {
            assertFalse(filter.accept(divWithAttr(nameAndValue[0], nameAndValue[1])));
        }

        // Any value is fine as long as the attribute name is the requested one.
        String[] acceptedValues = {"aval", "bar", "qux"};
        for (String value : acceptedValues) {
            assertTrue(filter.accept(divWithAttr("attr", value)));
        }
    }

    /**
     * {@code divWithAttribute} rejects a div whose attribute value differs and accepts one whose
     * attribute value is the requested one.
     */
    public void testDivWithAttribute() throws Exception {
        NodeFilter filter = HtmlNodeFilters.divWithAttribute("attr", "aval");

        Node wrongValue = divWithAttr("attr", "bar");
        assertFalse(filter.accept(wrongValue));

        Node rightValue = divWithAttr("attr", "aval");
        assertTrue(filter.accept(rightValue));
    }

    /**
     * Exercises {@code tagWithAttributeRegex} with an unanchored pattern (which, per the
     * assertions below, matches anywhere inside the attribute value) and with an anchored
     * pattern (which must match the whole value).
     */
    public void testAttributeRegex() throws Exception {
        NodeFilter unanchored = HtmlNodeFilters.tagWithAttributeRegex("div", "attr", "a+b");
        assertFalse(unanchored.accept(divWithAttr("attr", "ba")));
        assertTrue(unanchored.accept(divWithAttr("attr", "ab")));
        assertTrue(unanchored.accept(divWithAttr("attr", "abb")));
        assertTrue(unanchored.accept(divWithAttr("attr", "abbb")));
        assertTrue(unanchored.accept(divWithAttr("attr", "abbbbc")));
        assertTrue(unanchored.accept(divWithAttr("attr", "xxabbbbc")));

        NodeFilter anchored = HtmlNodeFilters.tagWithAttributeRegex("div", "attr", "^a+b$");
        // Contrast case kept from the original: the unanchored filter still accepts "abbb".
        assertTrue(unanchored.accept(divWithAttr("attr", "abbb")));
        // Positive case for the anchored filter. Previously it was only ever checked with inputs
        // it must reject, so a filter that rejected everything would have passed.
        assertTrue(anchored.accept(divWithAttr("attr", "ab")));
        assertTrue(anchored.accept(divWithAttr("attr", "aaab")));
        assertFalse(anchored.accept(divWithAttr("attr", "xxabbbb")));
        assertFalse(anchored.accept(divWithAttr("attr", "abbbbc")));
    }

    /**
     * {@code tagWithText("option", ...)} must select an option element whose text contains the
     * given string, and nothing for a different tag, for plain text, or for an option whose text
     * lacks the string.
     */
    public void testTagWithText() throws Exception {
        NodeFilter filter = HtmlNodeFilters.tagWithText("option", "article is cited by");

        NodeList matching = parse("<b><option value=\"#citart1\">This article is cited by the following articles in ...</option>").extractAllNodesThatMatch(filter);
        assertEquals("Should have one element: " + matching, 1, matching.size());
        Node matchedNode = matching.elementAt(0);
        assertTrue(matchedNode instanceof OptionTag);
        // elementAt() is typed as Node; narrow after the type assertion to reach getStringText().
        OptionTag option = (OptionTag) matchedNode;
        assertEquals("This article is cited by the following articles in ...", option.getStringText());

        // Same text, but inside a script element rather than an option.
        NodeList wrongTag = parse("<b><script value=\"#citart1\">This article is cited by the following articles in ...</script>").extractAllNodesThatMatch(filter);
        assertEquals("should be empty: " + wrongTag, 0, wrongTag.size());

        // No tags at all.
        NodeList plainText = parse("some text").extractAllNodesThatMatch(filter);
        assertEquals("Should be empty: " + plainText, 0, plainText.size());

        // Right tag, but the text does not contain the search string.
        NodeList wrongText = parse("<b><option value=\"#citart1\">this article isn't cited by anyone</option>").extractAllNodesThatMatch(filter);
        assertEquals("should be empty: " + wrongText, 0, wrongText.size());
    }

    /**
     * A self-closed {@code <p/>} has no text, so a {@code tagWithText} filter must select nothing.
     */
    public void testEmptyComposite() throws Exception {
        NodeFilter filter = HtmlNodeFilters.tagWithText("p", "a paragraph text");
        NodeList nodes = parse("foo<p class=\"cls\"/>bar");
        NodeList matched = nodes.extractAllNodesThatMatch(filter);
        assertEquals(0, matched.size());
    }

    /**
     * {@code tagWithTextRegex("option", ...)} must select an option element whose text matches
     * the pattern, and nothing for a different tag, for plain text, or for an option whose text
     * does not match.
     */
    public void testTagWithTextRegex() throws Exception {
        NodeFilter filter = HtmlNodeFilters.tagWithTextRegex("option", "article [is]+ cited by.*ll.*l");

        NodeList matching = parse("<b><option value=\"#citart1\">This article is cited by the following articles in ...</option>").extractAllNodesThatMatch(filter);
        assertEquals("Should have one element: " + matching, 1, matching.size());
        Node matchedNode = matching.elementAt(0);
        assertTrue(matchedNode instanceof OptionTag);
        // elementAt() is typed as Node; narrow after the type assertion to reach getStringText().
        OptionTag option = (OptionTag) matchedNode;
        assertEquals("This article is cited by the following articles in ...", option.getStringText());

        // Matching text, but inside a script element rather than an option.
        NodeList wrongTag = parse("<b><script value=\"#citart1\">This article is cited by the following articles in ...</script>").extractAllNodesThatMatch(filter);
        assertEquals("should be empty: " + wrongTag, 0, wrongTag.size());

        // No tags at all.
        NodeList plainText = parse("some text").extractAllNodesThatMatch(filter);
        assertEquals("Should be empty: " + plainText, 0, plainText.size());

        // Right tag, but the text does not match the pattern.
        NodeList wrongText = parse("<b><option value=\"#citart1\">this article isn't cited by anyone</option>").extractAllNodesThatMatch(filter);
        assertEquals("should be empty: " + wrongText, 0, wrongText.size());
    }

    /**
     * A self-closed {@code <p/>} has no text, so a {@code tagWithTextRegex} filter must select
     * nothing.
     */
    public void testEmptyCompositeRegex() throws Exception {
        NodeFilter filter = HtmlNodeFilters.tagWithTextRegex("p", ".*text");
        NodeList nodes = parse("foo<p class=\"cls\"/>bar");
        NodeList matched = nodes.extractAllNodesThatMatch(filter);
        assertEquals(0, matched.size());
    }

    /**
     * {@code HtmlNodeFilters.comment()} must select every comment node, including multi-line
     * ones, and the selected nodes expose the text between the comment delimiters.
     */
    public void testComment() throws Exception {
        NodeFilter filter = HtmlNodeFilters.comment();

        // Single trailing comment. This input used to be parsed and then discarded without any
        // assertion; it is now actually checked.
        NodeList single = parse("foo<b>bar baz qux <!-- comment1 -->").extractAllNodesThatMatch(filter);
        assertEquals("Should have one element: " + single, 1, single.size());
        Node only = single.elementAt(0);
        assertTrue(only instanceof RemarkNode);
        assertEquals(" comment1 ", only.getText());

        // One single-line and one multi-line comment interleaved with text.
        NodeList comments = parse("foo<b>bar<!-- comment -->baz qux<!-- \n multi \n line \n comment \n -->fred garply").extractAllNodesThatMatch(filter);
        assertEquals("Should have two elements: " + comments, 2, comments.size());
        Node first = comments.elementAt(0);
        assertTrue(first instanceof RemarkNode);
        assertEquals(" comment ", first.getText());
        Node second = comments.elementAt(1);
        assertTrue(second instanceof RemarkNode);
        assertEquals(" \n multi \n line \n comment \n ", second.getText());
    }

    /**
     * {@code commentWithString} must ignore the search string when it only occurs in ordinary
     * text, and select a comment that contains it.
     */
    public void testCommentWithString() throws Exception {
        NodeFilter filter = HtmlNodeFilters.commentWithString("sub String");

        // The string occurs in text; the comment itself says "dub String".
        NodeList noMatchPage = parse("foo<b>bar sub String <!-- dub String -->");
        String emptyMessage = "Should be empty: " + noMatchPage;
        NodeList none = noMatchPage.extractAllNodesThatMatch(filter);
        assertEquals(emptyMessage, 0, none.size());

        // The comment contains the string.
        NodeList comments = parse("foo<b>bar<!-- sub String -->baz").extractAllNodesThatMatch(filter);
        assertEquals("Should have one element: " + comments, 1, comments.size());
        Node only = comments.elementAt(0);
        assertTrue(only instanceof RemarkNode);
        assertEquals(" sub String ", only.getText());
    }

    /**
     * The one-argument {@code commentWithString} must not match a comment that differs only in
     * letter case, while the two-argument form called with {@code true} must match it.
     */
    public void testCommentWithStringCase() throws Exception {
        NodeFilter caseSensitive = HtmlNodeFilters.commentWithString("sub String");
        // The input is parsed once; an identical parse whose result was thrown away has been removed.
        NodeList nodes = parse("foo<b>bar<!-- Sub string -->baz");
        assertEquals("Should be empty: " + nodes, 0, nodes.extractAllNodesThatMatch(caseSensitive).size());

        NodeFilter ignoringCase = HtmlNodeFilters.commentWithString("sub String", true);
        NodeList comments = nodes.extractAllNodesThatMatch(ignoringCase);
        assertEquals("Should have one element: " + comments, 1, comments.size());
        Node only = comments.elementAt(0);
        assertTrue(only instanceof RemarkNode);
        assertEquals(" Sub string ", only.getText());
    }

    /**
     * {@code commentWithRegex} must ignore text that matches the pattern outside a comment, and
     * select a comment whose content matches it.
     */
    public void testCommentWithRegex() throws Exception {
        NodeFilter filter = HtmlNodeFilters.commentWithRegex("Begin ad [0-9]+");

        // The text matches, but the comment ("Begin ad x") does not.
        NodeList noMatchPage = parse("foo<b>bar Begin ad 27 <!-- Begin ad x -->");
        String emptyMessage = "Should be empty: " + noMatchPage;
        NodeList none = noMatchPage.extractAllNodesThatMatch(filter);
        assertEquals(emptyMessage, 0, none.size());

        // The comment matches.
        NodeList comments = parse("foo<b>bar<!-- Begin ad 42 -->baz").extractAllNodesThatMatch(filter);
        assertEquals("Should have one element: " + comments, 1, comments.size());
        Node only = comments.elementAt(0);
        assertTrue(only instanceof RemarkNode);
        assertEquals(" Begin ad 42 ", only.getText());
    }

    /**
     * The one-argument {@code commentWithRegex} must not match a comment that differs from the
     * pattern only in letter case, while the two-argument form called with {@code true} must.
     */
    public void testCommentWithRegexCase() throws Exception {
        NodeList nodes = parse("foo<b>bar Begin ad 27 <!-- Begin ad 3 foo -->");

        NodeFilter caseSensitive = HtmlNodeFilters.commentWithRegex("begin Ad [0-9]+");
        String emptyMessage = "Should be empty: " + nodes;
        NodeList none = nodes.extractAllNodesThatMatch(caseSensitive);
        assertEquals(emptyMessage, 0, none.size());

        NodeFilter ignoringCase = HtmlNodeFilters.commentWithRegex("begin Ad [0-9]+", true);
        NodeList comments = nodes.extractAllNodesThatMatch(ignoringCase);
        assertEquals("Should have one element: " + comments, 1, comments.size());
        Node only = comments.elementAt(0);
        // The failure message is the EMPTY_PAGE constant followed by the node's actual class.
        String classMessage = TestBaseCrawler.EMPTY_PAGE + only.getClass();
        assertTrue(classMessage, only instanceof RemarkNode);
        assertEquals(" Begin ad 3 foo ", only.getText());
    }

    /**
     * Checks {@code RefreshRegexXform.urlEncode}: strings without a {@code ?url=} argument come
     * back unchanged, and the URL after {@code ?url=} is percent-encoded up to the points shown
     * in the expected values below.
     */
    public void testUrlEncode() throws Exception {
        HtmlNodeFilters.RefreshRegexXform xform = new HtmlNodeFilters.RefreshRegexXform("^http://", true, "^/", "foo");

        // {expected, input} pairs for the single-argument overload, checked in order.
        String[][] cases = {
            {TestBaseCrawler.EMPTY_PAGE, TestBaseCrawler.EMPTY_PAGE},
            {"no url arg", "no url arg"},
            {"?url=http%3A%2F%2Ffoo.bar%2Fpath%2Ffile.html", "?url=http://foo.bar/path/file.html"},
            {"?url=http%3A%2F%2Ffoo.bar%2Fpath%2Ffile.html#ref", "?url=http://foo.bar/path/file.html#ref"},
            {"?url=http%3A%2F%2Ffoo.bar%2F\")oth/er", "?url=http://foo.bar/\")oth/er"},
        };
        for (String[] expectedAndInput : cases) {
            assertEquals(expectedAndInput[0], xform.urlEncode(expectedAndInput[1]));
        }

        // Two-argument overload called with true.
        assertEquals("?url=http%3A%2F%2Ffoo.bar%2F)oth/er", xform.urlEncode("?url=http://foo.bar/)oth/er", true));
    }

    /**
     * With {@code linkRegexYesXforms} and a pattern that does not match the page's link, the
     * filter selects nothing and the link keeps its original URL.
     */
    public void testLinkRegexYesXformsNoMatch() throws Exception {
        NodeList nodes = parse(page);
        NodeFilter xform = HtmlNodeFilters.linkRegexYesXforms(new String[]{"http://www.content.org/"}, new boolean[]{true}, new String[]{"http://www.content.org/"}, new String[]{"http://foo.lockss.org/"}, attrs);
        // Build the message before the filter runs so it shows the nodes as parsed.
        String emptyMessage = "Should be empty: " + nodes;
        assertEquals(emptyMessage, 0, nodes.extractAllNodesThatMatch(xform).size());

        // Index 1 is the anchor: the page is text, link, text.
        Node linkNode = nodes.elementAt(1);
        assertNotNull(linkNode);
        assertTrue(TestBaseCrawler.EMPTY_PAGE + linkNode.getClass(), linkNode instanceof LinkTag);
        // elementAt() is typed as Node; narrow after the type assertion to reach extractLink().
        LinkTag link = (LinkTag) linkNode;
        assertEquals("http://www.example.com/index.html", link.extractLink());
    }

    /**
     * With {@code linkRegexYesXforms} and a pattern that matches the page's link, the filter
     * still selects nothing but the link's URL is rewritten to {@code finalUrl}.
     */
    public void testLinkRegexYesXformsMatch() throws Exception {
        NodeList nodes = parse(page);
        log.debug3("testLinkRegexYesXformsMatch before " + nodes.toHtml());
        NodeFilter xform = HtmlNodeFilters.linkRegexYesXforms(new String[]{"http://www.example.com/"}, new boolean[]{true}, new String[]{"http://www.example.com/"}, new String[]{"http://foo.lockss.org/"}, attrs);
        // Build the message before the filter runs so it shows the nodes as parsed.
        String emptyMessage = "Should be empty: " + nodes;
        assertEquals(emptyMessage, 0, nodes.extractAllNodesThatMatch(xform).size());

        // Index 1 is the anchor. elementAt() is typed as Node, which has no extractLink(), so
        // check the type and narrow to LinkTag first.
        Node linkNode = nodes.elementAt(1);
        assertTrue(TestBaseCrawler.EMPTY_PAGE + linkNode.getClass(), linkNode instanceof LinkTag);
        assertEquals(finalUrl, ((LinkTag) linkNode).extractLink());
    }

    /**
     * Varargs shorthand for building a {@code String[]} inline.
     *
     * @param strArr the elements
     * @return the very array the varargs call produced (or the array passed in)
     */
    static String[] arr(String... strArr) {
        return strArr;
    }

    /**
     * Varargs shorthand for building a {@code boolean[]} inline.
     *
     * @param zArr the elements
     * @return the very array the varargs call produced (or the array passed in)
     */
    static boolean[] arr(boolean... zArr) {
        return zArr;
    }

    /**
     * With {@code linkRegexNoXforms} and a pattern that matches the page's link, the filter
     * selects nothing and the link keeps its original URL.
     */
    public void testLinkRegexNoXformsMatch() throws Exception {
        NodeList nodes = parse(page);
        NodeFilter xform = HtmlNodeFilters.linkRegexNoXforms(new String[]{"http://www.example.com/"}, new boolean[]{true}, new String[]{"http://www.example.com/"}, new String[]{"http://foo.lockss.org/"}, attrs);
        // Build the message before the filter runs so it shows the nodes as parsed.
        String emptyMessage = "Should be empty: " + nodes;
        assertEquals(emptyMessage, 0, nodes.extractAllNodesThatMatch(xform).size());

        // Index 1 is the anchor: the page is text, link, text.
        Node linkNode = nodes.elementAt(1);
        assertNotNull(linkNode);
        assertTrue(TestBaseCrawler.EMPTY_PAGE + linkNode.getClass(), linkNode instanceof LinkTag);
        // elementAt() is typed as Node; narrow after the type assertion to reach extractLink().
        LinkTag link = (LinkTag) linkNode;
        assertEquals("http://www.example.com/index.html", link.extractLink());
    }

    /**
     * {@code metaTagRegexYesXforms} is given the meta names "aaa" and "def". Of the three meta
     * tags in {@code metapage}, only the one that both has a listed name and a content URL
     * matching the pattern is expected to be rewritten.
     */
    public void testMetaTagRegexYesXforms() throws Exception {
        NodeFilter xform = HtmlNodeFilters.metaTagRegexYesXforms(arr("http://www.example.com/"), arr(true), arr("http://www.example.com/"), arr("http://foo.lockss.org/"), ListUtil.list(new String[]{"aaa", "def"}));
        NodeList nodes = parse(metapage);
        // Build the message before the filter runs so it shows the nodes as parsed.
        String emptyMessage = "Should be empty: " + nodes;
        assertEquals(emptyMessage, 0, nodes.extractAllNodesThatMatch(xform).size());

        // The meta tags sit at even indexes; the odd ones hold the newlines between them.
        // elementAt() is typed as Node, so each node is narrowed after its class is asserted.

        // Name "abc" is not in the list: content untouched.
        Node firstNode = nodes.elementAt(0);
        assertClass(MetaTag.class, firstNode);
        MetaTag first = (MetaTag) firstNode;
        assertEquals("abc", first.getAttribute("name"));
        assertEquals("http://www.example.com/index.html", first.getAttribute("content"));

        // Name "def" is listed, but the URL (example42.com) does not match: content untouched.
        Node secondNode = nodes.elementAt(2);
        assertClass(MetaTag.class, secondNode);
        MetaTag second = (MetaTag) secondNode;
        assertEquals("def", second.getAttribute("name"));
        assertEquals("http://www.example42.com/index.html", second.getAttribute("content"));

        // Name "def" is listed and the URL matches: content rewritten.
        Node thirdNode = nodes.elementAt(4);
        assertClass(MetaTag.class, thirdNode);
        MetaTag third = (MetaTag) thirdNode;
        assertEquals("def", third.getAttribute("name"));
        assertEquals(finalUrl, third.getAttribute("content"));
    }

    /**
     * An empty {@code <style>} element must pass through {@code StyleTagXformDispatch} unchanged
     * without the CSS link rewriter factory being called.
     */
    public void testEmptyStyleTagDispatch() throws Exception {
        final String html = "foo <style type=\"text/css\" media=\"screen\"></style>\nbar\n";

        MockArchivalUnit au = new MockArchivalUnit();
        MockLinkRewriterFactory rewriterFactory = new MockLinkRewriterFactory();
        ServletUtil.LinkTransform constantTransform = new ServletUtil.LinkTransform() {
            public String rewrite(String str) {
                return "rewritten";
            }
        };
        au.setLinkRewriterFactory("text/css", rewriterFactory);
        HtmlNodeFilters.StyleTagXformDispatch dispatch = new HtmlNodeFilters.StyleTagXformDispatch(au, (String) null, "http://example.com/base/", constantTransform);

        NodeList nodes = parse(html);
        NodeList matched = nodes.extractAllNodesThatMatch(dispatch);
        assertEquals(0, matched.size());
        // The markup must round-trip byte for byte.
        assertEquals(html, nodes.toHtml());
        assertEmpty("LinkRewriterFactory should not have been invoked", rewriterFactory.getArgLists());
    }

    /**
     * A {@code <style>} element that carries a {@code src} attribute must pass through
     * {@code StyleTagXformDispatch} unchanged without the CSS link rewriter factory being called.
     */
    public void testStyleTagWithSrcNoDispatch() throws Exception {
        final String html = "foo <style type=\"text/css\" src=\"foo.css\">xxx</style>\nbar\n";

        MockArchivalUnit au = new MockArchivalUnit();
        MockLinkRewriterFactory rewriterFactory = new MockLinkRewriterFactory();
        ServletUtil.LinkTransform constantTransform = new ServletUtil.LinkTransform() {
            public String rewrite(String str) {
                return "rewritten";
            }
        };
        au.setLinkRewriterFactory("text/css", rewriterFactory);
        HtmlNodeFilters.StyleTagXformDispatch dispatch = new HtmlNodeFilters.StyleTagXformDispatch(au, (String) null, "http://example.com/base/", constantTransform);
        // If the rewriter were used, this text would show up in the output.
        rewriterFactory.setLinkRewriter(new StringInputStream("shouldn't"));

        NodeList nodes = parse(html);
        NodeList matched = nodes.extractAllNodesThatMatch(dispatch);
        assertEquals(0, matched.size());
        assertEquals(html, nodes.toHtml());
        assertEmpty("LinkRewriterFactory should not have been invoked", rewriterFactory.getArgLists());
    }

    /**
     * Parameterised helper (it takes an argument, so it is called from the no-argument tests):
     * runs a {@code <style>} element with inline CSS through {@code StyleTagXformDispatch} and
     * checks that the CSS is replaced by the rewriter's output and that the factory received the
     * expected arguments.
     *
     * @param str charset handed to the dispatcher; when null the factory is expected to be
     *            given "ISO-8859-1"
     */
    public void testStyleTagDispatch(String str) throws Exception {
        final String charset = str;
        final String expectedCharset;
        if (charset == null) {
            expectedCharset = "ISO-8859-1";
        } else {
            expectedCharset = charset;
        }

        MockArchivalUnit au = new MockArchivalUnit();
        MockLinkRewriterFactory rewriterFactory = new MockLinkRewriterFactory();
        ServletUtil.LinkTransform constantTransform = new ServletUtil.LinkTransform() {
            public String rewrite(String str2) {
                return "rewritten";
            }
        };
        au.setLinkRewriterFactory("text/css", rewriterFactory);
        HtmlNodeFilters.StyleTagXformDispatch dispatch = new HtmlNodeFilters.StyleTagXformDispatch(au, charset, "http://example.com/base/", constantTransform);
        rewriterFactory.setLinkRewriter(new StringInputStream("result string"));

        NodeList nodes = parse("foo <style type=\"text/css\" media=\"screen\">\n@import \"/resource/css/hw.css\";\n@import \"/resource/css/btcint.css\";\n</style>\nbar\n");
        NodeList matched = nodes.extractAllNodesThatMatch(dispatch);
        assertEquals(0, matched.size());
        assertEquals("foo <style type=\"text/css\" media=\"screen\">result string</style>\nbar\n", nodes.toHtml());

        // Arguments recorded by the mock factory: MIME type, AU, CSS stream, charset.
        List firstCall = rewriterFactory.getArgLists().get(0);
        assertEquals("text/css", firstCall.get(0));
        assertEquals(au, firstCall.get(1));
        assertEquals("\n@import \"/resource/css/hw.css\";\n@import \"/resource/css/btcint.css\";\n", StringUtil.fromInputStream((InputStream) firstCall.get(2)));
        assertEquals(expectedCharset, firstCall.get(3));
    }

    /** Runs the style-tag dispatch check with an explicit UTF-8 charset. */
    public void testStyleTagDispatch() throws Exception {
        final String charset = "UTF-8";
        testStyleTagDispatch(charset);
    }

    /** Runs the style-tag dispatch check without a charset (null). */
    public void testStyleTagDispatchNoCharset() throws Exception {
        final String noCharset = null;
        testStyleTagDispatch(noCharset);
    }

    /**
     * A {@code style} attribute must have its value replaced by the CSS rewriter's output when
     * run through {@code StyleAttrXformDispatch}, and the factory must receive the original
     * attribute value.
     */
    public void testStyleAttrDispatch() throws Exception {
        final String originalCss = "background: url('/backg.png') no-repeat 0px -64px;";

        MockArchivalUnit au = new MockArchivalUnit();
        MockLinkRewriterFactory rewriterFactory = new MockLinkRewriterFactory();
        ServletUtil.LinkTransform constantTransform = new ServletUtil.LinkTransform() {
            public String rewrite(String str) {
                return "rewritten";
            }
        };
        au.setLinkRewriterFactory("text/css", rewriterFactory);
        HtmlNodeFilters.StyleAttrXformDispatch dispatch = new HtmlNodeFilters.StyleAttrXformDispatch(au, (String) null, "http://example.com/base/", constantTransform);
        rewriterFactory.setLinkRewriter(new StringInputStream("result string"));

        NodeList nodes = parse("<span class=\"foo\" style=\"background: url('/backg.png') no-repeat 0px -64px;\" />");
        NodeList matched = nodes.extractAllNodesThatMatch(dispatch);
        assertEquals(0, matched.size());
        assertEquals("<span class=\"foo\" style=\"result string\" />", nodes.toHtml());

        // Arguments recorded by the mock factory: MIME type, AU, CSS stream.
        List firstCall = rewriterFactory.getArgLists().get(0);
        assertEquals("text/css", firstCall.get(0));
        assertEquals(au, firstCall.get(1));
        assertEquals(originalCss, StringUtil.fromInputStream((InputStream) firstCall.get(2)));
    }

    /**
     * An empty {@code <script>} element must pass through {@code ScriptXformDispatch} unchanged
     * without the link rewriter factory being called.
     */
    public void testEmptyScriptDispatch() throws Exception {
        final String html = "foo <script type=\"text/javascript\"></script>\nbar\n";

        MockArchivalUnit au = new MockArchivalUnit();
        MockLinkRewriterFactory rewriterFactory = new MockLinkRewriterFactory();
        ServletUtil.LinkTransform constantTransform = new ServletUtil.LinkTransform() {
            public String rewrite(String str) {
                return "rewritten";
            }
        };
        // NOTE(review): the factory is registered for "text/css" although the script is
        // text/javascript, so this test may pass however empty scripts are handled - confirm
        // whether "text/javascript" was intended.
        au.setLinkRewriterFactory("text/css", rewriterFactory);
        HtmlNodeFilters.ScriptXformDispatch dispatch = new HtmlNodeFilters.ScriptXformDispatch(au, (String) null, "http://example.com/base/", constantTransform);

        NodeList nodes = parse(html);
        NodeList matched = nodes.extractAllNodesThatMatch(dispatch);
        assertEquals(0, matched.size());
        // The markup must round-trip byte for byte.
        assertEquals(html, nodes.toHtml());
        assertEmpty("LinkRewriterFactory should not have been invoked", rewriterFactory.getArgLists());
    }

    /**
     * Parameterised helper: runs the given markup through {@code ScriptXformDispatch} with a
     * JavaScript rewriter installed, then checks the resulting markup and the arguments the
     * factory received.
     *
     * @param str  input HTML; its script body is expected to be {@code "\norig script;\n"}
     * @param str2 HTML expected after the transform
     * @param str3 charset handed to the dispatcher; when null the factory is expected to be
     *             given "ISO-8859-1"
     */
    public void testScriptDispatch(String str, String str2, String str3) throws Exception {
        final String inputHtml = str;
        final String expectedHtml = str2;
        final String charset = str3;
        final String expectedCharset;
        if (charset == null) {
            expectedCharset = "ISO-8859-1";
        } else {
            expectedCharset = charset;
        }

        MockArchivalUnit au = new MockArchivalUnit();
        MockLinkRewriterFactory rewriterFactory = new MockLinkRewriterFactory();
        ServletUtil.LinkTransform constantTransform = new ServletUtil.LinkTransform() {
            public String rewrite(String str4) {
                return "rewritten";
            }
        };
        au.setLinkRewriterFactory("text/javascript", rewriterFactory);
        HtmlNodeFilters.ScriptXformDispatch dispatch = new HtmlNodeFilters.ScriptXformDispatch(au, charset, "http://example.com/base/", constantTransform);
        rewriterFactory.setLinkRewriter(new StringInputStream("\nresult string\n"));

        NodeList nodes = parse(inputHtml);
        NodeList matched = nodes.extractAllNodesThatMatch(dispatch);
        assertEquals(0, matched.size());
        assertEquals(expectedHtml, nodes.toHtml());

        // Arguments recorded by the mock factory: MIME type, AU, script stream, charset.
        List firstCall = rewriterFactory.getArgLists().get(0);
        assertEquals("text/javascript", firstCall.get(0));
        assertEquals(au, firstCall.get(1));
        assertEquals("\norig script;\n", StringUtil.fromInputStream((InputStream) firstCall.get(2)));
        assertEquals(expectedCharset, firstCall.get(3));
    }

    /**
     * Runs the script dispatch check twice for the same markup: first with "UTF-8", then with no
     * charset.
     *
     * @param str  input HTML
     * @param str2 HTML expected after the transform
     */
    public void testScriptDispatch(String str, String str2) throws Exception {
        final String[] charsets = {"UTF-8", null};
        for (String charset : charsets) {
            testScriptDispatch(str, str2, charset);
        }
    }

    /**
     * Script dispatch check for a script element declared with {@code type="text/javascript"}.
     * Both parameters are ignored by the body.
     */
    public void testScriptDispatchWType(String str, String str2) throws Exception {
        final String inputHtml = "foo <script type=\"text/javascript\">\norig script;\n</script>\nbar\n";
        final String expectedHtml = "foo <script type=\"text/javascript\">\nresult string\n</script>\nbar\n";
        testScriptDispatch(inputHtml, expectedHtml);
    }

    /**
     * Script dispatch check for a script element declared with {@code language="javascript"}
     * instead of a {@code type} attribute. Both parameters are ignored by the body.
     *
     * <p>NOTE(review): because this method declares parameters, a JUnit 3 style runner will
     * presumably not discover it as a test; confirm whether a no-argument signature was
     * intended.
     */
    public void testScriptDispatchWLang(String str, String str2) throws Exception {
        final String inputHtml = "foo <script language=\"javascript\">\norig script;\n</script>\nbar\n";
        // The expected markup used to read language=="javascript" (doubled '='), which could
        // never equal the re-serialised input tag; only the script body is expected to change.
        final String expectedHtml = "foo <script language=\"javascript\">\nresult string\n</script>\nbar\n";
        testScriptDispatch(inputHtml, expectedHtml);
    }

    /**
     * A {@code <script>} element that carries a {@code src} attribute must pass through
     * {@code ScriptXformDispatch} unchanged without the link rewriter factory being called.
     */
    public void testScriptWithSrcNoDispatch() throws Exception {
        final String html = "foo <script type=\"text/css\" src=\"foo.css\">xxx</script>\nbar\n";

        MockArchivalUnit au = new MockArchivalUnit();
        MockLinkRewriterFactory rewriterFactory = new MockLinkRewriterFactory();
        ServletUtil.LinkTransform constantTransform = new ServletUtil.LinkTransform() {
            public String rewrite(String str) {
                return "rewritten";
            }
        };
        au.setLinkRewriterFactory("text/css", rewriterFactory);
        HtmlNodeFilters.ScriptXformDispatch dispatch = new HtmlNodeFilters.ScriptXformDispatch(au, (String) null, "http://example.com/base/", constantTransform);
        // If the rewriter were used, this text would show up in the output.
        rewriterFactory.setLinkRewriter(new StringInputStream("shouldn't"));

        NodeList nodes = parse(html);
        NodeList matched = nodes.extractAllNodesThatMatch(dispatch);
        assertEquals(0, matched.size());
        assertEquals(html, nodes.toHtml());
        assertEmpty("LinkRewriterFactory should not have been invoked", rewriterFactory.getArgLists());
    }

    /**
     * With {@code linkRegexNoXforms} and a pattern that does not match the page's link, the
     * filter selects nothing but the link's URL is rewritten to {@code finalUrl}.
     */
    public void testLinkRegexNoXformsNoMatch() throws Exception {
        NodeList nodes = parse(page);
        NodeFilter xform = HtmlNodeFilters.linkRegexNoXforms(new String[]{"http://www.content.org/"}, new boolean[]{true}, new String[]{"http://www.example.com/"}, new String[]{"http://foo.lockss.org/"}, attrs);
        // Build the message before the filter runs so it shows the nodes as parsed.
        String emptyMessage = "Should be empty: " + nodes;
        assertEquals(emptyMessage, 0, nodes.extractAllNodesThatMatch(xform).size());

        // Index 1 is the anchor. elementAt() is typed as Node, which has no extractLink(), so
        // check the type and narrow to LinkTag first.
        Node linkNode = nodes.elementAt(1);
        assertTrue(TestBaseCrawler.EMPTY_PAGE + linkNode.getClass(), linkNode instanceof LinkTag);
        assertEquals(finalUrl, ((LinkTag) linkNode).extractLink());
    }

    /**
     * Parses an HTML string into a node list, with no filter applied.
     * When debug3 logging is enabled the parsed nodes are also logged.
     *
     * @param str the HTML to parse
     * @return the nodes produced by the parser
     */
    NodeList parse(String str) throws Exception {
        final NodeList nodes = ParserUtils.createParserParsingAnInputString(str).parse((NodeFilter) null);
        if (!log.isDebug3()) {
            return nodes;
        }
        log.debug3("parsed (" + nodes.size() + "):\n" + HtmlFilterInputStream.nodeString(nodes));
        return nodes;
    }

    /**
     * Builds a standalone {@link Div} tag carrying a single attribute.
     *
     * @param str  attribute name
     * @param str2 attribute value
     * @return the new div node
     */
    Node divWithAttr(String str, String str2) throws Exception {
        final Node div = tagWithAttr(Div.class, str, str2);
        return div;
    }

    /**
     * Instantiates the given tag class through its no-argument constructor and sets one
     * attribute on the new tag.
     *
     * @param cls  a {@link Tag} implementation with an accessible no-argument constructor
     * @param str  attribute name
     * @param str2 attribute value
     * @return the new tag
     * @throws Exception if the class cannot be instantiated reflectively
     */
    Node tagWithAttr(Class cls, String str, String str2) throws Exception {
        // Class.newInstance() is deprecated (it rethrows checked constructor exceptions
        // undeclared); go through the no-arg constructor explicitly instead.
        Class<?> tagClass = cls;
        Tag tag = (Tag) tagClass.getDeclaredConstructor().newInstance();
        tag.setAttribute(str, str2);
        return tag;
    }

    /**
     * Exercises {@code HtmlNodeFilters.allExceptSubtree(root, subtree)} on a three-level tree of
     * divs, in three configurations: both filters match, only the root filter matches, and the
     * root filter matches nothing. The expected ids are listed in each case.
     */
    public void testAllExceptSubtreeNodeFilter() throws Exception {
        NodeList parsed = parse("<div id=\"a1\">  <div id=\"a11\">    <div id=\"a111\">...</div>    <div id=\"a112\">...</div>    <div id=\"a113\">...</div>  </div>  <div id=\"a12\">    <div id=\"a121\">      <div id=\"a1211\">...</div>      <div id=\"a1212\">...</div>      <div id=\"a1213\">...</div>    </div>    <div id=\"a122\">      <div id=\"a1221\">...</div>      <div id=\"a1222\">...</div>      <div id=\"a1223\">...</div>    </div>    <div id=\"a123\">      <div id=\"a1231\">...</div>      <div id=\"a1232\">...</div>      <div id=\"a1233\">...</div>    </div>  </div>  <div id=\"a13\">    <div id=\"a131\">...</div>    <div id=\"a132\">...</div>    <div id=\"a133\">...</div>  </div></div>");
        Node root = parsed.elementAt(0);

        // Root a12, excluded subtree a122: the other children of a12 and their descendants remain.
        NodeList withoutSubtree = new NodeList();
        root.collectInto(withoutSubtree, HtmlNodeFilters.allExceptSubtree(HtmlNodeFilters.tagWithAttribute("div", "id", "a12"), HtmlNodeFilters.tagWithAttribute("div", "id", "a122")));
        withoutSubtree.keepAllNodesThatMatch(HtmlNodeFilters.tagWithAttribute("div", "id"));
        assertDivIdsInOrder(new String[]{"a121", "a1211", "a1212", "a1213", "a123", "a1231", "a1232", "a1233"}, withoutSubtree);

        // Root a12, subtree filter matches nothing (a99): a12 and everything below it is collected.
        NodeList wholeTree = new NodeList();
        root.collectInto(wholeTree, HtmlNodeFilters.allExceptSubtree(HtmlNodeFilters.tagWithAttribute("div", "id", "a12"), HtmlNodeFilters.tagWithAttribute("div", "id", "a99")));
        wholeTree.keepAllNodesThatMatch(HtmlNodeFilters.tagWithAttribute("div", "id"));
        assertDivIdsInOrder(new String[]{"a12", "a121", "a1211", "a1212", "a1213", "a122", "a1221", "a1222", "a1223", "a123", "a1231", "a1232", "a1233"}, wholeTree);

        // Root filter matches nothing (a99): nothing is collected.
        NodeList nothing = new NodeList();
        root.collectInto(nothing, HtmlNodeFilters.allExceptSubtree(HtmlNodeFilters.tagWithAttribute("div", "id", "a99"), HtmlNodeFilters.tagWithAttribute("div", "id", "a122")));
        nothing.keepAllNodesThatMatch(HtmlNodeFilters.tagWithAttribute("div", "id"));
        assertEquals(0, nothing.size());
    }

    /** Asserts that the list holds exactly the tags with the given ids, in the given order. */
    private void assertDivIdsInOrder(String[] expectedIds, NodeList divs) throws Exception {
        assertEquals(expectedIds.length, divs.size());
        SimpleNodeIterator it = divs.elements();
        int index = 0;
        while (it.hasMoreNodes()) {
            // nextNode() is typed as Node, which has no getAttribute(); the list was already
            // reduced to div tags, so narrowing to Tag is safe here.
            Tag div = (Tag) it.nextNode();
            assertEquals(expectedIds[index], div.getAttribute("id"));
            index++;
        }
    }

    /**
     * {@code HtmlNodeFilters.ancestor} combined with a tag filter must select the target
     * paragraph when a matching div sits several levels above it.
     */
    public void testAncestor() throws Exception {
        NodeList parsed = parse("<div class=\"bad1\">...a...<div class=\"good2\">...b...<div class=\"bad3\">...c...<div class=\"bad4\">...d...<p class=\"target\">...e...</p>...f...</div>...g...</div>...h...</div>...i...</div>");
        assertEquals(1, parsed.size());

        // Target paragraph AND some ancestor being the "good2" div.
        NodeFilter targetUnderGood = new AndFilter(HtmlNodeFilters.tagWithAttribute("p", "class", "target"), HtmlNodeFilters.ancestor(HtmlNodeFilters.tagWithAttribute("div", "class", "good2")));
        NodeList collected = new NodeList();
        parsed.elementAt(0).collectInto(collected, targetUnderGood);

        assertEquals(1, collected.size());
        NodeFilter isTarget = HtmlNodeFilters.tagWithAttribute("p", "class", "target");
        assertTrue(isTarget.accept(collected.elementAt(0)));
    }

    /**
     * {@code HtmlNodeFilters.parent} must select the target paragraph whose direct parent is the
     * "parent" div, both when combined with a tag filter and when used on its own.
     */
    public void testParent() throws Exception {
        NodeList parsed = parse("<div class=\"bad\">...a...<div class=\"parent\">...b...<p class=\"target\">...c...</p>...d...</div>...e...</div>");
        assertEquals(1, parsed.size());
        Node root = parsed.elementAt(0);

        // Combined: target paragraph AND parent being the "parent" div.
        NodeFilter targetWithParent = new AndFilter(HtmlNodeFilters.tagWithAttribute("p", "class", "target"), HtmlNodeFilters.parent(HtmlNodeFilters.tagWithAttribute("div", "class", "parent")));
        NodeList combined = new NodeList();
        root.collectInto(combined, targetWithParent);
        assertEquals(1, combined.size());
        assertTrue(HtmlNodeFilters.tagWithAttribute("p", "class", "target").accept(combined.elementAt(0)));

        // Parent filter alone: still exactly one node, and it is the target paragraph.
        NodeFilter parentOnly = HtmlNodeFilters.parent(HtmlNodeFilters.tagWithAttribute("div", "class", "parent"));
        NodeList alone = new NodeList();
        root.collectInto(alone, parentOnly);
        assertEquals(1, alone.size());
        assertTrue(HtmlNodeFilters.tagWithAttribute("p", "class", "target").accept(alone.elementAt(0)));
    }
}
