package org.lockss.filter.html;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URL;
import java.util.List;
import junit.framework.ComparisonFailure;
import org.htmlparser.NodeFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserUtils;
import org.lockss.crawler.TestBaseCrawler;
import org.lockss.test.ConfigurationUtil;
import org.lockss.test.LockssTestCase;
import org.lockss.test.StringInputStream;
import org.lockss.util.EncodedThing;
import org.lockss.util.IOUtil;
import org.lockss.util.ListUtil;
import org.lockss.util.ReaderInputStream;
import org.lockss.util.StringUtil;
import org.lockss.util.UrlUtil;

/* loaded from: input_file:org/lockss/filter/html/TestHtmlFilterInputStream.class */
/**
 * JUnit 3 style tests for {@link HtmlFilterInputStream}: argument checking,
 * pass-through and replacing transforms, handling of unclosed tags and script
 * bodies, and charset handling (explicit charsets, charset switches announced
 * by a META tag, and the encoding match range).
 */
public class TestHtmlFilterInputStream extends LockssTestCase {
    static String ISO = "ISO-8859-1";
    static String UTF8 = "UTF-8";

    // Configuration keys set by the tests below.
    private static final String PARAM_VERBATIM = "org.lockss.filter.html.verbatim";
    private static final String PARAM_ADAPT_ENCODING = "org.lockss.filter.html.adaptEncoding";
    private static final String PARAM_MARK = "org.lockss.filter.html.mark";
    private static final String PARAM_ENCODING_MATCH_RANGE = "org.lockss.filter.html.encodingMatchRange";

    // Fixture shared by the charset-change tests.
    private static final String CHARSET_CHANGE_FIXTURE = "charset-change3.txt";
    // The comparison in the charset-change tests starts at these offsets: the
    // raw fixture text is compared from 3227, the filtered text from 3234.
    // NOTE(review): the reason for the 7 character difference is not visible
    // in this file - confirm against the fixture.
    private static final int CHARSET_CHANGE_EXPECTED_OFFSET = 3227;
    private static final int CHARSET_CHANGE_FILTERED_OFFSET = 3234;

    /** Transform that hands back the node list it was given, unchanged. */
    public class IdentityXform implements HtmlTransform {
        IdentityXform() {
        }

        public NodeList transform(NodeList nodeList) {
            return nodeList;
        }
    }

    /**
     * LinkTag variant that reports its own ender / end-tag-ender lists and,
     * instead of storing start and end positions, appends them to the list
     * supplied at construction ("s" + position, "e" + position).
     */
    static class MyLinkTag extends LinkTag {
        private static final String[] mEnders = {"A", "P", "DIV", "TD", "TR", "FORM", "LI"};
        private static final String[] mEndTagEnders = {"P", "DIV", "TD", "TR", "FORM", "LI", "BODY", "HTML"};
        List lst;

        public MyLinkTag(List list) {
            this.lst = list;
        }

        public String[] getEnders() {
            return mEnders;
        }

        public String[] getEndTagEnders() {
            return mEndTagEnders;
        }

        public void setStartPosition(int i) {
            this.lst.add("s" + i);
        }

        public void setEndPosition(int i) {
            this.lst.add("e" + i);
        }
    }

    /**
     * Filters {@code input} with {@code xform} using no explicit charsets and
     * asserts that the result is {@code expected}.
     */
    private void assertFilterString(String expected, String input, HtmlTransform xform) throws IOException {
        assertFilterString(expected, input, null, null, null, xform);
    }

    /**
     * Runs {@code input} through an HtmlFilterInputStream and asserts that the
     * output equals {@code expected}, that the stream then reports end of
     * input, and that reading after close() throws IOException.
     *
     * @param expected   text the filtered stream must produce
     * @param input      HTML fed to the filter
     * @param srcCharset charset used to turn {@code input} into bytes and to
     *                   decode the filtered bytes for comparison; when null a
     *                   StringInputStream and the two-argument matcher are used
     * @param inCharset  second constructor argument of HtmlFilterInputStream
     *                   (presumably the charset it assumes for its input -
     *                   confirm against that class)
     * @param outCharset third constructor argument of HtmlFilterInputStream
     *                   (presumably its output charset - confirm)
     * @param xform      transform applied by the filter
     */
    private void assertFilterString(String expected, String input, String srcCharset, String inCharset, String outCharset, HtmlTransform xform) throws IOException {
        InputStream source;
        if (srcCharset == null) {
            source = new StringInputStream(input);
        } else {
            source = new ReaderInputStream(new StringReader(input), srcCharset);
        }
        HtmlFilterInputStream filtered = new HtmlFilterInputStream(source, inCharset, outCharset, xform);
        if (srcCharset != null) {
            assertInputStreamMatchesString(expected, (InputStream) filtered, srcCharset);
        } else {
            assertInputStreamMatchesString(expected, filtered);
        }
        // The matcher must have drained the stream completely.
        assertEquals(-1, filtered.read());
        filtered.close();
        // NOTE(review): the purpose of forcing a GC here is not visible in
        // this file; kept to preserve the original test behaviour.
        System.gc();
        try {
            filtered.read();
            fail("closed InputStream should throw");
        } catch (IOException expected2) {
            // expected: the stream has been closed
        }
    }

    /** Asserts that the identity transform turns {@code input} into {@code expected}. */
    private void assertIdentityXform(String expected, String input) throws IOException {
        assertFilterString(expected, input, new IdentityXform());
    }

    /** Charset-aware variant of {@link #assertIdentityXform(String, String)}. */
    private void assertIdentityXform(String expected, String input, String srcCharset, String inCharset, String outCharset) throws IOException {
        assertFilterString(expected, input, srcCharset, inCharset, outCharset, new IdentityXform());
    }

    /** A null input stream or a null transform must be rejected by the constructor. */
    public void testIll() {
        try {
            new HtmlFilterInputStream((InputStream) null, new IdentityXform());
            fail("null InputStream should throw");
        } catch (IllegalArgumentException expected) {
            // expected
        }
        try {
            new HtmlFilterInputStream(new StringInputStream("blah"), (HtmlTransform) null);
            fail("null xform should throw");
        } catch (IllegalArgumentException expected) {
            // expected
        }
    }

    /** Simple document passes through the identity transform unchanged. */
    public void testIdentityXform() throws IOException {
        String html = "<html>foo</html>";
        assertFilterString(html, html, new IdentityXform());
    }

    /** An empty page yields an empty page, and the transform sees an empty node list. */
    public void testEmpty() throws IOException {
        assertFilterString(TestBaseCrawler.EMPTY_PAGE, TestBaseCrawler.EMPTY_PAGE, new IdentityXform());
        MockHtmlTransform xform = new MockHtmlTransform(ListUtil.list(new NodeList[]{new NodeList()}));
        assertFilterString(TestBaseCrawler.EMPTY_PAGE, TestBaseCrawler.EMPTY_PAGE, xform);
        assertEquals(0, xform.getArg(0).size());
    }

    /** Parses {@code str} into a node list without applying any node filter. */
    NodeList parse(String str) throws Exception {
        return ParserUtils.createParserParsingAnInputString(str).parse((NodeFilter) null);
    }

    /**
     * The filter must emit whatever the transform returns, and must hand the
     * transform the parsed form of the input.
     */
    public void testXform() throws Exception {
        MockHtmlTransform xform = new MockHtmlTransform(ListUtil.list(new NodeList[]{parse("<i>it</i>")}));
        assertFilterString("<i>it</i>", "<b>bold</b>", xform);
        NodeList seen = xform.getArg(0);
        assertEquals(3, seen.size());
        assertEquals("<b>", seen.elementAt(0).toHtml());
        assertEquals("bold", seen.elementAt(1).toHtml());
        assertEquals("</b>", seen.elementAt(2).toHtml());
    }

    /**
     * Unclosed li/div/script tags: output equals the input by default, and has
     * the missing end tags inserted once the verbatim parameter is "false".
     */
    public void testUnclosed1() throws IOException {
        String input = "<HTML><BODY><ul><li>l1<li>l2<div>text1</ul>tween<ul><li>l3<li>l4<div><script>text2</ul></body></html>";
        assertIdentityXform(input, input);
        ConfigurationUtil.setFromArgs(PARAM_VERBATIM, "false");
        assertIdentityXform("<HTML><BODY><ul><li>l1</li><li>l2<div>text1</div></li></ul>tween<ul><li>l3</li><li>l4<div><script>text2</script></div></li></ul></body></html>", input);
    }

    /** Same as {@link #testUnclosed1()} but with unclosed dt/dd tags inside dl lists. */
    public void testUnclosed2() throws IOException {
        String input = "<HTML><BODY><dl><dt>t1<dd>d1<div>text1</dl><dl><dt>t2<dd>d2<div><script>text2</dl></body></html>";
        assertIdentityXform(input, input);
        ConfigurationUtil.setFromArgs(PARAM_VERBATIM, "false");
        assertIdentityXform("<HTML><BODY><dl><dt>t1</dt><dd>d1<div>text1</div></dd></dl><dl><dt>t2</dt><dd>d2<div><script>text2</script></div></dd></dl></body></html>", input);
    }

    /** Markup-like text inside a script element must come through untouched. */
    public void testScript() throws IOException {
        String input = "<HTML><BODY><script>document.write (\"<a>This is strictly illegal</A>\") more('stu<a style=\"' + 'foo\">ff'); more('<h2'); more('stuff</h2>'); more('stuff</a>');</script></body></html>";
        assertIdentityXform(input, input);
    }

    /**
     * Exercises combinations of source, input and output charsets, first with
     * the adaptEncoding parameter "false" and then with it "true". With it
     * "false" an ISO-8859-1 source combined with a UTF-8 third constructor
     * argument must produce a comparison failure; with it "true" the same
     * combination must match.
     */
    public void testCharset() throws Exception {
        String isoHtml = "<html><body>abcé1234</body></html>";
        // Expected text when the ISO-8859-1 bytes are given to the filter with
        // "UTF-8" as its second constructor argument.
        String isoHtmlReadAsUtf8 = "<html><body>abcý1234</body></html>";
        String utf8Html = "<html><body>abc≠</body></html>";
        // Expected text for the UTF-8 document when the third constructor
        // argument is null.
        String utf8HtmlNoOutCharset = "<html><body>abc`</body></html>";

        ConfigurationUtil.addFromArgs(PARAM_ADAPT_ENCODING, "false");
        assertIdentityXform(isoHtml, isoHtml, "ISO-8859-1", "ISO-8859-1", "ISO-8859-1");
        assertIdentityXform(isoHtml, isoHtml, "ISO-8859-1", "ISO-8859-1", null);
        assertIdentityXform(isoHtmlReadAsUtf8, isoHtml, "ISO-8859-1", "UTF-8", null);
        assertIdentityXform(utf8Html, utf8Html, "UTF-8", "UTF-8", "UTF-8");
        assertIdentityXform(utf8HtmlNoOutCharset, utf8Html, "UTF-8", "UTF-8", null);
        try {
            assertIdentityXform(isoHtml, isoHtml, "ISO-8859-1", "ISO-8859-1", "UTF-8");
            fail("Shouldn't match String read with different encoding");
        } catch (ComparisonFailure expected) {
            // expected: output was produced in a different encoding
        }

        ConfigurationUtil.addFromArgs(PARAM_ADAPT_ENCODING, "true");
        assertIdentityXform(isoHtml, isoHtml, "ISO-8859-1", "ISO-8859-1", "ISO-8859-1");
        assertIdentityXform(isoHtml, isoHtml, "ISO-8859-1", "ISO-8859-1", "UTF-16");
        assertIdentityXform(isoHtml, isoHtml, "ISO-8859-1", "ISO-8859-1", null);
        assertIdentityXform(isoHtmlReadAsUtf8, isoHtml, "ISO-8859-1", "UTF-8", null);
        assertIdentityXform(utf8Html, utf8Html, "UTF-8", "UTF-8", "UTF-8");
        assertIdentityXform(utf8HtmlNoOutCharset, utf8Html, "UTF-8", "UTF-8", null);
        assertIdentityXform(isoHtml, isoHtml, "ISO-8859-1", "ISO-8859-1", "UTF-8");
    }

    /** The filter stream is an EncodedThing and reports the charset it was given. */
    public void testKnowsEncoding() throws Exception {
        String html = "<html><body>abcé1234</body></html>";
        // Declared as InputStream so the instanceof assertion below is a real check.
        InputStream filtered = new HtmlFilterInputStream(new ReaderInputStream(new StringReader(html), ISO), ISO, ISO, new IdentityXform());
        try {
            assertInputStreamMatchesString(html, filtered, ISO);
            assertTrue(filtered instanceof EncodedThing);
            assertEquals(ISO, ((EncodedThing) filtered).getCharset());
        } finally {
            IOUtil.safeClose(filtered);
        }
    }

    /** A http-equiv Content-Type META tag naming utf-8 overrides the ISO charset given to the constructor. */
    public void testKnowsEncodingChange() throws Exception {
        assertReportsUtf8AfterMetaCharset("<html><head><META http-equiv=Content-Type content=\"text/html; charset=utf-8\"></head></body>abcé1234</body></html>");
    }

    /** A META charset=utf-8 tag overrides the ISO charset given to the constructor. */
    public void testKnowsEncodingChangeCharsetMeta() throws Exception {
        assertReportsUtf8AfterMetaCharset("<html><head><META charset=utf-8></head></body>abcé1234</body></html>");
    }

    /**
     * Feeds UTF-8 encoded {@code html} to a filter constructed with ISO
     * charsets and asserts that the stream reports UTF-8 both before and after
     * its content has been read, and that the content matches {@code html}.
     */
    private void assertReportsUtf8AfterMetaCharset(String html) throws Exception {
        // Declared as InputStream so the instanceof assertion below is a real check.
        InputStream filtered = new HtmlFilterInputStream(new ReaderInputStream(new StringReader(html), UTF8), ISO, ISO, new IdentityXform());
        try {
            assertTrue(filtered instanceof EncodedThing);
            EncodedThing encoded = (EncodedThing) filtered;
            assertEquals(UTF8, encoded.getCharset());
            assertInputStreamMatchesString(html, filtered, UTF8);
            assertEquals(UTF8, encoded.getCharset());
        } finally {
            IOUtil.safeClose(filtered);
        }
    }

    /** With the mark parameter set to "0", the charset-change parse must fail with IOException. */
    public void testChangeCharsetFailsIfNoMark() throws Exception {
        ConfigurationUtil.setFromArgs(PARAM_MARK, "0");
        log.info("read(): exception following is expected");
        try {
            doParseCharset();
            fail("parser should fail to reset() input stream if not mark()ed");
        } catch (IOException expected) {
            // expected
        }
    }

    /** Match range of 1000 supplied through configuration only. */
    public void testChangeCharsetBadCharConfig() throws Exception {
        ConfigurationUtil.setFromArgs(PARAM_ENCODING_MATCH_RANGE, "1000");
        assertCharsetChangeFixtureMatches(null);
    }

    /** Configured match range of 0 is overridden to 128 through the setter. */
    public void testChangeCharsetBadCharSetter() throws Exception {
        ConfigurationUtil.setFromArgs(PARAM_ENCODING_MATCH_RANGE, "0");
        assertCharsetChangeFixtureMatches(128);
    }

    /** Match range of 10000 supplied through the setter. */
    public void testChangeCharsetBadCharLargeRange() throws Exception {
        assertCharsetChangeFixtureMatches(10000);
    }

    /** Match range of 1000 supplied through the setter. */
    public void testChangeCharsetBadCharLateChange() throws Exception {
        assertCharsetChangeFixtureMatches(1000);
    }

    /**
     * Filters the charset-change fixture (constructor charsets "ISO-8859-1"
     * and "UTF-8", identity transform) and asserts that the filtered text,
     * from offset 3234 on, equals the fixture decoded as UTF-8 from offset
     * 3227 on.
     *
     * @param matchRangeOverride value passed to setEncodingMatchRange before
     *                           reading, or null to leave the stream as
     *                           constructed
     */
    private void assertCharsetChangeFixtureMatches(Integer matchRangeOverride) throws Exception {
        String fixtureUrl = getResource(CHARSET_CHANGE_FIXTURE).toString();
        InputStream filterSource = null;
        InputStream expectedSource = null;
        try {
            filterSource = new BufferedInputStream(UrlUtil.openInputStream(fixtureUrl));
            expectedSource = UrlUtil.openInputStream(fixtureUrl);
            HtmlFilterInputStream filtered = new HtmlFilterInputStream(filterSource, "ISO-8859-1", "UTF-8", new IdentityXform());
            if (matchRangeOverride != null) {
                filtered.setEncodingMatchRange(matchRangeOverride.intValue());
            }
            String expectedText = StringUtil.fromReader(new InputStreamReader(expectedSource, "UTF-8"));
            String filteredText = StringUtil.fromReader(new InputStreamReader((InputStream) filtered, "UTF-8"));
            assertEquals(expectedText.substring(CHARSET_CHANGE_EXPECTED_OFFSET), filteredText.substring(CHARSET_CHANGE_FILTERED_OFFSET));
        } finally {
            IOUtil.safeClose(filterSource);
            IOUtil.safeClose(expectedSource);
        }
    }

    /** With the match range configured as "0", reading the fixture must throw IOException. */
    public void testChangeCharsetMatchRangeDisabled() throws Exception {
        ConfigurationUtil.setFromArgs(PARAM_ENCODING_MATCH_RANGE, "0");
        InputStream filterSource = null;
        try {
            // Opened outside the expected-exception scope so that a failure to
            // open the fixture is reported instead of passing the test.
            filterSource = new BufferedInputStream(UrlUtil.openInputStream(getResource(CHARSET_CHANGE_FIXTURE).toString()));
            try {
                StringUtil.fromReader(new InputStreamReader((InputStream) new HtmlFilterInputStream(filterSource, "ISO-8859-1", "UTF-8", new IdentityXform()), "UTF-8"));
                fail("encodingMatchRange set to zero, mismatch should throw");
            } catch (IOException expected) {
                // expected
            }
        } finally {
            IOUtil.safeClose(filterSource);
        }
    }

    /** The rewind fixture parses successfully with default configuration. */
    public void testChangeCharset() throws Exception {
        doParseCharset();
    }

    /**
     * Filters the "rewind-test.txt" resource with the identity transform and
     * asserts that the result matches the same resource decoded as iso-8859-1.
     */
    void doParseCharset() throws Exception {
        String fixtureUrl = getResource("rewind-test.txt").toString();
        InputStream filterSource = null;
        InputStream expectedSource = null;
        try {
            InputStream raw = UrlUtil.openInputStream(fixtureUrl);
            assertNotNull(raw);
            filterSource = new BufferedInputStream(raw);
            expectedSource = UrlUtil.openInputStream(fixtureUrl);
            String expectedText = StringUtil.fromReader(new InputStreamReader(expectedSource, "iso-8859-1"));
            assertReaderMatchesString(expectedText, StringUtil.getLineReader(new HtmlFilterInputStream(filterSource, new IdentityXform())));
        } finally {
            IOUtil.safeClose(filterSource);
            IOUtil.safeClose(expectedSource);
        }
    }
}
