package prerna.poi.main;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Iterator;
import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.xml.sax.SAXException;

/* loaded from: input_file:prerna/poi/main/TextExtractor.class */
/**
 * Static helpers that pull plain text out of web pages (via Jsoup), out of
 * local or remote documents (via Apache Tika), and out of plain files.
 *
 * <p>The class only exposes static methods and cannot be instantiated.
 */
public final class TextExtractor {
    private TextExtractor() {
    }

    /**
     * Downloads the page at the given URL and returns its text content.
     *
     * <p>When the URL contains {@code "nytimes.com"}, only the text of the
     * elements matching {@code p.story-body-text} is returned, concatenated
     * without any separator. For every other URL the text of the whole
     * document is returned. In the result every {@code '\n'} is replaced by
     * {@code " @ "} and every {@code '\r'} by a single space.
     *
     * @param str the URL of the page to fetch
     * @return the extracted text with line breaks replaced as described above
     * @throws IOException if fetching the page fails
     */
    public static String websiteTextExtractor(String str) throws IOException {
        Document document = Jsoup.connect(str).get();
        String text;
        if (str.contains("nytimes.com")) {
            // Site-specific extraction: keep only the article body paragraphs.
            StringBuilder body = new StringBuilder();
            for (Element paragraph : document.select("p.story-body-text")) {
                if (paragraph.hasText()) {
                    body.append(paragraph.text());
                }
            }
            text = body.toString();
        } else {
            text = document.text();
        }
        return text.replace("\n", " @ ").replace("\r", " ");
    }

    /**
     * Extracts the body text of a document using Tika's auto-detecting parser.
     *
     * <p>If {@code str} names an existing regular file it is read from disk;
     * otherwise {@code str} is interpreted as a URL. Every run of line feeds
     * and every run of carriage returns in the extracted text is replaced by
     * a single space.
     *
     * @param str path of a local file, or a URL, identifying the document
     * @return the extracted text with line breaks replaced by spaces
     * @throws IOException if the document cannot be read, or {@code str} is
     *         neither an existing file nor a well-formed URL
     * @throws SAXException if the parser reports a SAX error
     * @throws TikaException if Tika fails to parse the document
     */
    public static String fileTextExtractor(String str) throws IOException, SAXException, TikaException {
        Metadata metadata = new Metadata();
        ParseContext parseContext = new ParseContext();
        AutoDetectParser parser = new AutoDetectParser(new DefaultDetector());
        // Register the parser in the context so the same parser is available
        // through the ParseContext during parsing.
        parseContext.set(Parser.class, parser);

        File file = new File(str);
        URL source = (file.exists() && file.isFile()) ? file.toURI().toURL() : new URL(str);

        String text;
        // try-with-resources guarantees the input stream is released even
        // when parsing throws.
        try (TikaInputStream input = TikaInputStream.get(source, metadata);
             ByteArrayOutputStream output = new ByteArrayOutputStream()) {
            parser.parse(input, new BodyContentHandler(output), metadata, parseContext);
            text = output.toString();
        }
        // replaceAll (not replace) is required: the argument is a regular
        // expression, and String.replace would look for it as literal text.
        return text.replaceAll("\n+|\r+", " ");
    }

    /**
     * Reads an entire file into a string.
     *
     * @param str path of the file to read
     * @param charset charset used to decode the file's bytes
     * @return the decoded file content
     * @throws IOException if the file cannot be read
     */
    public static String readFile(String str, Charset charset) throws IOException {
        return new String(Files.readAllBytes(Paths.get(str)), charset);
    }
}
