package prerna.poi.main;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.trees.EnglishGrammaticalRelations;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.trees.TreeGraphNode;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.util.CoreMap;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Scanner;
import java.util.Vector;
import java.util.regex.Pattern;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.tika.exception.TikaException;
import org.xml.sax.SAXException;
import prerna.algorithm.nlp.NLPSingletons;
import prerna.algorithm.nlp.NaturalLanguageProcessingHelper;

/* loaded from: input_file:prerna/poi/main/ProcessNLP.class */
/**
 * Extracts subject / predicate / object triples from documents.
 *
 * <p>Each document is split into sentences, every sentence is parsed with the
 * shared {@link LexicalizedParser}, and pairs of typed dependencies that share
 * a governor are turned into {@link TripleWrapper} instances. After all
 * documents are processed, term occurrence counts are filled in and the
 * predicates are lemmatized.
 *
 * <p>The result list is kept in an instance field that is replaced on every
 * call to {@link #generateTriples(String[])}.
 */
public class ProcessNLP {
    private static final Logger LOGGER = LogManager.getLogger(ProcessNLP.class.getName());

    /**
     * Sentence boundary: a '.', '?' or '!' followed by optional spaces and one
     * whitespace character. A period is not treated as a boundary when it
     * directly follows "Mr", "Mrs", "Dr", "Ms" or a dot plus capital letter
     * (e.g. the second period of "U.S.").
     */
    private static final Pattern SENTENCE_DELIMITER = Pattern.compile("(?<!Mr)(?<!Mrs)(?<!Dr)(?<!Ms)(?<!\\.[A-Z])\\. *\\s|\\? *\\s|\\! *\\s");

    /** Any single line break (CRLF, CR or LF). */
    private static final Pattern LINE_BREAK = Pattern.compile("\\r\\n|\\r|\\n");

    private List<TripleWrapper> triples;
    private LexicalizedParser lp = NLPSingletons.getInstance().getLp();

    /**
     * Processes every given document and returns the triples found in them.
     *
     * @param strArr document locations; each entry is handed to {@link #readDoc(String)}
     * @return the triples of all documents, with occurrence counts set and predicates lemmatized
     * @throws IOException if text cannot be extracted from one of the documents
     */
    public List<TripleWrapper> generateTriples(String[] strArr) throws IOException {
        this.triples = new ArrayList<>();
        for (String document : strArr) {
            processFile(document);
        }
        // Counts are computed on the raw predicates, before lemmatize() rewrites them.
        createOccuranceCount();
        lemmatize();
        return this.triples;
    }

    /**
     * Reads one document, parses each of its sentences and collects the triples
     * of every sentence the helper reports as parsable.
     *
     * @param path location of the document
     * @throws IOException if text cannot be extracted from the document
     */
    private void processFile(String path) throws IOException {
        // Everything after the last file separator is used as the document name.
        String docName = path.substring(path.lastIndexOf(File.separator) + 1);
        for (String sentence : readDoc(path)) {
            List<TypedDependency> dependencies = new ArrayList<>();
            List<TaggedWord> taggedWords = new ArrayList<>();
            if (!NaturalLanguageProcessingHelper.createDepList(this.lp, sentence, dependencies, taggedWords)) {
                continue;
            }
            Hashtable<GrammaticalRelation, Vector<TypedDependency>> dependenciesByRelation = new Hashtable<>();
            Hashtable<String, String> negations = new Hashtable<>();
            NaturalLanguageProcessingHelper.setTypeDependencyHash(dependencies, dependenciesByRelation);
            generateTriples(sentence, docName, taggedWords, negations, dependenciesByRelation);
        }
    }

    /**
     * Extracts the text of a document and splits it into sentences.
     *
     * <p>The extractor is chosen from the location string: anything containing
     * "http" is read as a web page, and names ending in ".doc", ".docx" or
     * ".txt" are read as files. A location matching none of these yields an
     * empty list.
     *
     * @param str document location (URL or file path)
     * @return the sentences of the document, in reading order
     * @throws IOException if extraction fails; the original failure is attached as the cause
     */
    public List<String> readDoc(String str) throws IOException {
        List<String> sentences = new ArrayList<>();
        try {
            // NOTE(review): these checks are independent, so a location that contains
            // "http" AND ends in .doc/.docx/.txt is extracted twice. Left as-is because
            // it is unclear whether callers rely on it - confirm before changing.
            if (str.contains("http")) {
                LOGGER.info("Extracting text from a web-page...");
                readFile(TextExtractor.websiteTextExtractor(str), sentences);
            }
            if (str.endsWith(".doc") || str.endsWith(".docx")) {
                LOGGER.info("Extracting text from a word document...");
                readFile(TextExtractor.fileTextExtractor(str), sentences);
            }
            if (str.endsWith(".txt")) {
                LOGGER.info("Extracting text from a text file...");
                readFile(TextExtractor.fileTextExtractor(str), sentences);
            }
            return sentences;
        } catch (IOException | SAXException | TikaException e) {
            String message = "Error extrating text from document";
            LOGGER.error(message, e);
            // Keep the root cause so callers can see what actually went wrong.
            throw new IOException(message, e);
        }
    }

    /**
     * Splits text into sentences and appends them to the given list. Line breaks
     * inside a sentence are replaced by a single space each.
     *
     * @param text raw document text
     * @param sentences list receiving the sentences
     */
    private void readFile(String text, List<String> sentences) {
        try (Scanner scanner = new Scanner(text)) {
            scanner.useDelimiter(SENTENCE_DELIMITER);
            while (scanner.hasNext()) {
                sentences.add(LINE_BREAK.matcher(scanner.next()).replaceAll(" "));
            }
        }
    }

    /**
     * Collects the triples of one parsed sentence by trying a fixed series of
     * subject-relation / object-relation pairs.
     *
     * @param str the sentence text, stored on every triple
     * @param str2 the document name, stored on every triple
     * @param list tagged words of the sentence (passed through to {@code findTriples})
     * @param hashtable negation lookup; filled here by
     *        {@code NaturalLanguageProcessingHelper.createNegations} before any triple is built
     * @param hashtable2 the sentence's typed dependencies grouped by relation
     */
    public void generateTriples(String str, String str2, List<TaggedWord> list, Hashtable<String, String> hashtable, Hashtable<GrammaticalRelation, Vector<TypedDependency>> hashtable2) {
        NaturalLanguageProcessingHelper.createNegations(hashtable, hashtable2);
        findTriples(str, str2, list, hashtable, hashtable2, EnglishGrammaticalRelations.NOMINAL_SUBJECT, EnglishGrammaticalRelations.DIRECT_OBJECT);
        findTriples(str, str2, list, hashtable, hashtable2, EnglishGrammaticalRelations.AGENT, EnglishGrammaticalRelations.NOMINAL_PASSIVE_SUBJECT);
        findTriples(str, str2, list, hashtable, hashtable2, EnglishGrammaticalRelations.CONTROLLING_SUBJECT, EnglishGrammaticalRelations.DIRECT_OBJECT);
        findTriples(str, str2, list, hashtable, hashtable2, EnglishGrammaticalRelations.NOMINAL_PASSIVE_SUBJECT, EnglishGrammaticalRelations.PREPOSITIONAL_MODIFIER);
        findTriples(str, str2, list, hashtable, hashtable2, EnglishGrammaticalRelations.NOMINAL_SUBJECT, EnglishGrammaticalRelations.COPULA);
        findTriples(str, str2, list, hashtable, hashtable2, EnglishGrammaticalRelations.NOMINAL_SUBJECT, EnglishGrammaticalRelations.ADJECTIVAL_COMPLEMENT);
        findTriples(str, str2, list, hashtable, hashtable2, EnglishGrammaticalRelations.NOMINAL_SUBJECT, EnglishGrammaticalRelations.PREPOSITIONAL_MODIFIER);
    }

    /**
     * Builds a triple for every pair of dependencies, one of each relation,
     * whose governors have the same string form (ignoring case).
     *
     * <p>The dependent of the {@code grammaticalRelation} dependency becomes
     * obj1; the governor and dependent of the {@code grammaticalRelation2}
     * dependency become the predicate and obj2. When obj1 carries a tag
     * containing "JJ", predicate and obj2 trade places for that triple.
     * Nothing is added when either relation is absent from the sentence.
     *
     * @param str the sentence text, stored on every triple
     * @param str2 the document name, stored on every triple
     * @param list tagged words of the sentence (not used by this method)
     * @param hashtable negation lookup, checked with the string form of the predicate
     *        node and of the node returned by {@code findCompObject}
     * @param hashtable2 the sentence's typed dependencies grouped by relation
     * @param grammaticalRelation relation supplying the subject side
     * @param grammaticalRelation2 relation supplying the predicate / object side
     */
    public void findTriples(String str, String str2, List<TaggedWord> list, Hashtable<String, String> hashtable, Hashtable<GrammaticalRelation, Vector<TypedDependency>> hashtable2, GrammaticalRelation grammaticalRelation, GrammaticalRelation grammaticalRelation2) {
        Vector<TypedDependency> objectDeps = hashtable2.get(grammaticalRelation2);
        Vector<TypedDependency> subjectDeps = hashtable2.get(grammaticalRelation);
        if (objectDeps == null || subjectDeps == null) {
            return;
        }
        int objectCount = objectDeps.size();
        for (int i = 0; i < objectCount; i++) {
            TypedDependency objectDep = objectDeps.get(i);
            TreeGraphNode objectNode = objectDep.dep();
            TreeGraphNode governor = objectDep.gov();
            String basePredicate = governor.value();
            String preposition = null;
            if (objectDep.toString().contains("prep")) {
                objectNode = NaturalLanguageProcessingHelper.findPrepObject(objectDeps, subjectDeps, hashtable2, EnglishGrammaticalRelations.PREPOSITIONAL_MODIFIER, EnglishGrammaticalRelations.PREPOSITIONAL_OBJECT);
                if (objectNode == null) {
                    // No prepositional object was found, so there is nothing to use as
                    // obj2; dereferencing the missing node below would throw.
                    continue;
                }
                preposition = objectDep.dep().toString();
            }
            int subjectCount = subjectDeps.size();
            for (int j = 0; j < subjectCount; j++) {
                TypedDependency subjectDep = subjectDeps.get(j);
                TreeGraphNode subjectNode = subjectDep.dep();
                TreeGraphNode subjectGovernor = subjectDep.gov();
                if (!subjectGovernor.toString().equalsIgnoreCase(governor.toString())) {
                    continue;
                }
                // Per-match copies: the swap and the text additions below must not leak
                // into the next subject, which would corrupt both the governor comparison
                // and the expanded predicate (e.g. "NOT NOT ...").
                TreeGraphNode predicateNode = governor;
                TreeGraphNode obj2Node = objectNode;
                if (subjectNode.label().tag().contains("JJ")) {
                    predicateNode = objectNode;
                    obj2Node = governor;
                }
                TripleWrapper triple = new TripleWrapper();
                triple.setObj1(formatString(subjectNode.value(), false, true));
                triple.setPred(formatString(predicateNode.value(), false, true));
                triple.setObj2(formatString(obj2Node.value(), false, true));
                TreeGraphNode complementObject = NaturalLanguageProcessingHelper.findCompObject(subjectGovernor, hashtable2);
                // A subject that is not a noun is replaced, where the sentence has a
                // clausal complement, by a node looked up through that complement.
                if (!subjectNode.label().tag().contains("NN") && (hashtable2.containsKey(EnglishGrammaticalRelations.CLAUSAL_COMPLEMENT) || hashtable2.containsKey(EnglishGrammaticalRelations.XCLAUSAL_COMPLEMENT))) {
                    subjectNode = NaturalLanguageProcessingHelper.findComplementNoun(subjectNode, subjectGovernor, hashtable2, EnglishGrammaticalRelations.CLAUSAL_COMPLEMENT);
                    if (!subjectNode.label().tag().contains("NN")) {
                        subjectNode = NaturalLanguageProcessingHelper.findCompSubject(subjectGovernor, hashtable2);
                    }
                }
                String subjectText = NaturalLanguageProcessingHelper.getFullNoun(subjectNode);
                String objectText = NaturalLanguageProcessingHelper.getFullNoun(obj2Node) + NaturalLanguageProcessingHelper.findPrepNounForPredicate(predicateNode, hashtable2);
                // The expanded predicate always starts from the governor's word, even
                // when predicate and obj2 were swapped above.
                String predicateText = basePredicate;
                if (hashtable.containsKey(String.valueOf(predicateNode)) || hashtable.containsKey(String.valueOf(complementObject))) {
                    predicateText = "NOT " + predicateText;
                }
                if (preposition != null) {
                    predicateText = predicateText + preposition;
                }
                triple.setObj1Expanded(formatString(subjectText, true, false));
                triple.setPredExpanded(formatString(predicateText, true, false));
                triple.setObj2Expanded(formatString(objectText, true, false));
                triple.setDocName(str2);
                triple.setSentence(str);
                this.triples.add(triple);
            }
        }
    }

    /**
     * Normalizes a term before it is stored on a triple.
     *
     * @param text the raw term
     * @param replaceQuotes whether apostrophes and backticks are replaced by commas
     * @param lowerCase whether the result is lower-cased
     * @return the normalized term, or "NA" when {@code text} is null or empty
     */
    private String formatString(String text, boolean replaceQuotes, boolean lowerCase) {
        if (text == null || text.isEmpty()) {
            return "NA";
        }
        String formatted = text;
        if (replaceQuotes) {
            formatted = formatted.replace("'", ",").replace("`", ",");
        }
        if (lowerCase) {
            formatted = formatted.toLowerCase();
        }
        return formatted;
    }

    /**
     * Counts how often each term occurs across all triples and stores the
     * counts on the triples. Subjects, predicates and objects share one tally,
     * so a word used as obj1 in one triple and obj2 in another is counted twice.
     */
    private void createOccuranceCount() {
        Hashtable<String, Integer> counts = new Hashtable<>();
        for (TripleWrapper triple : this.triples) {
            increment(counts, triple.getObj1());
            increment(counts, triple.getPred());
            increment(counts, triple.getObj2());
        }
        for (TripleWrapper triple : this.triples) {
            triple.setObj1Count(counts.get(triple.getObj1()).intValue());
            triple.setPredCount(counts.get(triple.getPred()).intValue());
            triple.setObj2Count(counts.get(triple.getObj2()).intValue());
        }
    }

    /** Adds one to the tally of {@code term}, starting at one for a new term. */
    private static void increment(Hashtable<String, Integer> counts, String term) {
        Integer current = counts.get(term);
        counts.put(term, Integer.valueOf(current == null ? 1 : current.intValue() + 1));
    }

    /**
     * Replaces the predicate of every triple with its lemma.
     *
     * <p>Does nothing when there are no triples, which also avoids building the
     * annotation pipeline needlessly.
     */
    public void lemmatize() {
        if (this.triples == null || this.triples.isEmpty()) {
            return;
        }
        StanfordCoreNLP pipeline = new StanfordCoreNLP();
        for (TripleWrapper triple : this.triples) {
            Annotation annotation = new Annotation(triple.getPred());
            pipeline.annotate(annotation);
            List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
            if (sentences == null) {
                continue;
            }
            for (CoreMap sentence : sentences) {
                // NOTE(review): every token overwrites the predicate, so a predicate made
                // of several tokens ends up as the lemma of its last token - confirm intended.
                for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                    triple.setPred(token.get(CoreAnnotations.LemmaAnnotation.class));
                }
            }
        }
    }
}
