package com.entopix.maui.vocab;

import com.entopix.maui.stemmers.Stemmer;
import com.entopix.maui.stopwords.Stopwords;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.zip.GZIPInputStream;
import org.apache.jena.rdf.model.Literal;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.ResIterator;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.apache.jena.vocabulary.RDF;
import org.apache.jena.vocabulary.SKOS;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/entopix/maui/vocab/Vocabulary.class */
/**
 * A controlled vocabulary backed by a {@link VocabularyStore}.
 *
 * <p>The vocabulary can be populated from a SKOS RDF file or classpath resource, from an
 * already loaded Jena {@link Model}, or from a triple of plain-text files (terms, "use"
 * mappings and related-term pairs). Phrases are normalized with {@link #normalizePhrase(String)}
 * before they are stored as senses or looked up.
 */
public class Vocabulary {
    private static final Logger log = LoggerFactory.getLogger(Vocabulary.class);
    private VocabularyStore vocabStore;
    private String vocabularyName;
    private transient Stemmer stemmer;
    private transient Stopwords stopwords;
    private String language = "en";
    private String encoding = "UTF-8";
    private boolean toLowerCase = true;
    private boolean reorder = true;
    private boolean serialize = false;

    /**
     * Checked exception raised when a vocabulary cannot be initialized from the supplied input.
     *
     * <p>Kept as a (non-static) inner class so that existing callers constructing or catching
     * it keep compiling unchanged.
     */
    public class VocabularyException extends Exception {
        private static final long serialVersionUID = 1L;

        public VocabularyException(String str) {
            super(str);
        }
    }

    /**
     * Loads the vocabulary from the given location in the given format.
     *
     * @param vocabularyName path (or, for SKOS, alternatively a classpath resource name) of the
     *     vocabulary; also stored as the vocabulary name
     * @param vocabularyFormat either {@code "skos"} or {@code "text"}
     * @throws RuntimeException if the format is unsupported (including {@code null}), a required
     *     file is missing, or loading fails
     */
    public void initializeVocabulary(String vocabularyName, String vocabularyFormat) {
        this.vocabularyName = vocabularyName;
        // Constant-first comparison: a null format is reported as "unsupported" rather than as an NPE.
        if ("skos".equals(vocabularyFormat)) {
            initializeFromSkosLocation(vocabularyName);
        } else if ("text".equals(vocabularyFormat)) {
            initializeFromTextLocation(vocabularyName);
        } else {
            throw new RuntimeException(vocabularyFormat + " is an unsupported vocabulary format! Use skos or text");
        }
    }

    /** Resolves the three text files belonging to {@code location} and loads them. */
    private void initializeFromTextLocation(String location) {
        File termsFile = requireExistingFile(new File(location), location);
        // The companion files are derived by substituting ".en" in the path.
        File useFile = requireExistingFile(new File(location.replace(".en", ".use")), location);
        File relFile = requireExistingFile(new File(location.replace(".en", ".rel")), location);
        initializeFromTXTFiles(termsFile, useFile, relFile);
    }

    /** Returns {@code file} if it exists; otherwise logs and throws a {@link RuntimeException}. */
    private static File requireExistingFile(File file, String location) {
        if (!file.exists()) {
            log.error("Error while loading vocabulary from {}", location);
            throw new RuntimeException(file.getAbsolutePath() + " does not exist!");
        }
        return file;
    }

    /** Loads a SKOS vocabulary from a file on disk or, failing that, from the classpath. */
    private void initializeFromSkosLocation(String location) {
        if (!location.endsWith(".rdf.gz") && !location.endsWith("rdf")) {
            log.error("Error while loading vocabulary from {}", location);
            throw new RuntimeException("File " + location + " appears to be not in the skos format!");
        }
        File skosFile = new File(location);
        try {
            if (skosFile.exists()) {
                initializeFromSKOSFile(skosFile);
                return;
            }
            InputStream resource = Vocabulary.class.getClassLoader().getResourceAsStream(location);
            if (resource == null) {
                throw new IOException("No such file or resource");
            }
            // Both streams are registered so the raw resource is released even when the
            // GZIP header cannot be read; closing the same stream twice is harmless.
            try (InputStream raw = resource;
                 InputStream in = location.endsWith(".gz") ? new GZIPInputStream(raw) : raw) {
                initializeFromStream(in);
            }
        } catch (Exception e) {
            log.error("Error while loading vocabulary from " + location);
            throw new RuntimeException("Error while loading vocabulary from " + location, e);
        }
    }

    /**
     * Initializes the vocabulary from an already loaded RDF model.
     *
     * @param vocabularyName name under which the vocabulary is registered
     * @param model SKOS model to index
     * @throws VocabularyException if {@code model} is {@code null}
     */
    public void initializeVocabulary(String vocabularyName, Model model) throws VocabularyException {
        this.vocabularyName = vocabularyName;
        if (model == null) {
            throw new VocabularyException("Model can't be null!");
        }
        initializeFromModel(model);
    }

    /** Sets the language tag used to select SKOS labels while indexing a model. */
    public void setLanguage(String language) {
        this.language = language;
    }

    /** Sets the charset name used to decode vocabulary input (RDF streams and text files). */
    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }

    /** Enables or disables lower-casing in {@link #normalizePhrase(String)}. */
    public void setLowerCase(boolean toLowerCase) {
        this.toLowerCase = toLowerCase;
    }

    /** Enables or disables alphabetical word re-ordering in {@link #pseudoPhrase(String)}. */
    public void setReorder(boolean reorder) {
        this.reorder = reorder;
    }

    /** Sets the stemmer applied to each word during normalization; may be {@code null}. */
    public void setStemmer(Stemmer stemmer) {
        this.stemmer = stemmer;
    }

    /** Replaces the backing store with an externally supplied one. */
    public void setVocabularyStore(VocabularyStore vocabularyStore) {
        this.vocabStore = vocabularyStore;
    }

    /**
     * Sets the flag handed to {@link VocabularyStoreFactory} when creating the store; when
     * {@code true}, a freshly built store is also passed to
     * {@code VocabularyStoreFactory.serializeNewVocabStore}.
     */
    public void setSerialize(boolean serialize) {
        this.serialize = serialize;
    }

    /**
     * Builds the vocabulary index from a SKOS model.
     *
     * <p>A store is obtained from {@link VocabularyStoreFactory}; if it reports itself as already
     * initialized nothing else happens. Otherwise every {@code skos:Concept} that is not marked
     * {@code owl:deprecated} contributes its preferred label (as descriptor), its alternative and
     * hidden labels (as non-descriptors) and its broader/narrower/related links.
     *
     * @param model SKOS model to index
     */
    public void initializeFromModel(Model model) {
        this.vocabStore = VocabularyStoreFactory.createVocabStore(this.vocabularyName, this.stemmer, this.serialize);
        if (this.vocabStore.isInitialized()) {
            return;
        }
        log.info("--- Building the Vocabulary index from the RDF model...");
        Property owlDeprecated = ResourceFactory.createProperty("http://www.w3.org/2002/07/owl#", "deprecated");
        Property[] nonPreferredLabelProperties = {SKOS.altLabel, SKOS.hiddenLabel};
        Property[] relationProperties = {SKOS.broader, SKOS.narrower, SKOS.related};
        // Running counter used to mint the synthetic "d_<n>" ids of non-descriptors.
        int nonDescriptorCount = 0;
        ResIterator concepts = model.listResourcesWithProperty(RDF.type, SKOS.Concept);
        while (concepts.hasNext()) {
            Resource concept = concepts.nextResource();
            Statement deprecated = concept.getProperty(owlDeprecated);
            if (deprecated != null && deprecated.getBoolean()) {
                continue;
            }
            String uri = concept.getURI();

            Statement prefLabelStatement = concept.getProperty(SKOS.prefLabel, this.language);
            if (prefLabelStatement != null) {
                Literal prefLabel = prefLabelStatement.getLiteral();
                String normalized = normalizePhrase(prefLabel.getLexicalForm());
                if (normalized.length() >= 1) {
                    this.vocabStore.addSense(normalized, uri);
                    this.vocabStore.addDescriptor(uri, prefLabel.getLexicalForm());
                }
            }

            for (Property labelProperty : nonPreferredLabelProperties) {
                StmtIterator labels = concept.listProperties(labelProperty, this.language);
                while (labels.hasNext()) {
                    Literal label = labels.nextStatement().getLiteral();
                    String normalized = normalizePhrase(label.getLexicalForm());
                    if (normalized.length() >= 1) {
                        this.vocabStore.addSense(normalized, uri);
                    }
                    addNonDescriptor(nonDescriptorCount, uri, label.getLexicalForm(), normalized);
                    nonDescriptorCount++;
                }
            }

            for (Property relationProperty : relationProperties) {
                StmtIterator relations = concept.listProperties(relationProperty);
                while (relations.hasNext()) {
                    this.vocabStore.addRelatedTerm(uri, relations.nextStatement().getResource().getURI());
                }
            }
        }
        log.info("--- Statistics about the vocabulary: ");
        log.info("\t{} terms in total", this.vocabStore.getNumTerms());
        log.info("\t{} non-descriptive terms", this.vocabStore.getNumNonDescriptors());
        log.info("\t{} terms have related terms", this.vocabStore.getNumRelatedTerms());
        this.vocabStore.finishedInitialized();
        if (this.serialize) {
            VocabularyStoreFactory.serializeNewVocabStore(this.vocabularyName, this.vocabStore, this.stemmer);
        }
    }

    /**
     * Initializes the vocabulary from a SKOS file ({@code .rdf} or {@code .rdf.gz}).
     *
     * <p>When serialization is enabled the store is probed first, so the RDF file is not parsed
     * if the factory already returned an initialized store.
     *
     * @param file SKOS file to read
     * @throws IOException if the file cannot be read
     */
    public void initializeFromSKOSFile(File file) throws IOException {
        if (this.serialize && isStoreAlreadyInitialized()) {
            return;
        }
        initializeFromModel(readModelFromFile(file));
    }

    /**
     * Initializes the vocabulary from a stream of SKOS RDF. The stream is not closed by this
     * method.
     *
     * <p>When serialization is enabled the store is probed first, so the stream is not parsed
     * if the factory already returned an initialized store.
     *
     * @param inputStream RDF input, decoded with the configured encoding
     * @throws IOException if the stream cannot be decoded
     */
    public void initializeFromStream(InputStream inputStream) throws IOException {
        if (this.serialize && isStoreAlreadyInitialized()) {
            return;
        }
        initializeFromModel(readModelFromInputStream(inputStream));
    }

    /** Obtains a store from the factory and reports whether it is already initialized. */
    private boolean isStoreAlreadyInitialized() {
        this.vocabStore = VocabularyStoreFactory.createVocabStore(this.vocabularyName, this.stemmer, this.serialize);
        return this.vocabStore.isInitialized();
    }

    /** Reads an RDF model from a plain or gzip-compressed file, closing the file afterwards. */
    private Model readModelFromFile(File file) throws IOException {
        log.info("--- Loading RDF model from the SKOS file...");
        // fileStream is a separate resource so it is released even if the GZIP header is invalid.
        try (InputStream fileStream = new FileInputStream(file);
             InputStream in = file.getName().endsWith("rdf.gz") ? new GZIPInputStream(fileStream) : fileStream) {
            return readModelFromInputStream(in);
        }
    }

    /** Parses RDF from the stream using the configured encoding; the caller owns the stream. */
    private Model readModelFromInputStream(InputStream inputStream) throws IOException {
        Model model = ModelFactory.createDefaultModel();
        model.read(new InputStreamReader(inputStream, this.encoding), "");
        return model;
    }

    /**
     * Initializes the vocabulary from three text files.
     *
     * @param file terms file, see {@link #buildTEXT(File)}
     * @param file2 "use" mappings file, see {@link #buildUSE(File)}
     * @param file3 related-terms file, see {@link #buildREL(File)}
     * @throws RuntimeException if one of the files cannot be read
     */
    public void initializeFromTXTFiles(File file, File file2, File file3) {
        this.vocabStore = VocabularyStoreFactory.createVocabStore(this.vocabularyName, this.stemmer, this.serialize);
        if (this.vocabStore.isInitialized()) {
            return;
        }
        log.info("--- Loading Vocabulary from text files...");
        buildTEXT(file);
        buildUSE(file2);
        buildREL(file3);
        this.vocabStore.finishedInitialized();
        if (this.serialize) {
            VocabularyStoreFactory.serializeNewVocabStore(this.vocabularyName, this.vocabStore, this.stemmer);
        }
    }

    /** Sets the stopword list consulted during normalization; may be {@code null}. */
    public void setStopwords(Stopwords stopwords) {
        this.stopwords = stopwords;
    }

    /** Returns the backing store, or {@code null} if none has been created or set yet. */
    public VocabularyStore getVocabularyStore() {
        return this.vocabStore;
    }

    /**
     * Registers a non-preferred label under a synthetic id {@code "d_<count>"} that points to
     * the descriptor {@code descriptorId}.
     *
     * <p>For the vocabulary named {@code "lcsh"}, labels containing {@code '('} are ignored.
     */
    private void addNonDescriptor(int count, String descriptorId, String label, String normalizedLabel) {
        // Constant-first comparison so an unset vocabulary name does not cause an NPE.
        if ("lcsh".equals(this.vocabularyName) && label.indexOf('(') != -1) {
            return;
        }
        String nonDescriptorId = "d_" + count;
        if (normalizedLabel.length() >= 1) {
            this.vocabStore.addSense(normalizedLabel, nonDescriptorId);
        }
        this.vocabStore.addDescriptor(nonDescriptorId, label);
        this.vocabStore.addNonDescriptor(nonDescriptorId, descriptorId);
    }

    /** Delegates to {@link VocabularyStore#getFormatedName(String)}. */
    public String getFormatedName(String str) {
        return this.vocabStore.getFormatedName(str);
    }

    /**
     * Reads the terms file: each line is {@code <id> <term>}, split at the first space. A term
     * is stored as descriptor only if its normalized form is non-empty.
     *
     * <p>The file is decoded with the configured encoding. Lines without a space carry no term
     * and are skipped (non-blank ones are reported with a warning).
     *
     * @param file terms file
     * @throws RuntimeException if the file cannot be read; the I/O error is attached as cause
     */
    public void buildTEXT(File file) {
        log.info("-- Building the Vocabulary index");
        try (BufferedReader reader = openReader(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                int separator = line.indexOf(' ');
                if (separator < 0) {
                    warnIfMalformed(file, line);
                    continue;
                }
                String term = line.substring(separator + 1);
                if (normalizePhrase(term).length() >= 1) {
                    this.vocabStore.addDescriptor(line.substring(0, separator), term);
                }
            }
        } catch (IOException e) {
            throw loadFailure(file, e);
        }
    }

    /**
     * Reads the "use" file: each line is {@code <id>\t<target>}. The pair is stored as a
     * non-descriptor mapping only when the target contains no space.
     *
     * <p>The file is decoded with the configured encoding. Lines without a second tab-separated
     * column are skipped (non-blank ones are reported with a warning).
     *
     * @param file "use" mappings file
     * @throws RuntimeException if the file cannot be read; the I/O error is attached as cause
     */
    public void buildUSE(File file) {
        try (BufferedReader reader = openReader(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\t");
                if (columns.length < 2) {
                    warnIfMalformed(file, line);
                    continue;
                }
                if (columns[1].indexOf(' ') == -1) {
                    this.vocabStore.addNonDescriptor(columns[0], columns[1]);
                }
            }
        } catch (IOException e) {
            throw loadFailure(file, e);
        }
    }

    /**
     * Reads the related-terms file: each line is {@code <id>\t<id1> <id2> ...}; every
     * space-separated id in the second column is stored as related to the first column.
     *
     * <p>The file is decoded with the configured encoding. Lines without a second tab-separated
     * column are skipped (non-blank ones are reported with a warning).
     *
     * @param file related-terms file
     * @throws RuntimeException if the file cannot be read; the I/O error is attached as cause
     */
    public void buildREL(File file) {
        log.info("-- Building the Vocabulary index with related pairs");
        try (BufferedReader reader = openReader(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\t");
                if (columns.length < 2) {
                    warnIfMalformed(file, line);
                    continue;
                }
                for (String relatedId : columns[1].split(" ")) {
                    this.vocabStore.addRelatedTerm(columns[0], relatedId);
                }
            }
        } catch (IOException e) {
            throw loadFailure(file, e);
        }
    }

    /** Opens {@code file} for line-wise reading using the configured encoding. */
    private BufferedReader openReader(File file) throws IOException {
        FileInputStream in = new FileInputStream(file);
        try {
            return new BufferedReader(new InputStreamReader(in, this.encoding));
        } catch (IOException | RuntimeException e) {
            // Unsupported or null charset name: do not leak the already opened file.
            in.close();
            throw e;
        }
    }

    /** Logs a warning for a skipped line unless the line is blank. */
    private static void warnIfMalformed(File file, String line) {
        if (!line.trim().isEmpty()) {
            log.warn("Skipping malformed line in {}: {}", file.getAbsolutePath(), line);
        }
    }

    /** Logs a read failure and wraps it, keeping the original exception as cause. */
    private static RuntimeException loadFailure(File file, IOException cause) {
        log.error("Error while loading vocabulary from " + file.getAbsolutePath() + "!\n", cause);
        return new RuntimeException("Error while loading vocabulary from " + file.getAbsolutePath(), cause);
    }

    /** Delegates to {@link VocabularyStore#getTerm(String)}. */
    public String getTerm(String str) {
        return this.vocabStore.getTerm(str);
    }

    /** Returns whether the normalized form of {@code str} has at least one sense in the store. */
    public boolean containsNormalizedEntry(String str) {
        return this.vocabStore.getNumSenses(normalizePhrase(str)) > 0;
    }

    /** Returns whether the normalized form of {@code str} has more than one sense in the store. */
    public boolean isAmbiguous(String str) {
        return this.vocabStore.getNumSenses(normalizePhrase(str)) > 1;
    }

    /** Returns the senses the store holds for the normalized form of {@code str}. */
    public ArrayList<String> getSenses(String str) {
        return this.vocabStore.getSensesForPhrase(normalizePhrase(str));
    }

    /** Delegates to {@link VocabularyStore#getRelatedTerms(String)}; no normalization is applied. */
    public ArrayList<String> getRelated(String str) {
        return this.vocabStore.getRelatedTerms(str);
    }

    /**
     * Decides whether a phrase may be lower-cased: yes unless upper-case characters outnumber
     * lower-case ones while there are fewer than five of them (e.g. short acronyms).
     */
    private boolean isOkToLower(String text) {
        int lowerCount = 0;
        int upperCount = 0;
        for (char ch : text.toCharArray()) {
            if (Character.isLowerCase(ch)) {
                lowerCount++;
            }
            if (Character.isUpperCase(ch)) {
                upperCount++;
            }
        }
        return upperCount <= lowerCount || upperCount >= 5;
    }

    /**
     * Normalizes a phrase into the form used as sense key.
     *
     * <p>Phrases ending in {@code '-'} or {@code '.'} are returned untouched. Otherwise
     * ampersands and dots become spaces, asterisks and colons are dropped, runs of spaces are
     * collapsed and, for the vocabulary named {@code "mesh"}, everything from the first slash
     * on is cut off. The result is trimmed, optionally lower-cased and, if re-ordering, a
     * stopword list or a stemmer is configured, passed through {@link #pseudoPhrase(String)}.
     * If that leaves nothing, the cleaned (untrimmed) text is returned instead.
     *
     * @param str phrase to normalize; must not be {@code null}
     * @return the normalized phrase, possibly empty
     */
    public String normalizePhrase(String str) {
        if (str.endsWith("-") || str.endsWith(".")) {
            return str;
        }
        // Evaluated once; constant-first so an unset vocabulary name does not cause an NPE.
        boolean cutAtSlash = "mesh".equals(this.vocabularyName);
        StringBuilder cleaned = new StringBuilder();
        char previous = ' ';
        for (int i = 0; i < str.length(); i++) {
            char current = str.charAt(i);
            if (cutAtSlash && current == '/') {
                break;
            }
            if (current == '&' || current == '.') {
                current = ' ';
            }
            boolean dropped = current == '*' || current == ':';
            boolean repeatedSpace = current == ' ' && previous == ' ';
            if (!dropped && !repeatedSpace) {
                cleaned.append(current);
            }
            previous = current;
        }
        String normalized = cleaned.toString().trim();
        if (this.toLowerCase && isOkToLower(normalized)) {
            normalized = normalized.toLowerCase();
        }
        if (this.reorder || this.stopwords != null || this.stemmer != null) {
            normalized = pseudoPhrase(normalized);
        }
        return normalized.equals("") ? cleaned.toString() : normalized;
    }

    /**
     * Turns a phrase into a "pseudo phrase": words are split on single spaces, optionally sorted,
     * stopwords are removed, a trailing apostrophe-plus-one-character suffix (such as
     * {@code 's}) is stripped and the configured stemmer is applied to each remaining word.
     *
     * @param str phrase to transform
     * @return the remaining words joined by single spaces
     */
    public String pseudoPhrase(String str) {
        String[] words = str.split(" ");
        if (this.reorder) {
            Arrays.sort(words);
        }
        StringBuilder result = new StringBuilder();
        for (String word : words) {
            if (this.stopwords != null && this.stopwords.isStopword(word)) {
                continue;
            }
            int apostrophe = word.indexOf('\'');
            if (apostrophe != -1 && apostrophe == word.length() - 2) {
                word = word.substring(0, apostrophe);
            }
            if (this.stemmer != null) {
                word = this.stemmer.stem(word);
            }
            result.append(word).append(' ');
        }
        return result.toString().trim();
    }

    /** Sets the vocabulary name; {@code "mesh"} and {@code "lcsh"} enable special handling. */
    public void setVocabularyName(String vocabularyName) {
        this.vocabularyName = vocabularyName;
    }

    /** Placeholder: currently always returns {@code 0.0} regardless of the argument. */
    public double getGenerality(String str) {
        return 0.0d;
    }
}
