package org.grobid.core.engines;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.grobid.core.annotations.TeiStAXParser;
import org.grobid.core.data.Affiliation;
import org.grobid.core.data.BibDataSet;
import org.grobid.core.data.BiblioItem;
import org.grobid.core.data.BiblioSet;
import org.grobid.core.data.ChemicalEntity;
import org.grobid.core.data.Date;
import org.grobid.core.data.PatentItem;
import org.grobid.core.data.Person;
import org.grobid.core.document.Document;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.exceptions.GrobidResourceException;
import org.grobid.core.lang.Language;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.core.utilities.LanguageUtilities;
import org.grobid.core.utilities.Pair;
import org.grobid.core.utilities.Utilities;
import org.grobid.core.utilities.counters.CntManager;
import org.grobid.core.utilities.counters.impl.CntManagerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/grobid-core-0.3.4.jar:org/grobid/core/engines/Engine.class */
/**
 * Facade that exposes the parsers held by an {@link EngineParsers} instance through
 * convenience methods: parsing of single strings (authors, affiliations, dates, raw
 * references), processing of PDF files (header, full text, reference section), batch
 * processing of directories, generation of training data and a few static serialisation
 * helpers for bibliographical results.
 *
 * <p>The engine owns its {@link EngineParsers}; call {@link #close()} when the engine is no
 * longer needed.
 */
public class Engine implements Closeable {
    private static final Logger LOGGER = LoggerFactory.getLogger(Engine.class);

    /** Extension stripped (case-insensitively) from input names when deriving output names. */
    private static final String PDF_EXTENSION = ".pdf";

    /** Encoding used for every text file read or written by this class. */
    private static final String UTF_8 = "UTF-8";

    /** Upper bound on the number of characters sampled for language identification. */
    private static final int LANGUAGE_ID_SAMPLE_SIZE = 5000;

    // Mode selectors understood by batchCreateTraining(...).
    private static final int TRAINING_HEADER = 0;
    private static final int TRAINING_FULLTEXT = 1;
    private static final int TRAINING_SEGMENTATION = 2;
    private static final int TRAINING_REFERENCE_SEGMENTATION = 3;

    // Mode selectors understood by batchProcess(...).
    private static final int PROCESS_HEADER = 0;
    private static final int PROCESS_FULLTEXT = 1;

    private static CntManager cntManager = CntManagerFactory.getCntManager();

    private final EngineParsers parsers = new EngineParsers();
    private List<String> acceptedLanguages = null;

    /**
     * Parses one author string with the author parser in "header" mode.
     *
     * @param str the raw author string
     * @return whatever the author parser returns for a single-element input list
     * @throws Exception if the underlying parser fails
     */
    public List<Person> processAuthorsHeader(String str) throws Exception {
        ArrayList<String> inputs = new ArrayList<>();
        inputs.add(str);
        return this.parsers.getAuthorParser().processingHeader(inputs);
    }

    /**
     * Parses one author string with the author parser in "citation" mode.
     *
     * @param str the raw author string
     * @return whatever the author parser returns for a single-element input list
     * @throws Exception if the underlying parser fails
     */
    public List<Person> processAuthorsCitation(String str) throws Exception {
        ArrayList<String> inputs = new ArrayList<>();
        inputs.add(str);
        return this.parsers.getAuthorParser().processingCitation(inputs);
    }

    /**
     * Not implemented: the input is ignored.
     *
     * @param list ignored
     * @return always {@code null}
     * @throws Exception never thrown by the current implementation
     */
    public List<List<Person>> processAuthorsCitationLists(List<String> list) throws Exception {
        return null;
    }

    /**
     * Parses one affiliation/address string.
     *
     * @param str the raw affiliation text
     * @return the result of the affiliation/address parser
     * @throws IOException if the underlying parser fails with an I/O error
     */
    public List<Affiliation> processAffiliation(String str) throws IOException {
        return this.parsers.getAffiliationAddressParser().processing(str);
    }

    /**
     * Parses several affiliation/address strings, one parser call per input.
     *
     * @param list the raw affiliation texts
     * @return one parser result per input, in input order; {@code null} when {@code list} is
     *         {@code null} or empty
     * @throws Exception if the underlying parser fails
     */
    public List<List<Affiliation>> processAffiliations(List<String> list) throws Exception {
        if (list == null || list.isEmpty()) {
            // Historical contract: no input yields null rather than an empty list.
            return null;
        }
        List<List<Affiliation>> results = new ArrayList<>(list.size());
        for (String text : list) {
            List<Affiliation> parsed = this.parsers.getAffiliationAddressParser().processing(text);
            results.add(parsed);
        }
        return results;
    }

    /**
     * Parses one date string.
     *
     * @param str the raw date text
     * @return the result of the date parser
     * @throws IOException if the underlying parser fails with an I/O error
     */
    public List<Date> processDate(String str) throws IOException {
        return this.parsers.getDateParser().processing(str);
    }

    /**
     * Not implemented: the input is ignored.
     *
     * @param list ignored
     * @return always {@code null}
     */
    public List<List<Date>> processDates(List<String> list) {
        return null;
    }

    /**
     * Parses one raw reference string after removing every backslash from it.
     *
     * @param str the raw reference; may be {@code null}, in which case it is handed to the
     *            citation parser unchanged
     * @param z   flag forwarded to the citation parser (presumably enables consolidation;
     *            confirm against the citation parser)
     * @return the result of the citation parser
     */
    public BiblioItem processRawReference(String str, boolean z) {
        if (str != null) {
            // Literal replacement: strips all backslashes without going through a regex.
            str = str.replace("\\", "");
        }
        return this.parsers.getCitationParser().processing(str, z);
    }

    /**
     * Parses several raw reference strings, one citation-parser call per input. Unlike
     * {@link #processRawReference(String, boolean)} the inputs are passed through unchanged.
     *
     * @param list the raw references
     * @param z    flag forwarded to the citation parser (presumably enables consolidation;
     *             confirm against the citation parser)
     * @return one parser result per input, in input order; {@code null} when {@code list} is
     *         {@code null} or empty
     * @throws Exception if the underlying parser fails
     */
    public List<BiblioItem> processRawReferences(List<String> list, boolean z) throws Exception {
        if (list == null || list.isEmpty()) {
            return null;
        }
        List<BiblioItem> results = new ArrayList<>(list.size());
        for (String rawReference : list) {
            results.add(this.parsers.getCitationParser().processing(rawReference, z));
        }
        return results;
    }

    /**
     * Extracts and parses the reference section of a document.
     *
     * @param str input handed to the citation parser (presumably the path of a PDF file;
     *            confirm against the citation parser)
     * @param z   flag forwarded to the citation parser
     * @return the result of the citation parser's reference-section processing
     */
    public List<BibDataSet> processReferences(String str, boolean z) {
        return this.parsers.getCitationParser().processingReferenceSection(str, this.parsers.getReferenceSegmenterParser(), z);
    }

    /**
     * Generates reference-segmenter training files for one input file. When the document has
     * a non-empty references part and the reference segmenter returns training data, three
     * files are written to the output directory, named after the input file with its trailing
     * {@code .pdf} extension (any case) replaced by {@code .training.referenceSegmenter.tei.xml},
     * {@code .training.referenceSegmenter} and {@code .training.referenceSegmenter.rawtxt}.
     *
     * @param str  path of the input file
     * @param str2 path of the output directory; created if missing
     * @param i    value forwarded to the reference segmenter's training-data generation
     * @throws GrobidResourceException if a path is {@code null}, the input file does not exist
     *                                 or the output directory cannot be created
     * @throws GrobidException         if writing an output file fails
     * @throws Exception               if the underlying parsers fail
     */
    public void createTrainingReferenceSegmentation(String str, String str2, int i) throws Exception {
        if (str == null) {
            throw new GrobidResourceException("Cannot process pdf file, because input file was null.");
        }
        File inputFile = new File(str);
        if (!inputFile.exists()) {
            throw new GrobidResourceException("Cannot process pdf file, because input file '" + inputFile.getAbsolutePath() + "' does not exists.");
        }
        if (str2 == null) {
            throw new GrobidResourceException("Cannot start parsing, because the output path, where the tei files will be written to is null.");
        }
        File outputDir = new File(str2);
        if (!outputDir.exists() && !outputDir.mkdirs()) {
            throw new GrobidResourceException("Cannot start parsing, because cannot create output path for tei files on location '" + outputDir.getAbsolutePath() + "'.");
        }
        try {
            Document document = this.parsers.getSegmentationParser().processing(str);
            String referencesText = document.getDocumentPartText(SegmentationLabel.REFERENCES);
            if (referencesText == null || referencesText.isEmpty()) {
                return;
            }
            Pair<String, String> trainingData = this.parsers.getReferenceSegmenterParser().createTrainingData(document, i);
            String tei = trainingData.getA();
            String features = trainingData.getB();
            if (tei == null) {
                return;
            }
            String inputName = inputFile.getName();
            writeLine(new File(outputDir, replacePdfExtension(inputName, ".training.referenceSegmenter.tei.xml")), tei);
            writeLine(new File(outputDir, replacePdfExtension(inputName, ".training.referenceSegmenter")), features);
            writeLine(new File(outputDir, replacePdfExtension(inputName, ".training.referenceSegmenter.rawtxt")), referencesText);
        } catch (IOException e) {
            throw new GrobidException("An IO exception occurred while running Grobid.", e);
        }
    }

    /**
     * Generates reference-segmenter training data for every PDF of a directory.
     *
     * @param str  input directory
     * @param str2 output directory
     * @param i    starting identifier, or {@code -1} (see {@code batchCreateTraining})
     * @return the number of PDF files found
     */
    public int batchCreateTrainingReferenceSegmentation(String str, String str2, int i) {
        return batchCreateTraining(str, str2, i, TRAINING_REFERENCE_SEGMENTATION);
    }

    /**
     * Delegates to {@link Utilities#uploadFile}; the meaning of the arguments is defined there.
     *
     * @param str  first argument forwarded to {@code Utilities.uploadFile}
     * @param str2 second argument forwarded to {@code Utilities.uploadFile}
     * @param str3 third argument forwarded to {@code Utilities.uploadFile}
     * @return the value returned by {@code Utilities.uploadFile}
     */
    public String downloadPDF(String str, String str2, String str3) {
        return Utilities.uploadFile(str, str2, str3);
    }

    /**
     * @return the internal (mutable) list of accepted languages, or {@code null} if none was
     *         ever added
     */
    public List<String> getAcceptedLanguages() {
        return this.acceptedLanguages;
    }

    /**
     * Adds a language to the list of accepted languages, creating the list on first use.
     *
     * @param str the language to add
     */
    public void addAcceptedLanguages(String str) {
        if (this.acceptedLanguages == null) {
            this.acceptedLanguages = new ArrayList<>();
        }
        this.acceptedLanguages.add(str);
    }

    /**
     * Runs language identification on a text file derived from {@code str}: the last three
     * characters of {@code str} are replaced by {@code str2} to obtain the file to read. The
     * file is read as UTF-8; non-empty lines are concatenated (each preceded by a space) until
     * roughly {@value #LANGUAGE_ID_SAMPLE_SIZE} characters have been collected.
     *
     * @param str  path whose last three characters are replaced (presumably a file extension)
     * @param str2 replacement for the last three characters of {@code str}
     * @return the language reported by {@link LanguageUtilities}
     * @throws GrobidException if the file cannot be read
     */
    public Language runLanguageId(String str, String str2) {
        try {
            String textPath = str.substring(0, str.length() - 3) + str2;
            StringBuilder sample = new StringBuilder();
            try (FileInputStream in = new FileInputStream(textPath);
                 BufferedReader reader = new BufferedReader(new InputStreamReader(in, UTF_8))) {
                int sampledChars = 0;
                String line;
                while ((line = reader.readLine()) != null && sampledChars < LANGUAGE_ID_SAMPLE_SIZE) {
                    if (line.length() != 0) {
                        sample.append(' ').append(line);
                        sampledChars += line.length();
                    }
                }
            }
            return LanguageUtilities.getInstance().runLanguageId(sample.toString());
        } catch (IOException e) {
            throw new GrobidException("An exception occurred while running Grobid.", e);
        }
    }

    /**
     * Same as {@link #runLanguageId(String, String)} with {@code "body"} as replacement suffix.
     *
     * @param str path whose last three characters are replaced by {@code "body"}
     * @return the language reported by {@link LanguageUtilities}
     */
    public Language runLanguageId(String str) {
        return runLanguageId(str, "body");
    }

    /**
     * Same as {@link #processHeader(String, boolean, int, int, BiblioItem)} with {@code 0} and
     * {@code 2} as the two integer arguments.
     *
     * @param str        input handed to the header parser
     * @param z          flag forwarded to the header parser
     * @param biblioItem item to fill; a new one is created when {@code null}
     * @return the left element of the pair returned by the header parser
     */
    public String processHeader(String str, boolean z, BiblioItem biblioItem) {
        return processHeader(str, z, 0, 2, biblioItem);
    }

    /**
     * Runs the header parser's {@code processing2} on the input.
     *
     * @param str        input handed to the header parser (presumably the path of a PDF file)
     * @param z          flag forwarded to the header parser
     * @param i          first integer forwarded to the header parser (presumably a start page;
     *                   confirm against the header parser)
     * @param i2         second integer forwarded to the header parser (presumably an end page;
     *                   confirm against the header parser)
     * @param biblioItem item to fill; a new one is created when {@code null}
     * @return the left element of the pair returned by the header parser
     */
    public String processHeader(String str, boolean z, int i, int i2, BiblioItem biblioItem) {
        if (biblioItem == null) {
            biblioItem = new BiblioItem();
        }
        return this.parsers.getHeaderParser().processing2(str, z, biblioItem, i, i2).getLeft();
    }

    /**
     * Runs the header parser's {@code processing} on the input.
     *
     * @param str        input handed to the header parser (presumably the path of a PDF file)
     * @param z          flag forwarded to the header parser
     * @param biblioItem item to fill; a new one is created when {@code null}
     * @return the left element of the pair returned by the header parser
     */
    public String segmentAndProcessHeader(String str, boolean z, BiblioItem biblioItem) {
        if (biblioItem == null) {
            biblioItem = new BiblioItem();
        }
        return this.parsers.getHeaderParser().processing(str, z, biblioItem).getLeft();
    }

    /**
     * Delegates to the header parser's training-data generation.
     *
     * @param str  first argument forwarded to the header parser
     * @param str2 second argument forwarded to the header parser
     * @param str3 third argument forwarded to the header parser
     * @param i    ignored: the header parser is not given this value
     */
    public void createTrainingHeader(String str, String str2, String str3, int i) {
        this.parsers.getHeaderParser().createTrainingHeader(str, str2, str3);
    }

    /**
     * Delegates to the full-text parser's training-data generation; all arguments are
     * forwarded unchanged.
     */
    public void createTrainingFullText(String str, String str2, String str3, int i) {
        this.parsers.getFullTextParser().createTrainingFullText(str, str2, str3, i);
    }

    /**
     * Delegates to the segmentation parser's training-data generation; all arguments are
     * forwarded unchanged.
     */
    public void createTrainingSegmentation(String str, String str2, String str3, int i) {
        this.parsers.getSegmentationParser().createTrainingSegmentation(str, str2, str3, i);
    }

    /**
     * Same as {@link #fullTextToTEI(String, boolean, boolean, String, int, int, boolean)} with
     * {@code null}, {@code -1}, {@code -1} and {@code false} for the trailing arguments.
     */
    public String fullTextToTEI(String str, boolean z, boolean z2) throws Exception {
        return fullTextToTEI(str, z, z2, null, -1, -1, false);
    }

    /**
     * Same as {@link #fullTextToTEI(String, boolean, boolean, String, int, int, boolean)} with
     * {@code -1}, {@code -1} and {@code false} for the trailing arguments.
     */
    public String fullTextToTEI(String str, boolean z, boolean z2, String str2) throws Exception {
        return fullTextToTEI(str, z, z2, str2, -1, -1, false);
    }

    /**
     * Runs the full-text parser on the input and returns the TEI of the resulting document.
     * Start and end of the processing are logged at debug level together with the elapsed time.
     * All arguments are forwarded to the full-text parser unchanged; their exact meaning is
     * defined there.
     *
     * @param str input handed to the full-text parser (presumably the path of a PDF file)
     * @return the TEI of the document produced by the full-text parser
     * @throws Exception if the underlying parser fails
     */
    public String fullTextToTEI(String str, boolean z, boolean z2, String str2, int i, int i2, boolean z3) throws Exception {
        FullTextParser fullTextParser = this.parsers.getFullTextParser();
        LOGGER.debug("Starting processing fullTextToTEI on {}", str);
        long start = System.currentTimeMillis();
        Document document = fullTextParser.processing(str, z, z2, 0, str2, i, i2, z3);
        long elapsed = System.currentTimeMillis() - start;
        LOGGER.debug("Ending processing fullTextToTEI on {}. Time to process: {}ms", str, elapsed);
        return document.getTei();
    }

    /**
     * Generates header training data for every PDF of a directory.
     *
     * @param str  input directory
     * @param str2 output directory
     * @param i    starting identifier, or {@code -1} (see {@code batchCreateTraining})
     * @return the number of PDF files found
     */
    public int batchCreateTrainingHeader(String str, String str2, int i) {
        return batchCreateTraining(str, str2, i, TRAINING_HEADER);
    }

    /**
     * Generates full-text training data for every PDF of a directory.
     *
     * @param str  input directory
     * @param str2 output directory
     * @param i    starting identifier, or {@code -1} (see {@code batchCreateTraining})
     * @return the number of PDF files found
     */
    public int batchCreateTrainingFulltext(String str, String str2, int i) {
        return batchCreateTraining(str, str2, i, TRAINING_FULLTEXT);
    }

    /**
     * Generates segmentation training data for every PDF of a directory.
     *
     * @param str  input directory
     * @param str2 output directory
     * @param i    starting identifier, or {@code -1} (see {@code batchCreateTraining})
     * @return the number of PDF files found
     */
    public int batchCreateTrainingSegmentation(String str, String str2, int i) {
        return batchCreateTraining(str, str2, i, TRAINING_SEGMENTATION);
    }

    /**
     * Generates training data of the requested kind for every file of {@code str} whose name
     * ends with {@code .pdf} or {@code .PDF}. A failure on one file is logged and does not stop
     * the batch.
     *
     * @param str  input directory
     * @param str2 output directory (also used as second output path where the delegate takes two)
     * @param i    identifier passed to the delegate for the first file and incremented by one
     *             for each following file; when {@code -1}, every file is given {@code 0}
     * @param i2   kind of training data: 0 header, 1 full text, 2 segmentation,
     *             3 reference segmentation; any other value processes nothing
     * @return the number of PDF files found, or {@code 0} if {@code str} cannot be listed
     * @throws GrobidException if the batch itself fails (e.g. {@code str} is {@code null})
     */
    private int batchCreateTraining(String str, String str2, int i, int i2) {
        try {
            File[] pdfFiles = new File(str).listFiles(new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                    System.out.println(name);
                    return name.endsWith(".pdf") || name.endsWith(".PDF");
                }
            });
            if (pdfFiles == null) {
                return 0;
            }
            System.out.println(pdfFiles.length + " files to be processed.");
            // With i == -1 the offset stays at 1 so that i + offset is 0 for every file.
            int offset = i == -1 ? 1 : 0;
            for (File pdfFile : pdfFiles) {
                try {
                    switch (i2) {
                        case TRAINING_HEADER:
                            createTrainingHeader(pdfFile.getPath(), str2, str2, i + offset);
                            break;
                        case TRAINING_FULLTEXT:
                            createTrainingFullText(pdfFile.getPath(), str2, str2, i + offset);
                            break;
                        case TRAINING_SEGMENTATION:
                            createTrainingSegmentation(pdfFile.getPath(), str2, str2, i + offset);
                            break;
                        case TRAINING_REFERENCE_SEGMENTATION:
                            createTrainingReferenceSegmentation(pdfFile.getPath(), str2, i + offset);
                            break;
                        default:
                            break;
                    }
                } catch (Exception e) {
                    // One unreadable PDF must not abort the whole batch, whatever the mode.
                    LOGGER.error("An error occured while processing the following pdf: " + pdfFile.getPath(), e);
                }
                if (i != -1) {
                    offset++;
                }
            }
            return pdfFiles.length;
        } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid batch.", e);
        }
    }

    /**
     * Processes the header of every PDF of a directory and writes one TEI file per PDF.
     *
     * @param str  input directory
     * @param str2 output directory; created if missing
     * @param z    flag forwarded to the header processing
     * @return the number of PDF files found
     * @throws Exception declared for compatibility; failures surface as {@link GrobidException}
     */
    public int batchProcessHeader(String str, String str2, boolean z) throws Exception {
        return batchProcess(str, str2, z, z, PROCESS_HEADER);
    }

    /**
     * Processes the full text of every PDF of a directory and writes one TEI file per PDF.
     *
     * @param str  input directory
     * @param str2 output directory; created if missing
     * @param z    first flag forwarded to the full-text processing
     * @param z2   second flag forwarded to the full-text processing
     * @return the number of PDF files found
     */
    public int batchProcessFulltext(String str, String str2, boolean z, boolean z2) {
        return batchProcess(str, str2, z, z2, PROCESS_FULLTEXT);
    }

    /**
     * Processes every file of {@code str} whose name ends with {@code .pdf} or {@code .PDF} and
     * writes the result, when not {@code null}, to {@code str2}. The output name is the input
     * name with its trailing PDF extension (any case) replaced by the TEI ending defined in
     * {@link GrobidProperties} for the selected mode.
     *
     * @param str  input directory
     * @param str2 output directory; created if missing
     * @param z    first flag forwarded to the processing method
     * @param z2   second flag forwarded to the full-text processing (unused in header mode)
     * @param i    0 for header processing, 1 for full-text processing; any other value
     *             processes nothing
     * @return the number of PDF files found, or {@code 0} if {@code str} cannot be listed
     * @throws GrobidResourceException if a path is {@code null}, the input path does not exist
     *                                 or the output directory cannot be created
     * @throws GrobidException         if processing or writing any file fails
     */
    private int batchProcess(String str, String str2, boolean z, boolean z2, int i) {
        if (str == null) {
            throw new GrobidResourceException("Cannot start parsing, because the input path, where the pdf files are supposed to be located is null.");
        }
        if (str2 == null) {
            throw new GrobidResourceException("Cannot start parsing, because the output path, where the tei files will be written to is null.");
        }
        File inputDir = new File(str);
        if (!inputDir.exists()) {
            throw new GrobidResourceException("Cannot start parsing, because the input path, where the pdf files are supposed to be located '" + inputDir.getAbsolutePath() + "' does not exists.");
        }
        File outputDir = new File(str2);
        if (!outputDir.exists() && !outputDir.mkdirs()) {
            throw new GrobidResourceException("Cannot start parsing, because cannot create output path for tei files on location '" + outputDir.getAbsolutePath() + "'.");
        }
        try {
            File[] pdfFiles = inputDir.listFiles(new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                    return name.endsWith(".pdf") || name.endsWith(".PDF");
                }
            });
            if (pdfFiles == null) {
                return 0;
            }
            for (File pdfFile : pdfFiles) {
                if (!pdfFile.exists()) {
                    throw new GrobidResourceException("A problem occurs in reading pdf file '" + pdfFile.getAbsolutePath() + "'. The file does not exists. ");
                }
                String tei = null;
                String ending = null;
                if (i == PROCESS_HEADER) {
                    tei = processHeader(pdfFile.getPath(), z, new BiblioItem());
                    ending = GrobidProperties.FILE_ENDING_TEI_HEADER;
                } else if (i == PROCESS_FULLTEXT) {
                    tei = fullTextToTEI(pdfFile.getPath(), z, z2);
                    ending = GrobidProperties.FILE_ENDING_TEI_FULLTEXT;
                }
                if (tei != null) {
                    writeLine(new File(outputDir, replacePdfExtension(pdfFile.getName(), ending)), tei);
                }
            }
            return pdfFiles.length;
        } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid.", e);
        }
    }

    /**
     * Derives an output file name from an input file name: a trailing {@code .pdf} extension
     * (matched case-insensitively) is removed and {@code newEnding} is appended. Names without
     * that extension simply get {@code newEnding} appended, so an output name never equals the
     * input name.
     */
    private static String replacePdfExtension(String fileName, String newEnding) {
        int baseLength = fileName.length() - PDF_EXTENSION.length();
        if (baseLength >= 0 && fileName.regionMatches(true, baseLength, PDF_EXTENSION, 0, PDF_EXTENSION.length())) {
            return fileName.substring(0, baseLength) + newEnding;
        }
        return fileName + newEnding;
    }

    /**
     * Writes {@code content} followed by a newline to {@code target} as UTF-8, replacing any
     * existing file. The streams are closed even when writing fails.
     */
    private static void writeLine(File target, String content) throws IOException {
        try (FileOutputStream out = new FileOutputStream(target, false);
             OutputStreamWriter writer = new OutputStreamWriter(out, UTF_8)) {
            writer.write(content + "\n");
        }
    }

    /**
     * @param biblioItem the item to serialise
     * @return {@code biblioItem.toTEI(0)}
     */
    public static String header2TEI(BiblioItem biblioItem) {
        return biblioItem.toTEI(0);
    }

    /**
     * @param biblioItem the item to serialise
     * @return {@code biblioItem.toBibTeX()}
     */
    public static String header2BibTeX(BiblioItem biblioItem) {
        return biblioItem.toBibTeX();
    }

    /**
     * Serialises references as a {@code <tei>} element made of a shared {@link BiblioSet},
     * built from all items, followed by a {@code <listbibl>} holding each item's
     * {@code toTEI2} output.
     *
     * @param str  value handed to each item's {@code buildBiblioSet}
     * @param list the references; must not be {@code null}
     * @return the assembled string
     */
    public static String references2TEI2(String str, List<BibDataSet> list) {
        StringBuilder tei = new StringBuilder();
        tei.append("<tei>\n");
        BiblioSet biblioSet = new BiblioSet();
        for (BibDataSet dataSet : list) {
            dataSet.getResBib().buildBiblioSet(biblioSet, str);
        }
        tei.append(biblioSet.toTEI());
        tei.append("<listbibl>\n");
        for (BibDataSet dataSet : list) {
            tei.append("\n").append(dataSet.getResBib().toTEI2(biblioSet));
        }
        tei.append("\n</listbibl>\n</tei>\n");
        return tei.toString();
    }

    /**
     * Serialises references as a {@code <listbibl>} element; each item is given {@code str} as
     * path and serialised with its position in the list.
     *
     * @param str  path set on every item
     * @param list the references; must not be {@code null}
     * @return the assembled string
     */
    public static String references2TEI(String str, List<BibDataSet> list) {
        StringBuilder tei = new StringBuilder();
        tei.append("<listbibl>\n");
        int position = 0;
        for (BibDataSet dataSet : list) {
            BiblioItem item = dataSet.getResBib();
            item.setPath(str);
            tei.append("\n").append(item.toTEI(position));
            position++;
        }
        tei.append("\n</listbibl>\n");
        return tei.toString();
    }

    /**
     * Serialises references as BibTeX, one entry per item, each preceded by a newline.
     *
     * @param str  path set on every item
     * @param list the references; must not be {@code null}
     * @return the assembled string
     */
    public String references2BibTeX(String str, List<BibDataSet> list) {
        StringBuilder bibTeX = new StringBuilder();
        for (BibDataSet dataSet : list) {
            BiblioItem item = dataSet.getResBib();
            item.setPath(str);
            bibTeX.append("\n").append(item.toBibTeX());
        }
        return bibTeX.toString();
    }

    /**
     * Serialises the reference at index {@code i} as TEI.
     *
     * @param str  path set on the selected item
     * @param list the references; may be {@code null}
     * @param i    zero-based index of the reference
     * @return the TEI of the selected item, or an empty string when {@code list} is
     *         {@code null} or {@code i} is not a valid index
     */
    public static String reference2TEI(String str, List<BibDataSet> list, int i) {
        StringBuilder tei = new StringBuilder();
        // Valid indexes are 0..size-1; i == size must be rejected as well.
        if (list != null && i >= 0 && i < list.size()) {
            BiblioItem item = list.get(i).getResBib();
            item.setPath(str);
            tei.append(item.toTEI(i));
        }
        return tei.toString();
    }

    /**
     * Serialises the reference at index {@code i} as BibTeX.
     *
     * @param str  path set on the selected item
     * @param list the references; may be {@code null}
     * @param i    zero-based index of the reference
     * @return the BibTeX of the selected item, or an empty string when {@code list} is
     *         {@code null} or {@code i} is not a valid index
     */
    public static String reference2BibTeX(String str, List<BibDataSet> list, int i) {
        StringBuilder bibTeX = new StringBuilder();
        // Valid indexes are 0..size-1; i == size must be rejected as well.
        if (list != null && i >= 0 && i < list.size()) {
            BiblioItem item = list.get(i).getResBib();
            item.setPath(str);
            bibTeX.append(item.toBibTeX());
        }
        return bibTeX.toString();
    }

    /**
     * Delegates to the reference extractor's {@code extractAllReferencesString}.
     *
     * @param str   input handed to the reference extractor
     * @param list  list handed to the extractor as its last argument
     * @param list2 list handed to the extractor as its fourth argument
     * @param z     flag forwarded to the extractor
     * @return the extractor's result, or {@code null} when both lists are {@code null}
     * @throws Exception if the extractor fails
     */
    public String processAllCitationsInPatent(String str, List<BibDataSet> list, List<PatentItem> list2, boolean z) throws Exception {
        if (list == null && list2 == null) {
            return null;
        }
        return this.parsers.getReferenceExtractor().extractAllReferencesString(str, false, z, list2, list);
    }

    /**
     * Delegates to the reference extractor's {@code extractAllReferencesXMLFile}.
     *
     * @param str   input handed to the reference extractor
     * @param list  list handed to the extractor as its last argument
     * @param list2 list handed to the extractor as its fourth argument
     * @param z     flag forwarded to the extractor
     * @return the extractor's result, or {@code null} when both lists are {@code null}
     * @throws Exception if the extractor fails
     */
    public String processAllCitationsInXMLPatent(String str, List<BibDataSet> list, List<PatentItem> list2, boolean z) throws Exception {
        if (list == null && list2 == null) {
            return null;
        }
        return this.parsers.getReferenceExtractor().extractAllReferencesXMLFile(str, false, z, list2, list);
    }

    /**
     * Delegates to the reference extractor's {@code extractAllReferencesPDFFile}.
     *
     * @param str   input handed to the reference extractor
     * @param list  list handed to the extractor as its last argument
     * @param list2 list handed to the extractor as its fourth argument
     * @param z     flag forwarded to the extractor
     * @return the extractor's result, or {@code null} when both lists are {@code null}
     * @throws Exception if the extractor fails
     */
    public String processAllCitationsInPDFPatent(String str, List<BibDataSet> list, List<PatentItem> list2, boolean z) throws Exception {
        if (list == null && list2 == null) {
            return null;
        }
        return this.parsers.getReferenceExtractor().extractAllReferencesPDFFile(str, false, z, list2, list);
    }

    /**
     * Runs a {@link TeiStAXParser} from the input file to the output file. Both streams are
     * closed whether or not parsing succeeds.
     *
     * @param str  path of the file to read
     * @param str2 path of the file to write
     * @param z    flag forwarded to the {@link TeiStAXParser}
     * @throws GrobidException if opening the files or parsing fails
     * @throws Exception       declared for compatibility
     */
    public void processCitationPatentTEI(String str, String str2, boolean z) throws Exception {
        try (FileInputStream in = new FileInputStream(new File(str));
             FileOutputStream out = new FileOutputStream(new File(str2))) {
            new TeiStAXParser(in, out, false, z).parse();
        } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid.", e);
        }
    }

    /**
     * Delegates to the reference extractor's {@code generateTrainingData}; both arguments are
     * forwarded unchanged.
     *
     * @throws Exception if the extractor fails
     */
    public void createTrainingPatentCitations(String str, String str2) throws Exception {
        this.parsers.getReferenceExtractor().generateTrainingData(str, str2);
    }

    /**
     * Calls {@link #createTrainingPatentCitations(String, String)} for every file of
     * {@code str} whose name ends with {@code .xml}, {@code .XML}, {@code .xml.gz} or
     * {@code .XML.gz}. The first failure aborts the batch.
     *
     * @param str  input directory
     * @param str2 second argument forwarded for every file
     * @return the number of matching files, or {@code 0} if {@code str} cannot be listed
     * @throws GrobidException if processing any file fails
     * @throws Exception       declared for compatibility
     */
    public int batchCreateTrainingPatentcitations(String str, String str2) throws Exception {
        try {
            File[] xmlFiles = new File(str).listFiles(new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                    return name.endsWith(".xml") || name.endsWith(".XML") || name.endsWith(".xml.gz") || name.endsWith(".XML.gz");
                }
            });
            if (xmlFiles == null) {
                return 0;
            }
            for (File xmlFile : xmlFiles) {
                createTrainingPatentCitations(xmlFile.getPath(), str2);
            }
            return xmlFiles.length;
        } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid.", e);
        }
    }

    /**
     * Delegates to the chemical parser.
     *
     * @param str the text to analyse
     * @return the result of the chemical parser
     * @throws Exception if the parser fails
     */
    public List<ChemicalEntity> extractChemicalEntities(String str) throws Exception {
        return this.parsers.getChemicalParser().extractChemicalEntities(str);
    }

    /**
     * Returns the abstract of the document's header with every {@code @BULLET} marker replaced
     * by a bullet character surrounded by spaces.
     *
     * @param document the document; it, its header and the header's abstract must not be
     *                 {@code null}
     * @return the abstract with bullet markers replaced
     * @throws Exception declared for compatibility
     */
    public String getAbstract(Document document) throws Exception {
        return document.getResHeader().getAbstract().replace("@BULLET", " • ");
    }

    /**
     * Lists the titles of the given references, one per line; items without a title are
     * skipped.
     *
     * @param list the references; must not be {@code null}
     * @return the newline-terminated titles
     * @throws Exception declared for compatibility
     */
    public String printRefTitles(List<BibDataSet> list) throws Exception {
        StringBuilder titles = new StringBuilder();
        for (BibDataSet dataSet : list) {
            BiblioItem item = dataSet.getResBib();
            if (item.getTitle() != null) {
                titles.append(item.getTitle()).append("\n");
            }
        }
        return titles.toString();
    }

    /**
     * Closes the parsers owned by this engine.
     *
     * @throws IOException if closing the parsers fails
     */
    @Override
    public synchronized void close() throws IOException {
        this.parsers.close();
    }

    /**
     * Replaces the counter manager shared by all engines.
     *
     * @param cntManager2 the new counter manager
     */
    public static void setCntManager(CntManager cntManager2) {
        cntManager = cntManager2;
    }

    /**
     * @return the counter manager shared by all engines
     */
    public static CntManager getCntManager() {
        return cntManager;
    }

    /**
     * @return the parsers owned by this engine
     */
    public EngineParsers getParsers() {
        return this.parsers;
    }
}
