package org.grobid.core.document;

import com.google.common.collect.Iterables;
import com.google.common.collect.TreeMultimap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.xml.serialize.LineSeparator;
import org.grobid.core.data.BibDataSet;
import org.grobid.core.engines.tagging.GenericTaggerUtils;
import org.grobid.core.layout.Block;
import org.grobid.core.layout.Cluster;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.utilities.Pair;
import org.grobid.core.utilities.TextUtilities;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/grobid-core-0.3.4.jar:org/grobid/core/document/BasicStructureBuilder.class */
public class BasicStructureBuilder {
    /** Logger for this class. */
    private static final Logger LOGGER = LoggerFactory.getLogger(BasicStructureBuilder.class);

    // NOTE: the decompiled source passed the literal flag value 2 to Pattern.compile;
    // that value is Pattern.CASE_INSENSITIVE, which is spelled out below.

    /**
     * Start-anchored, case-insensitive: introduction-like headings (introduction, Einleitung,
     * acknowledgements, background, content(s), motivations, problems), some with a "1." / "I." prefix.
     */
    public static Pattern introduction = Pattern.compile("^\\b*(Introduction?|Einleitung|INTRODUCTION|Acknowledge?ments?|Acknowledge?ment?|Background?|Content?|Contents?|Motivations?|1\\.\\sPROBLEMS?|1\\.(\\n)?\\sIntroduction?|1\\.\\sINTRODUCTION|I\\.(\\s)+Introduction|1\\.\\sProblems?|I\\.\\sEinleitung?|1\\.\\sEinleitung?|1\\sEinleitung?|1\\sIntroduction?)", Pattern.CASE_INSENSITIVE);

    /** Start-anchored, case-insensitive: only the numbered ("1." / "I." / "1 ") variants of the first-section heading. */
    public static Pattern introductionStrict = Pattern.compile("^\\b*(1\\.\\sPROBLEMS?|1\\.(\\n)?\\sIntroduction?|1\\.(\\n)?\\sContent?|1\\.\\sINTRODUCTION|I\\.(\\s)+Introduction|1\\.\\sProblems?|I\\.\\sEinleitung?|1\\.\\sEinleitung?|1\\sEinleitung?|1\\sIntroduction?)", Pattern.CASE_INSENSITIVE);

    /** Start-anchored, case-insensitive: abstract heading (abstract, résumé, summary, zusammenfassung), optional leading dot. */
    public static Pattern abstract_ = Pattern.compile("^\\b*\\.?(abstract?|résumé?|summary?|zusammenfassung?)", Pattern.CASE_INSENSITIVE);

    /** Start-anchored, case-insensitive: keyword heading (keyword, key word, mots clefs), optional leading dot. */
    public static Pattern keywords = Pattern.compile("^\\b*\\.?(keyword?|key\\s*word?|mots\\s*clefs?)", Pattern.CASE_INSENSITIVE);

    /** Start-anchored, case-insensitive: heading of a reference/bibliography section in several languages. */
    public static Pattern references = Pattern.compile("^\\b*(References?|REFERENCES?|Bibliography|BIBLIOGRAPHY|References?\\s+and\\s+Notes?|References?\\s+Cited|REFERENCE?\\s+CITED|REFERENCES?\\s+AND\\s+NOTES?|Références|Literatur|LITERATURA|Literatur|Referências|BIBLIOGRAFIA|Literaturverzeichnis|Referencias|LITERATURE CITED|References and Notes)", Pattern.CASE_INSENSITIVE);

    /**
     * Case-sensitive: a numbered or lettered header - one or two digits, or a capital letter followed by
     * I/V/X characters, then optional dotted sub-numbers, one whitespace and non-digit text.
     */
    public static Pattern header = Pattern.compile("^((\\d\\d?)|([A-Z](I|V|X)*))(\\.(\\d)*)*\\s(\\D+)");

    /** Case-insensitive, unanchored: "figure ", "fig." or "sch?ma" anywhere in the text. */
    public static Pattern figure = Pattern.compile("(figure\\s|fig\\.|sch?ma)", Pattern.CASE_INSENSITIVE);

    /** Case-insensitive: "table " at the start of the text, or "tab" / "tableau" anywhere in it. */
    public static Pattern table = Pattern.compile("^(T|t)able\\s|tab|tableau", Pattern.CASE_INSENSITIVE);

    /** Case-sensitive: "Equation " / "equation " at the start of the text. */
    public static Pattern equation = Pattern.compile("^(E|e)quation\\s");

    /** Case-insensitive, unanchored: an acknowledgement(s) keyword anywhere in the text. */
    private static Pattern acknowledgement = Pattern.compile("(acknowledge?ments?|acknowledge?ment?)", Pattern.CASE_INSENSITIVE);

    /** Header numbering such as "3 " or "3. ". */
    public static Pattern headerNumbering1 = Pattern.compile("^(\\d+)\\.?\\s");

    /** Header numbering such as "3.1 " or "3.1.2 ". */
    public static Pattern headerNumbering2 = Pattern.compile("^((\\d+)\\.)+(\\d+)\\s");

    /** Header numbering such as "3. " or "3.1. ". */
    public static Pattern headerNumbering3 = Pattern.compile("^((\\d+)\\.)+\\s");

    /** Header numbering starting with a capital letter plus I/V/X characters, with optional dotted digits. */
    public static Pattern headerNumbering4 = Pattern.compile("^([A-Z](I|V|X)*(\\.(\\d)*)*\\s)");

    /** Digits followed by one whitespace character at the very start of the text. */
    private static Pattern startNum = Pattern.compile("^(\\d)+\\s");

    /** One whitespace character followed by digits at the very end of the text. */
    private static Pattern endNum = Pattern.compile("\\s(\\d)+$");

    /**
     * Detects whether the blocks of the document carry line numbers and, if so, strips them.
     *
     * <p>Detection looks at the first blocks of the document (at most six) and succeeds when a block
     * starts or ends with a number that is exactly one greater than the first number seen. When detection
     * succeeds, every block whose leading (or trailing) number equals a running counter starting at 1
     * has that number removed from its text, and its first (or last) layout token is removed as well.
     * NOTE(review): removing a single token assumes the number is exactly one layout token - confirm.
     *
     * @param document the document whose blocks are inspected and possibly modified
     * @return {@code true} if a line-number sequence was detected (blocks may have been modified)
     */
    public boolean filterLineNumber(Document document) {
        boolean numberAtLineStart = false;
        boolean numberAtLineEnd = false;
        boolean sequenceFound = false;
        int currentNumber = -1;
        int firstNumber = -1;
        int inspected = 0;

        // Phase 1: look for two numbers n, n+1 in the first few blocks.
        for (Block block : document.getBlocks()) {
            String text = block.getText();
            List<LayoutToken> tokens = block.tokens;
            if (text != null && tokens != null && tokens.size() > 0) {
                String trimmed = text.trim();
                Matcher startMatcher = startNum.matcher(trimmed);
                Matcher endMatcher = endNum.matcher(trimmed);
                if (startMatcher.find()) {
                    try {
                        // group(0) includes the whitespace matched by \s, which Integer.parseInt
                        // rejects; it must be trimmed first or the parse can never succeed.
                        currentNumber = Integer.parseInt(startMatcher.group(0).trim());
                        numberAtLineStart = true;
                    } catch (NumberFormatException ignored) {
                        // digit run too long for an int: treat as "no number"
                        currentNumber = -1;
                    }
                } else if (endMatcher.find()) {
                    try {
                        currentNumber = Integer.parseInt(endMatcher.group(0).trim());
                        numberAtLineEnd = true;
                    } catch (NumberFormatException ignored) {
                        currentNumber = -1;
                    }
                }
                if (firstNumber != -1) {
                    if (currentNumber == firstNumber + 1) {
                        sequenceFound = true;
                        break;
                    }
                } else {
                    firstNumber = currentNumber;
                }
            }
            inspected++;
            if (inspected > 5) {
                break;
            }
        }

        if (!sequenceFound) {
            return false;
        }

        // Phase 2: strip the numbers that follow the running counter.
        int expected = 1;
        for (Block block : document.getBlocks()) {
            String text = block.getText();
            List<LayoutToken> tokens = block.tokens;
            if (text == null || tokens == null || tokens.isEmpty()) {
                continue;
            }
            if (numberAtLineEnd) {
                Matcher endMatcher = endNum.matcher(text);
                if (endMatcher.find() && endMatcher.group(0).trim().equals(String.valueOf(expected))) {
                    // Cut exactly the matched span; '$' may match before a final line terminator,
                    // so anything after the match is kept.
                    block.setText(text.substring(0, endMatcher.start()) + text.substring(endMatcher.end()));
                    tokens.remove(tokens.size() - 1);
                    expected++;
                }
            } else if (numberAtLineStart) {
                // A leading number has to be searched with the start pattern.
                Matcher startMatcher = startNum.matcher(text);
                if (startMatcher.find() && startMatcher.group(0).trim().equals(String.valueOf(expected))) {
                    block.setText(text.substring(startMatcher.end()));
                    tokens.remove(0);
                    expected++;
                }
            }
        }
        return true;
    }

    /**
     * First structural pass over the blocks of a document.
     *
     * <p>The pass classifies block indices into page headers, page footers, section titles,
     * acknowledgement blocks, table/figure heads and their surrounding blocks, and document headers,
     * and stores the resulting index lists on the document. It also assigns every block to a font
     * cluster through {@code addBlockToCluster}. The lists are stored in a {@code finally} clause, so
     * whatever was collected is still set on the document when an exception interrupts the pass.
     *
     * <p>A block whose text is {@code null} is treated as having empty text.
     *
     * @param document the document to analyse; its blocks must not be {@code null}
     * @throws NullPointerException if {@code document} or its block list is {@code null}
     */
    public static void firstPass(Document document) {
        if (document == null) {
            throw new NullPointerException("document is null");
        }
        if (document.getBlocks() == null) {
            throw new NullPointerException("document blocks are null");
        }
        List<Integer> blockHeaders = new ArrayList<Integer>();
        List<Integer> blockFooters = new ArrayList<Integer>();
        List<Integer> blockSectionTitles = new ArrayList<Integer>();
        List<Integer> acknowledgementBlocks = new ArrayList<Integer>();
        List<Integer> blockTables = new ArrayList<Integer>();
        List<Integer> blockFigures = new ArrayList<Integer>();
        List<Integer> blockHeadTables = new ArrayList<Integer>();
        List<Integer> blockHeadFigures = new ArrayList<Integer>();
        List<Integer> blockDocumentHeaders = new ArrayList<Integer>();
        document.setTitleMatchNum(false);
        try {
            List<Block> blocks = document.getBlocks();

            // 1. Section-title candidates, page boundaries, font clustering.
            for (int i = 0; i < blocks.size(); i++) {
                String text = normalizedText(blocks.get(i));
                if (introduction.matcher(text).find() || references.matcher(text).find()) {
                    if (text.startsWith("1.") || text.startsWith("1 ") || text.startsWith("2.")
                            || text.startsWith("2 ") || text.startsWith("Contents")) {
                        document.setTitleMatchNum(true);
                    }
                    blockSectionTitles.add(Integer.valueOf(i));
                } else if (text.startsWith("@PAGE")) {
                    // The normalized text contains no line break any more, so the former per-line
                    // tokenizer loop only ever tested the start of the whole text.
                    markPageBoundary(blocks, i, blockHeaders, blockFooters);
                }
                addBlockToCluster(Integer.valueOf(i), document);
            }

            // 2. Extend the section titles with the small font cluster that contains one of them.
            Cluster titleCluster = findSectionTitleCluster(document, blocks.size(), blockSectionTitles);
            if (titleCluster != null) {
                List<Integer> clusterBlocks = titleCluster.getBlocks2();
                if (clusterBlocks.size() < 20) {
                    for (Integer member : clusterBlocks) {
                        if (!blockSectionTitles.contains(member)) {
                            blockSectionTitles.add(member);
                        }
                    }
                }
            }

            // 3. Acknowledgement: from an acknowledgement section title up to the next section title
            //    or reference heading, skipping footer blocks.
            boolean inAcknowledgement = false;
            for (int i = 0; i < blocks.size(); i++) {
                String text = normalizedText(blocks.get(i));
                Integer index = Integer.valueOf(i);
                boolean isSectionTitle = blockSectionTitles.contains(index);
                if (isSectionTitle && acknowledgement.matcher(text).find()) {
                    acknowledgementBlocks.add(index);
                    inAcknowledgement = true;
                } else if (inAcknowledgement) {
                    if (isSectionTitle) {
                        break;
                    }
                    // find() is evaluated exactly once: a second call on the same matcher resumes
                    // after the first match and cannot succeed for a start-anchored pattern.
                    if (references.matcher(text).find()) {
                        break;
                    }
                    if (!blockFooters.contains(index)) {
                        acknowledgementBlocks.add(index);
                    }
                }
            }

            // 4. The first section title that is a reference heading is not kept as a section title.
            for (int pos = 0; pos < blockSectionTitles.size(); pos++) {
                Block titleBlock = blocks.get(blockSectionTitles.get(pos).intValue());
                if (references.matcher(normalizedText(titleBlock)).find()) {
                    blockSectionTitles.remove(pos); // removal by position
                    break;
                }
            }

            // 5. Keep only header/footer candidates that repeat (nearly identical text elsewhere).
            dropNonRepeatingBlocks(blocks, blockHeaders);
            dropNonRepeatingBlocks(blocks, blockFooters);

            // 6. Well-known boilerplate blocks; only the first hit is recorded.
            for (int i = 0; i < blocks.size(); i++) {
                Block block = blocks.get(i);
                String text = normalizedText(block);
                if (text.startsWith("Author manuscript, published in")
                        && Math.abs(Double.valueOf(block.getY()).doubleValue() - 12.538d) < 2.0d) {
                    blockDocumentHeaders.add(Integer.valueOf(i));
                    break;
                }
                if (text.startsWith("Permission to make digital or hard copies")) {
                    blockFooters.add(Integer.valueOf(i));
                    break;
                }
                if (text.startsWith("Confidential: ") && text.contains("IOP")) {
                    blockDocumentHeaders.add(Integer.valueOf(i));
                    break;
                }
            }

            // 7. Table and figure heads plus the short blocks around them (up to 14 on each side,
            //    block 0 excluded when scanning backwards).
            for (int i = 0; i < blocks.size(); i++) {
                Block block = blocks.get(i);
                String text = normalizedText(block);
                Matcher figureMatcher = figure.matcher(text);
                Matcher tableMatcher = table.matcher(text);
                // The width of the head block itself is used in every neighbour test below.
                double width = block.getWidth();
                boolean boldOrShort = block.getBold() || text.length() < 200;

                if (tableMatcher.find() && boldOrShort) {
                    if (!blockHeadTables.contains(Integer.valueOf(i))) {
                        blockHeadTables.add(Integer.valueOf(i));
                    }
                    for (int j = i - 1; j > i - 15 && j > 0; j--) {
                        String neighbour = blocks.get(j).getText();
                        if (neighbour == null) {
                            continue;
                        }
                        if (neighbour.length() >= 160 && width >= 50.0d) {
                            break; // a long block ends the table area
                        }
                        addIfUnclaimed(j, blockTables, blockSectionTitles, blockHeaders, blockFooters);
                    }
                    for (int j = i + 1; j < i + 15 && j < blocks.size(); j++) {
                        String neighbour = blocks.get(j).getText();
                        if (neighbour == null) {
                            continue;
                        }
                        if (neighbour.length() >= 160 && width >= 50.0d) {
                            break;
                        }
                        addIfUnclaimed(j, blockTables, blockSectionTitles, blockHeaders, blockFooters);
                    }
                } else if (figureMatcher.find() && boldOrShort) {
                    if (!blockHeadFigures.contains(Integer.valueOf(i))) {
                        blockHeadFigures.add(Integer.valueOf(i));
                    }
                    boolean imageAttached = false;
                    for (int j = i - 1; j > i - 15 && j > 0; j--) {
                        String raw = blocks.get(j).getText();
                        if (raw == null) {
                            continue;
                        }
                        String neighbour = raw.trim().replace("  ", " ").trim();
                        if (neighbour.startsWith("@IMAGE") && !imageAttached) {
                            // append the closest preceding image marker to the figure head text
                            block.setText(block.getText() + " " + neighbour);
                            imageAttached = true;
                        }
                        if (neighbour.length() >= 160 && width >= 50.0d) {
                            break;
                        }
                        addIfUnclaimed(j, blockFigures, blockSectionTitles, blockHeaders, blockFooters);
                    }
                    for (int j = i + 1; j < i + 15 && j < blocks.size(); j++) {
                        String raw = blocks.get(j).getText();
                        if (raw == null) {
                            continue;
                        }
                        if (raw.trim().length() >= 160 && width >= 50.0d) {
                            break;
                        }
                        addIfUnclaimed(j, blockFigures, blockSectionTitles, blockHeaders, blockFooters);
                    }
                }
            }
        } finally {
            document.setBlockHeaders(blockHeaders);
            document.setBlockFooters(blockFooters);
            document.setBlockSectionTitles(blockSectionTitles);
            document.setAcknowledgementBlocks(acknowledgementBlocks);
            document.setBlockTables(blockTables);
            document.setBlockFigures(blockFigures);
            document.setBlockHeadTables(blockHeadTables);
            document.setBlockHeadFigures(blockHeadFigures);
            document.setBlockDocumentHeaders(blockDocumentHeaders);
        }
    }

    /** Returns the block text trimmed, with line breaks and double spaces replaced by a space; "" for null text. */
    private static String normalizedText(Block block) {
        String text = block.getText();
        if (text == null) {
            return "";
        }
        return text.trim().replace("\n", " ").replace("  ", " ").trim();
    }

    /**
     * Records the blocks around a "@PAGE" block: short blocks (fewer than 20 tokens) among the five
     * preceding blocks become footer candidates, short blocks among the four following blocks become
     * header candidates, and the page block itself is added to both lists.
     */
    private static void markPageBoundary(List<Block> blocks, int pageIndex,
                                         List<Integer> headers, List<Integer> footers) {
        for (int offset = 5; offset >= 1; offset--) {
            int before = pageIndex - offset;
            if (before >= 0 && blocks.get(before).getNbTokens() < 20) {
                Integer candidate = Integer.valueOf(before);
                if (!footers.contains(candidate)) {
                    footers.add(candidate);
                }
            }
        }
        footers.add(Integer.valueOf(pageIndex));
        headers.add(Integer.valueOf(pageIndex));
        for (int offset = 1; offset <= 4; offset++) {
            int after = pageIndex + offset;
            if (after < blocks.size() && blocks.get(after).getNbTokens() < 20) {
                Integer candidate = Integer.valueOf(after);
                if (!headers.contains(candidate)) {
                    headers.add(candidate);
                }
            }
        }
    }

    /**
     * Returns the first cluster that is small (fewer than a fifth of all blocks and fewer than 20
     * blocks) and contains at least one section-title block, or {@code null} if there is none.
     */
    private static Cluster findSectionTitleCluster(Document document, int blockCount,
                                                   List<Integer> sectionTitles) {
        if (document.getClusters() == null) {
            return null; // no block was clustered, e.g. empty document
        }
        for (Cluster candidate : document.getClusters()) {
            if (candidate.getNbBlocks() < blockCount / 5 && candidate.getNbBlocks() < 20) {
                for (Integer member : candidate.getBlocks2()) {
                    if (sectionTitles.contains(member)) {
                        return candidate;
                    }
                }
            }
        }
        return null;
    }

    /**
     * Removes from {@code candidates} every block that is longer than 160 characters (numbers
     * shadowed) or that has no near-duplicate among the other candidates. Two blocks count as near
     * duplicates when the Levenshtein distance of their number-shadowed texts, divided by the length
     * of the examined block's shadowed text, is below 0.25.
     * NOTE(review): the operands of the distance were lost by the decompiler (undefined register
     * "r0"); they are reconstructed here as the two shadowed texts - confirm against the original source.
     */
    private static void dropNonRepeatingBlocks(List<Block> blocks, List<Integer> candidates) {
        List<Integer> rejected = new ArrayList<Integer>();
        for (Integer candidate : candidates) {
            String candidateText = blocks.get(candidate.intValue()).getText();
            String shadowed = TextUtilities.shadowNumbers(candidateText == null ? "" : candidateText.trim());
            int length = shadowed.length();
            boolean repeated = false;
            if (length <= 160) {
                for (Integer other : candidates) {
                    if (candidate.intValue() == other.intValue()) {
                        continue;
                    }
                    String otherRaw = blocks.get(other.intValue()).getText();
                    String otherText = otherRaw == null ? "" : otherRaw.trim();
                    if (otherText.length() < 160) {
                        // floating-point division: a zero length yields NaN/Infinity, never a match
                        double ratio = (double) TextUtilities.getLevenshteinDistance(
                                shadowed, TextUtilities.shadowNumbers(otherText)) / length;
                        if (ratio < 0.25d) {
                            repeated = true;
                            break;
                        }
                    }
                }
            }
            if (!repeated) {
                rejected.add(candidate);
            }
        }
        for (Integer index : rejected) {
            candidates.remove(index); // removal by value, one occurrence per rejected entry
        }
    }

    /** Adds {@code index} to {@code target} unless it is already there or is a section title, header or footer. */
    private static void addIfUnclaimed(int index, List<Integer> target, List<Integer> sectionTitles,
                                       List<Integer> headers, List<Integer> footers) {
        Integer boxed = Integer.valueOf(index);
        if (!target.contains(boxed) && !sectionTitles.contains(boxed)
                && !headers.contains(boxed) && !footers.contains(boxed)) {
            target.add(boxed);
        }
    }

    /**
     * Puts a block into every existing cluster that has the same font name, bold flag, italic flag
     * and font size; when no cluster matches, a new cluster with these properties is created and
     * appended to the clusters of the document. A missing font name is handled as "unknown".
     *
     * @param num      index of the block in the block list of the document
     * @param document document holding the blocks and the clusters
     */
    private static void addBlockToCluster(Integer num, Document document) {
        Block target = document.getBlocks().get(num.intValue());
        String blockFont = target.getFont();
        boolean blockBold = target.getBold();
        boolean blockItalic = target.getItalic();
        double blockSize = target.getFontSize();
        if (blockFont == null) {
            blockFont = "unknown";
        }

        boolean placed = false;
        if (document.getClusters() == null) {
            // first block ever clustered: start with an empty cluster list
            document.setClusters(new ArrayList<Cluster>());
        } else {
            for (Cluster existing : document.getClusters()) {
                String clusterFont = existing.getFont();
                if (clusterFont == null) {
                    clusterFont = "unknown";
                }
                if (!blockFont.equals(clusterFont)) {
                    continue;
                }
                // all three properties are read before deciding, as before
                boolean sameBold = blockBold == existing.getBold();
                boolean sameItalic = blockItalic == existing.getItalic();
                boolean sameSize = blockSize == existing.getFontSize();
                if (sameBold && sameItalic && sameSize) {
                    existing.addBlock2(num);
                    placed = true;
                }
            }
        }

        if (!placed) {
            Cluster created = new Cluster();
            created.setFont(blockFont);
            created.setBold(blockBold);
            created.setItalic(blockItalic);
            created.setFontSize(blockSize);
            created.addBlock2(num);
            document.getClusters().add(created);
        }
    }

    /**
     * Projects a line-based labeling result onto the document and records, per plain label, the
     * document pieces (start pointer, end pointer) covered by consecutive lines with that label.
     * The pieces are stored in a {@link TreeMultimap} that is set on the document before processing.
     *
     * <p>Each labeled entry is paired with the next usable text line of the blocks: blocks without
     * tokens or text are skipped, as are blank lines and lines rejected by
     * {@code TextUtilities.filterLine}. For every line the positions of its first and last token in
     * {@code list} are located so that pieces can be expressed as token positions.
     *
     * @param document the document to annotate; also the return value
     * @param str      labeling result, parsed with {@code GenericTaggerUtils.getTokensAndLabels}
     * @param list     the tokenization of the whole document, indexed by the block start/end tokens
     * @return the same {@code document} instance
     */
    public static Document generalResultSegmentation(Document document, String str, List<String> list) {
        List<Pair<String, String>> labeledTokens = GenericTaggerUtils.getTokensAndLabels(str);
        TreeMultimap<String, DocumentPiece> labeledBlocks = TreeMultimap.create();
        document.setLabeledBlocks(labeledBlocks);
        List<Block> blocks = document.getBlocks();

        int lineIndex = 0;      // index of the next line inside the current block
        int blockIndex = 0;     // index of the current block
        int lineEndPos = 0;     // position in 'list' of the last token of the current line
        int lineStartPos = 0;   // position in 'list' of the first token of the current line
        DocumentPointer pieceStart = DocumentPointer.START_DOCUMENT_POINTER;
        DocumentPointer lastPointer = null;
        String currentLabel = null;
        String lastLabel = null;

        // position of the last token of the document: end token of the last block that has one
        int lastTokenIndex = -1;
        for (int b = blocks.size() - 1; b >= 0; b--) {
            int endToken = blocks.get(b).getEndToken();
            if (endToken != -1) {
                lastTokenIndex = endToken;
                break;
            }
        }

        // A sentinel entry is appended so that the piece opened by the last real label is
        // closed inside the loop.
        Pair<String, String> sentinel = new Pair<String, String>("IgnoredToken", "@IGNORED_LABEL@");
        for (Pair<String, String> labeled : Iterables.concat(labeledTokens, Collections.singleton(sentinel))) {
            if (labeled == null) {
                continue;
            }

            // Fetch the next usable line from the blocks and locate it in the tokenization.
            String line = null;
            while (line == null && blockIndex < blocks.size()) {
                Block block = blocks.get(blockIndex);
                String blockText = block.getText();
                String[] lines = null;
                if (block.getTokens() != null && blockText != null && blockText.trim().length() != 0) {
                    lines = blockText.split("[\\n\\r]");
                }
                if (lines == null || lines.length == 0 || lineIndex >= lines.length) {
                    // block unusable or exhausted: move on to the next block
                    blockIndex++;
                    lineIndex = 0;
                    if (blockIndex < blocks.size()) {
                        lineStartPos = blocks.get(blockIndex).getStartToken();
                    }
                    continue;
                }

                line = lines[lineIndex];
                lineIndex++;
                if (line.trim().length() == 0 || TextUtilities.filterLine(line)) {
                    line = null;
                    continue;
                }
                if (lineStartPos > lastTokenIndex) {
                    continue; // past the last token; the loop ends because a line was found
                }

                // Skip spaces and tabs at the beginning of the line. The decompiled form of this
                // loop had lost its exit and could never terminate.
                while ((list.get(lineStartPos).equals(" ") || list.get(lineStartPos).equals("\t"))
                        && lineStartPos != lastTokenIndex) {
                    lineStartPos++;
                }

                // If the token found does not start the labeled text, advance line by line inside
                // the block until a line starting with it is found.
                if (!labeled.a.startsWith(list.get(lineStartPos))) {
                    while (lineStartPos < block.getEndToken()) {
                        if (list.get(lineStartPos).equals("\n")
                                || list.get(lineStartPos).equals(LineSeparator.Macintosh)) {
                            // move to the first non-blank token of the following line
                            do {
                                lineStartPos++;
                                if (!list.get(lineStartPos).equals(" ") && !list.get(lineStartPos).equals("\t")) {
                                    break;
                                }
                            } while (lineStartPos != lastTokenIndex);
                            if (lineStartPos != lastTokenIndex && labeled.a.startsWith(list.get(lineStartPos))) {
                                break;
                            }
                        }
                        lineStartPos++;
                    }
                }

                // The line ends just before the next line break (or at the end token of the block).
                lineEndPos = lineStartPos;
                while (lineEndPos < block.getEndToken()) {
                    if (list.get(lineEndPos).equals("\n") || list.get(lineEndPos).equals(LineSeparator.Macintosh)) {
                        lineEndPos--;
                        break;
                    }
                    lineEndPos++;
                }
            }

            currentLabel = GenericTaggerUtils.getPlainLabel(labeled.b);
            if (blockIndex == blocks.size()) {
                break; // ran out of blocks
            }

            DocumentPointer currentPointer = new DocumentPointer(document, blockIndex, lineEndPos);
            if (lastLabel != null && !currentLabel.equals(lastLabel)) {
                // label change: close the piece of the previous label and open a new one
                if (pieceStart.getTokenDocPos() <= lastPointer.getTokenDocPos() && pieceStart.getTokenDocPos() != -1) {
                    labeledBlocks.put(lastLabel, new DocumentPiece(pieceStart, lastPointer));
                }
                pieceStart = new DocumentPointer(document, blockIndex, lineStartPos);
            }

            lastLabel = currentLabel;
            lastPointer = currentPointer;
            lineStartPos = lineEndPos + 2; // one step over the line break, one to the next line
        }

        // When the blocks were exhausted before the sentinel was handled, the last piece is still open.
        if (blockIndex == blocks.size() && lastLabel != null && !currentLabel.equals(lastLabel)
                && pieceStart.getTokenDocPos() <= lastPointer.getTokenDocPos()
                && pieceStart.getTokenDocPos() != -1) {
            labeledBlocks.put(lastLabel, new DocumentPiece(pieceStart, lastPointer));
        }
        return document;
    }

    /**
     * Projects a token-based labeling result onto the document.
     *
     * <p>Every non-blank line of {@code str} is read as tab-separated fields: the first field is the
     * token text and, when there are at least two fields, the last field is the label (a line with a
     * single field keeps the label of the previous line). Depending on the label, the index of the
     * current block is recorded as document header, page footer, page header or section title;
     * {@code <references>} tokens are accumulated into {@link BibDataSet} raw strings and into
     * document pieces covering each references zone. Results are stored on the document.
     *
     * @param document the document to annotate; also the return value
     * @param str      labeling result, one token per line
     * @param list     tokenization of the document; whitespace and empty tokens are skipped while aligning
     * @return the same {@code document} instance
     * @throws NullPointerException if {@code document} or its block list is {@code null}
     */
    public static Document resultSegmentation(Document document, String str, List<String> list) {
        if (document == null) {
            throw new NullPointerException("Document is null");
        }
        if (document.getBlocks() == null) {
            throw new NullPointerException("Blocks of the documents are null");
        }
        List<Integer> blockHeaders = new ArrayList<Integer>();
        List<Integer> blockFooters = new ArrayList<Integer>();
        List<Integer> blockDocumentHeaders = new ArrayList<Integer>();
        List<Integer> blockSectionTitles = new ArrayList<Integer>();
        TreeSet<DocumentPiece> blockReferences = new TreeSet<DocumentPiece>();
        document.setBibDataSets(new ArrayList<BibDataSet>());

        List<Block> blocks = document.getBlocks();
        String currentLabel = null;   // label of the current line, prefix included
        String tokenText = null;      // first field of the current line
        String previousLabel = null;  // label of the previously processed line
        String previousPlain = null;  // previousLabel without start-of-entity prefix
        int tokenPos = 0;             // cursor in 'list'
        int blockIndex = 0;
        BibDataSet bib = null;
        DocumentPointer referencesStart = null;
        DocumentPointer lastPointer = null;

        for (String line : str.split("\n")) {
            StringTokenizer fields = new StringTokenizer(line.trim(), "\t");
            if (!fields.hasMoreTokens()) {
                // A blank line carries no token: it must neither repeat the previous token
                // nor advance the token cursor.
                continue;
            }
            while (blockIndex < blocks.size() - 1 && blocks.get(blockIndex).getEndToken() < tokenPos) {
                blockIndex++;
            }

            tokenText = fields.nextToken().trim();

            // Align the cursor on the next non-blank token; remember whether whitespace was skipped.
            boolean addSpace = false;
            while (tokenPos < list.size()) {
                String tok = list.get(tokenPos);
                if (tok.equals(" ") || tok.equals("\n") || tok.equals(LineSeparator.Macintosh) || tok.equals("\t")) {
                    addSpace = true;
                    tokenPos++;
                } else if (tok.equals("")) {
                    tokenPos++;
                } else {
                    break;
                }
            }

            // The last of the remaining fields is the label; the fields in between are not used.
            while (fields.hasMoreTokens()) {
                String field = fields.nextToken().trim();
                if (!fields.hasMoreTokens()) {
                    currentLabel = field;
                }
            }

            if (previousLabel != null) {
                previousPlain = stripStartPrefix(previousLabel);
            }
            String currentPlain = currentLabel == null ? null : stripStartPrefix(currentLabel);
            DocumentPointer currentPointer = new DocumentPointer(document, blockIndex, tokenPos);

            // Leaving a references zone: close the piece at the previous token.
            if ("<references>".equals(previousPlain) && !previousPlain.equals(currentPlain)) {
                if (referencesStart != null) {
                    blockReferences.add(new DocumentPiece(referencesStart, lastPointer));
                }
                referencesStart = currentPointer;
            }

            // Constant-first comparisons keep a still missing label from raising an exception.
            if ("<header>".equals(currentPlain)) {
                addIfAbsent(blockDocumentHeaders, blockIndex);
            } else if ("<references>".equals(currentPlain)) {
                if (currentLabel.equals("I-<references>")) {
                    // start of a zone: new piece start and new raw bibliography string
                    referencesStart = new DocumentPointer(document, blockIndex, tokenPos);
                    if (bib == null) {
                        bib = new BibDataSet();
                    } else if (bib.getRawBib() != null) {
                        document.getBibDataSets().add(bib);
                        bib = new BibDataSet();
                    }
                    bib.setRawBib(tokenText);
                } else {
                    if (referencesStart == null) {
                        // zone without explicit start label: open the piece at its first token
                        referencesStart = currentPointer;
                    }
                    String separator = addSpace ? " " : "";
                    if (bib == null) {
                        bib = new BibDataSet();
                        bib.setRawBib(separator + tokenText);
                    } else {
                        bib.setRawBib(bib.getRawBib() + separator + tokenText);
                    }
                }
            } else if ("<page_footnote>".equals(currentPlain)) {
                addIfAbsent(blockFooters, blockIndex);
            } else if ("<page_header>".equals(currentPlain)) {
                addIfAbsent(blockHeaders, blockIndex);
            } else if ("<section>".equals(currentPlain)) {
                addIfAbsent(blockSectionTitles, blockIndex);
            }

            previousLabel = currentLabel;
            tokenPos++;
            lastPointer = currentPointer;
        }

        if (bib != null) {
            document.getBibDataSets().add(bib);
        }

        // A references zone running to the end of the input is still open. The decision uses the
        // label of the last processed line; a zone that ended earlier was already closed in the loop.
        String finalPlain = previousLabel == null ? null : stripStartPrefix(previousLabel);
        if ("<references>".equals(finalPlain) && referencesStart != null && lastPointer != null) {
            blockReferences.add(new DocumentPiece(referencesStart, lastPointer));
        }

        document.setBlockHeaders(blockHeaders);
        document.setBlockFooters(blockFooters);
        document.setBlockDocumentHeaders(blockDocumentHeaders);
        document.setBlockReferences(blockReferences);
        document.setBlockSectionTitles(blockSectionTitles);
        return document;
    }

    /** Returns the label without its first two characters when it starts with the start-of-entity prefix. */
    private static String stripStartPrefix(String label) {
        return label.startsWith(GenericTaggerUtils.START_ENTITY_LABEL_PREFIX) ? label.substring(2) : label;
    }

    /** Appends the block index to the list unless it is already present. */
    private static void addIfAbsent(List<Integer> indices, int blockIndex) {
        Integer boxed = Integer.valueOf(blockIndex);
        if (!indices.contains(boxed)) {
            indices.add(boxed);
        }
    }
}
