package org.grobid.core.engines.citations;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/grobid-core-0.3.4.jar:org/grobid/core/engines/citations/AdditionalRegexTextSegmenter.class */
/**
 * Regex-based fallback that cuts a raw bibliography text into individual citation strings.
 *
 * <p>Three strategies are tried, in this order:
 * <ol>
 *   <li>bracketed numbers ({@code "[1] ..."}),</li>
 *   <li>bullet numbers ({@code "1. ..."}),</li>
 *   <li>a generic split on a period followed by a line break.</li>
 * </ol>
 * The resulting segments are then filtered by {@link #cleanCitations(List)}.
 */
public class AdditionalRegexTextSegmenter {
    /** Upper bound on the number of raw segments that are kept before filtering. */
    private static final int MAX_CITATION_COUNT = 512;
    /** Segments of this length or longer are discarded by {@link #cleanCitations(List)}. */
    private static final int MAXIMUM_SEGMENT_LENGTH = 700;
    /** The generic splitter only cuts when the candidate segment is longer than this. */
    private static final int MINIMUM_SEGMENT_LENGTH = 15;
    /** Sentinel returned by {@link #parseMarkerNumber(String)}; real marker numbers are never negative. */
    private static final int UNPARSEABLE_MARKER = -1;

    public static final Logger LOGGER = LoggerFactory.getLogger(AdditionalRegexTextSegmenter.class.getName());

    private static final Pattern BRACKET_NUMBER_LOOKUP_PATTERN = Pattern.compile("(?s).{0,15}\\[\\d\\] .{10,701}\\n\\[\\d+\\] .*");
    private static final Pattern BULLET_NUMBER_LOOKUP_PATTERN = Pattern.compile("(?s).{0,10}1\\. .{10,701}\\n[\\s0]*2\\. .*");
    private static final Pattern BRACKET_SPLIT_PATTERN = Pattern.compile("\\[(\\d+)\\] ");
    private static final Pattern BULLET_SPLIT_PATTERN = Pattern.compile("\\n(\\d+)\\. ");
    private static final Pattern GENERIC_SPLIT_PATTERN = Pattern.compile("\\.[\\s]*\\n");
    /** Five or more consecutive lines that hold nothing but a bracketed number. */
    private static final Pattern BROKEN_RACKETS_PATTERN = Pattern.compile("(\\[\\d+\\]\\s*\\n){5,}");

    /**
     * First letters for which {@link #isValidNextFirstLetter(char, char, int)} widens the allowed
     * jump by two. NOTE(review): presumably letters that few author names start with - confirm.
     */
    private static final List<Character> sparseLetters = Arrays.asList('O', 'P', 'T', 'U', 'V', 'W', 'X', 'Y');

    /**
     * Splits a bibliography text into citation segments.
     *
     * @param str the raw text; may be {@code null} or empty
     * @return the filtered segments; empty when the input is {@code null}/empty or when at most
     *         one raw segment was found
     * @throws RuntimeException if the segmentation overflows the stack; the original
     *         {@link StackOverflowError} is attached as the cause
     */
    public List<String> extractCitationSegments(String str) {
        if (str == null || str.isEmpty()) {
            return Collections.emptyList();
        }
        try {
            // The broken-brackets check runs inside the try as well: it is a regex with a
            // repeated group and must be covered by the same stack overflow handling.
            List<String> segments;
            if (BROKEN_RACKETS_PATTERN.matcher(str).find()) {
                segments = splitGenerically(str);
            } else if (BRACKET_NUMBER_LOOKUP_PATTERN.matcher(str).find()) {
                segments = splitAlongBracketedNumbers(str);
            } else if (BULLET_NUMBER_LOOKUP_PATTERN.matcher(str).find()) {
                segments = splitAlongBulletNumbers(str);
            } else {
                segments = splitGenerically(str);
            }
            return cleanCitations(segments);
        } catch (StackOverflowError e) {
            LOGGER.error("Stackoverflow");
            // Keep the original error as cause so the failing frames stay diagnosable.
            throw new RuntimeException("Runtime exception with stackoverflow in AdditionalRegexTextSegmenter", e);
        }
    }

    /**
     * Filters raw segments: keeps at most {@link #MAX_CITATION_COUNT} of them, then drops the
     * blank ones and those of {@link #MAXIMUM_SEGMENT_LENGTH} characters or more.
     *
     * @param list the raw segments
     * @return the retained segments, or an empty list when there is at most one raw segment
     */
    private List<String> cleanCitations(List<String> list) {
        if (list.size() > MAX_CITATION_COUNT) {
            list = list.subList(0, MAX_CITATION_COUNT);
        }
        if (list.size() <= 1) {
            return Collections.emptyList();
        }
        List<String> kept = new ArrayList<String>(list.size());
        for (String segment : list) {
            if (segment.length() < MAXIMUM_SEGMENT_LENGTH && segment.trim().length() != 0) {
                kept.add(segment);
            }
        }
        return kept;
    }

    /**
     * Splits along {@code "[n] "} markers whose numbers increase by exactly one.
     *
     * @param str the text to split
     * @return the text between consecutive markers, or an empty list when there is no marker
     */
    private List<String> splitAlongBracketedNumbers(String str) {
        // The marker starts at '[', so the single character in front of it (normally the line
        // break) is cut from the end of the preceding segment.
        return splitAlongNumberedMarkers(str, BRACKET_SPLIT_PATTERN, 1, false);
    }

    /**
     * Splits along {@code "\nn. "} markers whose numbers increase by exactly one.
     *
     * @param str the text to split
     * @return the text between consecutive markers, or an empty list when there is no marker
     */
    private List<String> splitAlongBulletNumbers(String str) {
        // The marker already starts at the line break, so nothing in front of it is cut:
        // cutting one more character would remove the last character of the citation itself.
        return splitAlongNumberedMarkers(str, BULLET_SPLIT_PATTERN, 0, true);
    }

    /**
     * Shared implementation of the two numbered splits.
     *
     * @param str the text to split
     * @param markerPattern pattern whose group 1 captures the marker number
     * @param trimBeforeMarker number of characters in front of a marker that are not part of the
     *        preceding segment
     * @param recoverLeadingItem when {@code true} and the first marker found is number 2, the text
     *        in front of it (minus its first two characters) is emitted as the first segment,
     *        because an item at the very start of the text has no preceding line break to match
     * @return the segments, or an empty list when no usable marker exists
     */
    private List<String> splitAlongNumberedMarkers(String str, Pattern markerPattern, int trimBeforeMarker,
            boolean recoverLeadingItem) {
        Matcher matcher = markerPattern.matcher(str);
        // Markers whose number does not fit an int are skipped instead of aborting the split.
        int currentNumber = UNPARSEABLE_MARKER;
        while (currentNumber == UNPARSEABLE_MARKER && matcher.find()) {
            currentNumber = parseMarkerNumber(matcher.group(1));
        }
        if (currentNumber == UNPARSEABLE_MARKER) {
            return Collections.emptyList();
        }

        List<String> segments = new ArrayList<String>();
        int segmentStart = matcher.end();
        // Guarded: a marker found before index 2 leaves no valid leading range to cut.
        if (recoverLeadingItem && currentNumber == 2 && matcher.start() >= 2) {
            segments.add(str.substring(2, matcher.start()));
        }
        while (matcher.find()) {
            if (parseMarkerNumber(matcher.group(1)) != currentNumber + 1) {
                continue; // not the next number in sequence: part of the running segment
            }
            if (matcher.start() - 1 < segmentStart) {
                continue; // marker directly follows the previous one: nothing to emit
            }
            segments.add(str.substring(segmentStart, matcher.start() - trimBeforeMarker));
            segmentStart = matcher.end();
            currentNumber++;
        }
        segments.add(str.substring(segmentStart));
        return segments;
    }

    /**
     * Parses the digits captured from a marker.
     *
     * @param digits the captured digit run
     * @return the number, or {@link #UNPARSEABLE_MARKER} when it does not fit an {@code int}
     */
    private static int parseMarkerNumber(String digits) {
        try {
            return Integer.parseInt(digits);
        } catch (NumberFormatException e) {
            return UNPARSEABLE_MARKER;
        }
    }

    /**
     * Splits on a period followed by optional whitespace and a line break.
     *
     * <p>A first pass counts the separators and how many of them are followed by a character
     * below {@code 'A'}. When more than a quarter are, the list is treated as unordered and every
     * separator is accepted; otherwise a separator is only accepted when the first character after
     * it passes {@link #isValidNextFirstLetter(char, char, int)}.
     *
     * @param str the text to split; must not be empty
     * @return the segments; a single-element list holding {@code str} when there is no separator
     */
    private List<String> splitGenerically(String str) {
        Matcher matcher = GENERIC_SPLIT_PATTERN.matcher(str);
        int separatorCount = 0;
        int nonLetterStarts = 0;
        while (matcher.find()) {
            separatorCount++;
            if (matcher.end() >= str.length()) {
                break; // separator closes the text, no following character to inspect
            }
            if (str.charAt(matcher.end()) < 'A') {
                nonLetterStarts++;
            }
        }
        if (separatorCount == 0) {
            LOGGER.info("Single segment found!");
            return Arrays.asList(str);
        }
        boolean ordered = true;
        if (nonLetterStarts > 0.25d * separatorCount) {
            LOGGER.info("Citations not ordered.");
            ordered = false;
        }
        // The fewer separators there are, the larger the accepted jump between first letters.
        int maxLetterJump = ((26 / separatorCount) + 1) * 2;

        List<String> segments = new ArrayList<String>();
        matcher.reset();
        char previousFirstLetter = str.charAt(0);
        int segmentStart = 0;
        while (matcher.find() && matcher.end() < str.length()) {
            char nextFirstLetter = str.charAt(matcher.end());
            int separatorStart = matcher.start();
            boolean longEnough = separatorStart - segmentStart > MINIMUM_SEGMENT_LENGTH;
            if (longEnough && (!ordered || isValidNextFirstLetter(previousFirstLetter, nextFirstLetter, maxLetterJump))) {
                segments.add(str.substring(segmentStart, separatorStart));
                previousFirstLetter = nextFirstLetter;
                // Resume right after the whole separator, i.e. at the character that was just
                // inspected as the next first letter; a fixed offset would leave the separator's
                // extra whitespace at the head of the next segment.
                segmentStart = matcher.end();
            }
        }
        segments.add(str.substring(segmentStart));
        return segments;
    }

    /**
     * Tells whether {@code c2} may start the citation that follows one starting with {@code c}.
     *
     * @param c first character of the current citation
     * @param c2 first character of the candidate next citation
     * @param i maximum accepted distance between both characters; widened by two when {@code c}
     *        is one of the sparse letters
     * @return {@code true} when {@code c2} is not below {@code c} and within the accepted distance
     */
    private boolean isValidNextFirstLetter(char c, char c2, int i) {
        if (c2 < c) {
            return false;
        }
        if (sparseLetters.contains(Character.valueOf(c))) {
            i += 2;
        }
        return c2 - c <= i;
    }

    /**
     * Ad-hoc debugging entry point: reads {@code /tmp/text.txt}, prints its length and runs a
     * bullet lookup pattern against it. The result of the lookup is not used.
     *
     * @param strArr ignored
     * @throws IOException if the file cannot be read
     */
    public static void main(String[] strArr) throws IOException {
        String readFileToString = FileUtils.readFileToString(new File("/tmp/text.txt"));
        System.out.println(readFileToString.length());
        Pattern.compile("(?s).{0,10}1\\. .{10,100}\\n[\\s0]*2\\. .*").matcher(readFileToString).find();
    }
}
