package pt.up.hs.linguini.analysis;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import pt.up.hs.linguini.exceptions.AnalyzerException;
import pt.up.hs.linguini.filters.PunctuationTokenFilter;
import pt.up.hs.linguini.filters.StopTokenFilter;
import pt.up.hs.linguini.jspell.JSpellWordAnnotator;
import pt.up.hs.linguini.models.Token;
import pt.up.hs.linguini.transformers.LemmaTokenTransformer;
import pt.up.hs.linguini.utils.MathUtils;

/* loaded from: input_file:pt/up/hs/linguini/analysis/LexicalDiversityAnalysis.class */
/**
 * Computes a lexical diversity score for a list of tokens using either the
 * MTLD or the HD-D algorithm (see {@link Algorithm}).
 *
 * <p>Typical usage: construct, call {@link #preprocess(List)} (or
 * {@link #skipPreprocessing2(List, Void)} when the tokens are already clean),
 * call {@link #execute2()}, then read the score with {@link #getResult()}.
 *
 * <p>Instances hold mutable state (the token list and the result) and perform
 * no synchronization.
 */
public class LexicalDiversityAnalysis implements Analysis<Void, Double> {
    /** Minimum number of tokens required by {@link #execute2()}. */
    private static final int MINIMUM_TOKENS = 50;
    /** Type-token ratio at which an MTLD factor is considered complete. */
    private static final double DEFAULT_MTLD_THRESHOLD = 0.72d;
    /** Sample size used by HD-D when none is supplied. */
    private static final int DEFAULT_HDD_SAMPLE_SIZE = 42;

    private final Locale locale;
    private final Algorithm algorithm;
    private final boolean lemmatize;
    private final double mtldThreshold;
    private final int hddSampleSize;
    /** Tokens to analyse; {@code null} until preprocessing (or its skip) has run. */
    private List<Token> tokens;
    /** Last computed score; {@code null} until {@link #execute2()} has run. */
    private Double result;

    /** Supported lexical diversity algorithms. */
    public enum Algorithm {
        MTLD,
        HDD
    }

    /**
     * Creates an analysis using the default MTLD threshold and HD-D sample size.
     *
     * @param locale    locale passed to the stop-word filter and the lemmatizer
     * @param algorithm algorithm used by {@link #execute2()}
     * @param lemmatize whether {@link #preprocess(List)} lemmatizes the tokens
     */
    public LexicalDiversityAnalysis(Locale locale, Algorithm algorithm, boolean lemmatize) {
        this(locale, algorithm, lemmatize, DEFAULT_MTLD_THRESHOLD, DEFAULT_HDD_SAMPLE_SIZE);
    }

    /**
     * Creates an analysis with a custom MTLD threshold.
     *
     * @param locale        locale passed to the stop-word filter and the lemmatizer
     * @param algorithm     algorithm used by {@link #execute2()}
     * @param lemmatize     whether {@link #preprocess(List)} lemmatizes the tokens
     * @param mtldThreshold type-token ratio at which an MTLD factor is complete
     */
    public LexicalDiversityAnalysis(Locale locale, Algorithm algorithm, boolean lemmatize, double mtldThreshold) {
        this(locale, algorithm, lemmatize, mtldThreshold, DEFAULT_HDD_SAMPLE_SIZE);
    }

    /**
     * Creates an analysis with a custom HD-D sample size.
     *
     * @param locale        locale passed to the stop-word filter and the lemmatizer
     * @param algorithm     algorithm used by {@link #execute2()}
     * @param lemmatize     whether {@link #preprocess(List)} lemmatizes the tokens
     * @param hddSampleSize sample size used by the HD-D computation
     */
    public LexicalDiversityAnalysis(Locale locale, Algorithm algorithm, boolean lemmatize, int hddSampleSize) {
        this(locale, algorithm, lemmatize, DEFAULT_MTLD_THRESHOLD, hddSampleSize);
    }

    /** Single place where every field is initialised; the public constructors delegate here. */
    private LexicalDiversityAnalysis(
            Locale locale, Algorithm algorithm, boolean lemmatize, double mtldThreshold, int hddSampleSize) {
        this.locale = locale;
        this.algorithm = algorithm;
        this.lemmatize = lemmatize;
        this.mtldThreshold = mtldThreshold;
        this.hddSampleSize = hddSampleSize;
        this.tokens = null;
        this.result = null;
    }

    /**
     * Filters the given tokens through the punctuation and stop-word filters,
     * optionally lemmatizes them, and stores the outcome for
     * {@link #execute2()}.
     *
     * @param list tokens to prepare
     * @return this analysis, for chaining
     * @throws AnalyzerException if the lemmatizer cannot be created
     */
    @Override // pt.up.hs.linguini.analysis.Analysis
    public Analysis<Void, Double> preprocess(List<Token> list) throws AnalyzerException {
        PunctuationTokenFilter punctuationFilter = new PunctuationTokenFilter();
        StopTokenFilter stopFilter = new StopTokenFilter(this.locale);
        // Sequential on purpose: the filters and the lemmatizer are shared across
        // all elements and their thread-safety is not visible from this class.
        // Encounter order (which MTLD depends on) is preserved either way.
        Stream<Token> pipeline = list.stream()
                .filter(punctuationFilter::accept)
                .filter(stopFilter::accept);
        if (this.lemmatize) {
            try {
                LemmaTokenTransformer lemmatizer =
                        new LemmaTokenTransformer(new JSpellWordAnnotator(this.locale));
                pipeline = pipeline.map(lemmatizer::transform);
            } catch (IOException | URISyntaxException e) {
                throw new AnalyzerException("Could not lemmatize words", e);
            }
        }
        this.tokens = pipeline.collect(Collectors.toList());
        return this;
    }

    /**
     * Stores the given tokens as-is, bypassing filtering and lemmatization.
     *
     * @param list tokens to analyse
     * @param r5   unused
     * @return this analysis, for chaining
     */
    /* renamed from: skipPreprocessing, reason: avoid collision after fix types in other method */
    public Analysis<Void, Double> skipPreprocessing2(List<Token> list, Void r5) {
        this.tokens = list;
        return this;
    }

    /**
     * Runs the configured algorithm over the stored tokens and keeps the score
     * for {@link #getResult()}.
     *
     * @return this analysis, for chaining
     * @throws AnalyzerException if no tokens have been provided yet, if there
     *                           are fewer than {@value #MINIMUM_TOKENS} tokens,
     *                           or if the algorithm is not supported
     */
    @Override // pt.up.hs.linguini.analysis.Analysis
    /* renamed from: execute, reason: merged with bridge method [inline-methods] */
    public Analysis<Void, Double> execute2() throws AnalyzerException {
        if (this.tokens == null) {
            // Fail with a domain error rather than a bare NullPointerException.
            throw new AnalyzerException("No tokens to analyze: preprocess the text before executing the analysis.");
        }
        if (this.tokens.size() < MINIMUM_TOKENS) {
            throw new AnalyzerException(
                    "Cannot calculate lexical diversity in texts with less than " + MINIMUM_TOKENS + " words.");
        }
        switch (this.algorithm) {
            case HDD:
                this.result = Double.valueOf(hdd(this.tokens, this.hddSampleSize));
                break;
            case MTLD:
                this.result = Double.valueOf(mtld(this.tokens, this.mtldThreshold));
                break;
            default:
                throw new AnalyzerException("Unknown algorithm to calculate lexical diversity '" + this.algorithm + "'.");
        }
        return this;
    }

    /**
     * Returns the score computed by the last {@link #execute2()} call.
     *
     * @return the score, or {@code null} if the analysis has not been executed
     */
    @Override // pt.up.hs.linguini.analysis.Analysis
    public Double getResult() {
        return this.result;
    }

    /**
     * HD-D: for every distinct word, adds the probability that the word shows
     * up at least once in a random sample of {@code sampleSize} tokens, divided
     * by {@code sampleSize}.
     *
     * @param list       tokens to analyse
     * @param sampleSize size of the hypothetical random sample
     * @return the summed contributions of all distinct words
     * @throws AnalyzerException if {@code sampleSize} is not positive
     */
    private double hdd(List<Token> list, int sampleSize) throws AnalyzerException {
        if (sampleSize <= 0) {
            // Dividing by a non-positive sample size would yield Infinity/NaN.
            throw new AnalyzerException("HD-D sample size must be positive, but was " + sampleSize + ".");
        }
        HashMap<String, Integer> frequencies = new HashMap<>();
        for (Token token : list) {
            frequencies.merge(token.getWord(), 1, Integer::sum);
        }
        int population = list.size();
        double score = 0.0d;
        for (int frequency : frequencies.values()) {
            // 1 - P(word absent from the sample) = P(word present in the sample).
            score += (1.0d - MathUtils.hypergeometric(population, sampleSize, frequency, 0)) / sampleSize;
        }
        return score;
    }

    /**
     * Forward MTLD: walks the tokens keeping a running type-token ratio (TTR)
     * for the current segment; each time the TTR drops to {@code threshold} or
     * below, a factor is counted and a new segment starts. The unfinished last
     * segment contributes a partial factor.
     *
     * @param list      tokens to analyse, in text order
     * @param threshold TTR at which a factor is complete
     * @return the token count divided by the number of factors, or {@code -1}
     *         when the factor count is not positive
     */
    private double mtld(List<Token> list, double threshold) {
        double ttr = 1.0d;
        HashSet<String> segmentTypes = new HashSet<>();
        int segmentTokens = 0;
        double factors = 0.0d;
        for (Token token : list) {
            segmentTypes.add(token.getWord());
            segmentTokens++;
            // Floating-point division over the current segment only: an int/int
            // division would truncate the ratio to 0 or 1.
            ttr = (double) segmentTypes.size() / segmentTokens;
            if (ttr <= threshold) {
                factors += 1.0d;
                ttr = 1.0d;
                segmentTypes.clear();
                segmentTokens = 0;
            }
        }
        // Partial factor for the trailing segment that never reached the threshold.
        double totalFactors = factors + ((1.0d - ttr) / (1.0d - threshold));
        if (totalFactors > 0.0d) {
            return list.size() / totalFactors;
        }
        return -1.0d;
    }

    /** Bridge overload that forwards to {@link #skipPreprocessing2(List, Void)}. */
    @Override // pt.up.hs.linguini.analysis.Analysis
    public /* bridge */ /* synthetic */ Analysis<Void, Double> skipPreprocessing(List list, Void r6) throws AnalyzerException {
        return skipPreprocessing2((List<Token>) list, r6);
    }
}
