/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.language;

import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.ngram.NgramExtractor;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.RemoveMinorityScriptsTextFilter;
import com.optimaize.langdetect.text.TextFilter;
import com.optimaize.langdetect.text.TextObjectFactory;
import com.optimaize.langdetect.text.TextObjectFactoryBuilder;
import com.optimaize.langdetect.text.UrlTextFilter;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.logging.Level;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.jetbrains.annotations.Nullable;
import org.languagetool.DetectedLanguage;
import org.languagetool.Experimental;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.languagetool.RuleErrorNotification;
import org.languagetool.RuleLoggerManager;
import org.languagetool.language.CommonWords;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Identifies the language of a text.
 *
 * <p>By default an n-gram based {@link LanguageDetector} is used. After a successful call to
 * {@link #enableFasttext(File, File)}, an external fastText process is queried instead; if that
 * process fails during detection, fastText mode is switched off and the n-gram detector is used
 * from then on.
 */
public class LanguageIdentifier {
    private static final Logger logger = LoggerFactory.getLogger(LanguageIdentifier.class);
    /** Minimal confidence passed to the n-gram language detector. */
    private static final double MINIMAL_CONFIDENCE = 0.9;
    /** Number of predictions requested from fastText per input line. */
    private static final int K_HIGHEST_SCORES = 5;
    /** Value passed to the n-gram detector's short-text algorithm setting. */
    private static final int SHORT_ALGO_THRESHOLD = 50;
    /** In fastText mode, texts shorter than this only consider the preferred languages (if any). */
    private static final int CONSIDER_ONLY_PREFERRED_THRESHOLD = 50;
    /** Matches an e-mail signature: the "-- " separator line and everything after it. */
    private static final Pattern SIGNATURE = Pattern.compile("\n-- \n.*", Pattern.DOTALL);
    /** Matches runs of U+FEFF characters, which are replaced by a single space before detection. */
    private static final Pattern BYTE_ORDER_MARKS = Pattern.compile("\ufeff+");
    /** Language codes that are never passed to the built-in profile reader. */
    private static final List<String> ignoreLangCodes = Arrays.asList("ast", "gl");
    /** Language codes whose profile is loaded from the LanguageTool resource directory instead. */
    private static final List<String> externalLangCodes = Arrays.asList("eo");
    /** fastText scores below this value are augmented with common-word counts. */
    private static final float THRESHOLD = 0.9f;

    private final LanguageDetector languageDetector;
    private final TextObjectFactory textObjectFactory;
    private final int maxLength;
    private boolean fasttextEnabled = false;
    private Process fasttextProcess;
    private BufferedReader fasttextIn;
    private BufferedWriter fasttextOut;

    /**
     * Creates an identifier that looks at no more than the first 1000 characters of a text.
     */
    public LanguageIdentifier() {
        this(1000);
    }

    /**
     * Creates an identifier that looks at no more than the first {@code maxLength} characters.
     *
     * @param maxLength maximum number of leading characters considered, at least 10
     * @throws IllegalArgumentException if {@code maxLength} is smaller than 10
     * @throws RuntimeException if the language profiles cannot be loaded
     */
    public LanguageIdentifier(int maxLength) {
        if (maxLength < 10) {
            throw new IllegalArgumentException("maxLength must be >= 10 (but values > 100 are recommended): " + maxLength);
        }
        this.maxLength = maxLength;
        try {
            List<LanguageProfile> profiles = loadProfiles(getLanguageCodes());
            this.languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                    .minimalConfidence(MINIMAL_CONFIDENCE)
                    .shortTextAlgorithm(SHORT_ALGO_THRESHOLD)
                    .withProfiles(profiles)
                    .build();
            this.textObjectFactory = new TextObjectFactoryBuilder()
                    .maxTextLength(10000)
                    .withTextFilter(UrlTextFilter.getInstance())
                    .withTextFilter(RemoveMinorityScriptsTextFilter.forThreshold(0.3))
                    .withTextFilter(new RemoveEMailSignatureFilter())
                    .build();
        } catch (IOException e) {
            throw new RuntimeException("Could not set up language identifier", e);
        }
    }

    /**
     * Starts a fastText process and switches this identifier to fastText mode.
     * Does nothing if either argument is {@code null}.
     *
     * @param fasttextBinary the fastText executable
     * @param fasttextModel the model file handed to the executable
     * @throws RuntimeException if the process cannot be started
     */
    public void enableFasttext(File fasttextBinary, File fasttextModel) {
        if (fasttextBinary == null || fasttextModel == null) {
            return;
        }
        try {
            startFasttext(fasttextModel, fasttextBinary);
            logger.info("Started fasttext process for language identification: Binary {} with model @ {}", fasttextBinary, fasttextModel);
            fasttextEnabled = true;
        } catch (IOException e) {
            fasttextEnabled = false;
            logger.error("Error while starting fasttext (binary: " + fasttextBinary + ", model: " + fasttextModel + ")", e);
            throw new RuntimeException("Could not start fasttext process for language identification @ " + fasttextBinary + " with model @ " + fasttextModel, e);
        }
    }

    /**
     * Collects the codes of the profiles to load: the short code of every registered language
     * that is neither a variant nor listed as ignored or external. "zh" is replaced by
     * "zh-CN" and "zh-TW".
     */
    private static List<String> getLanguageCodes() {
        List<String> langCodes = new ArrayList<>();
        for (Language lang : Languages.get()) {
            String langCode = lang.getShortCode();
            boolean ignore = lang.isVariant() || ignoreLangCodes.contains(langCode) || externalLangCodes.contains(langCode);
            if (ignore) {
                continue;
            }
            if ("zh".equals(langCode)) {
                langCodes.add("zh-CN");
                langCodes.add("zh-TW");
            } else {
                langCodes.add(langCode);
            }
        }
        return langCodes;
    }

    /**
     * Reads the profiles for the given codes and appends the profile of every external
     * language whose resource ({@code /<code>/<code>.profile}) exists.
     *
     * @throws IOException if a profile cannot be read or its stream cannot be closed
     */
    private List<LanguageProfile> loadProfiles(List<String> langCodes) throws IOException {
        LanguageProfileReader profileReader = new LanguageProfileReader();
        List<LanguageProfile> profiles = profileReader.read(langCodes);
        for (String externalLangCode : externalLangCodes) {
            String profilePath = "/" + externalLangCode + "/" + externalLangCode + ".profile";
            if (!JLanguageTool.getDataBroker().resourceExists(profilePath)) {
                continue;
            }
            // try-with-resources closes the stream and lets read/close failures propagate
            try (InputStream profile = JLanguageTool.getDataBroker().getFromResourceDirAsStream(profilePath)) {
                profiles.add(new LanguageProfileReader().read(profile));
            }
        }
        return profiles;
    }

    /**
     * Detects the language of the given text without noop or preferred languages.
     *
     * @return the detected language, or {@code null} if none could be determined
     */
    @Nullable
    public Language detectLanguage(String text) {
        DetectedLanguage detectedLanguage = detectLanguage(text, Collections.emptyList(), Collections.emptyList());
        if (detectedLanguage == null) {
            return null;
        }
        return detectedLanguage.getDetectedLanguage();
    }

    /**
     * Like {@link #detectLanguage(String)}, but returns the full detection result.
     *
     * @return the detection result, or {@code null} if no language could be determined
     */
    @Nullable
    @Experimental
    DetectedLanguage detectLanguageWithDetails(String text) {
        return detectLanguage(text, Collections.emptyList(), Collections.emptyList());
    }

    /**
     * Detects the language of the given text. Only the first {@code maxLength} characters are
     * used, and the code "nb" in either list is treated as "no".
     *
     * <p>In fastText mode the reported confidence is not the raw fastText score: it is
     * {@code 0.99} scaled by {@code min(text.length(), 30) / 30}.
     *
     * @param text the text to analyze
     * @param noopLangsTmp codes that are accepted as a result even if the language is not
     *     supported; only taken into account in fastText mode
     * @param preferredLangsTmp codes without variant; in fastText mode, for texts shorter than
     *     {@value #CONSIDER_ONLY_PREFERRED_THRESHOLD} characters only these are considered
     * @return the detection result, or {@code null} if no acceptable language was found
     * @throws IllegalArgumentException if a preferred language code contains a {@code '-'}
     */
    @Nullable
    public DetectedLanguage detectLanguage(String text, List<String> noopLangsTmp, List<String> preferredLangsTmp) {
        Objects.requireNonNull(noopLangsTmp);
        Objects.requireNonNull(preferredLangsTmp);
        List<String> noopLangs = replaceBokmalCode(noopLangsTmp);
        List<String> preferredLangs = replaceBokmalCode(preferredLangsTmp);
        if (preferredLangs.stream().anyMatch(k -> k.contains("-"))) {
            throw new IllegalArgumentException("preferredLanguages may only contain language codes without variants (e.g. 'en', but not 'en-US'): " + preferredLangs + ". Use 'preferredVariants' to specify variants");
        }
        String shortText = text.length() > maxLength ? text.substring(0, maxLength) : text;
        shortText = BYTE_ORDER_MARKS.matcher(shortText).replaceAll(" ");
        Map.Entry<String, Double> result = null;
        if (fasttextEnabled) {
            try {
                // fastText gets the raw text, so apply the URL and signature filters by hand
                shortText = UrlTextFilter.getInstance().filter(shortText);
                shortText = new RemoveEMailSignatureFilter().filter(shortText);
                shortText = BYTE_ORDER_MARKS.matcher(shortText).replaceAll(" ");
                Map<String, Double> scores = runFasttext(shortText, noopLangs);
                result = getHighestScoringResult(scores);
                if (result.getValue().floatValue() < THRESHOLD) {
                    // low confidence: add the number of known common words per language to the scores
                    CommonWords commonWords = new CommonWords();
                    Map<Language, Integer> lang2Count = commonWords.getKnownWordsPerLanguage(text);
                    for (Map.Entry<Language, Integer> entry : lang2Count.entrySet()) {
                        scores.merge(entry.getKey().getShortCode(), entry.getValue().doubleValue(), Double::sum);
                    }
                    result = getHighestScoringResult(scores);
                }
                if (text.length() < CONSIDER_ONLY_PREFERRED_THRESHOLD && !preferredLangs.isEmpty()) {
                    scores.keySet().removeIf(k -> !preferredLangs.contains(k));
                    result = getHighestScoringResult(scores);
                }
                double newScore = 0.99 / (30.0 / (double) Math.min(text.length(), 30));
                result = new AbstractMap.SimpleImmutableEntry<String, Double>(result.getKey(), newScore);
            } catch (Exception e) {
                // any failure disables fastText for good; the n-gram detector below takes over
                fasttextEnabled = false;
                RuleErrorNotification msg = new RuleErrorNotification(this.getClass().getSimpleName(), "-", String.format("Fasttext disabled, failed on '%s' (shortText='%s'): %s", text, shortText, ExceptionUtils.getStackTrace(e)));
                RuleLoggerManager.getInstance().log(msg, Level.WARNING);
                fasttextProcess.destroy();
                logger.error(String.format("Fasttext disabled, failed on '%s' (shortText='%s')", text, shortText), e);
            }
        }
        if (!fasttextEnabled) {
            shortText = textObjectFactory.forText(shortText).toString();
            result = detectLanguageCode(shortText);
            if (!noopLangs.isEmpty()) {
                logger.warn("Cannot consider noopLanguages because not in fastText mode: " + noopLangs);
            }
        }
        if (result != null && result.getKey() != null && canLanguageBeDetected(result.getKey(), noopLangs)) {
            return new DetectedLanguage(null, Languages.getLanguageForShortCode(result.getKey(), noopLangs), result.getValue().floatValue());
        }
        return null;
    }

    /** Returns a copy of the given codes in which every "nb" is replaced by "no". */
    private static List<String> replaceBokmalCode(List<String> langCodes) {
        return langCodes.stream().map(k -> k.equals("nb") ? "no" : k).collect(Collectors.toList());
    }

    /** Tells whether the code is a supported language or one of the additional codes. */
    private boolean canLanguageBeDetected(String langCode, List<String> additionalLanguageCodes) {
        return Languages.isLanguageSupported(langCode) || additionalLanguageCodes.contains(langCode);
    }

    /**
     * Launches fastText in {@code predict-prob} mode reading from stdin ("-") and opens
     * UTF-8 reader/writer on the process streams.
     */
    private void startFasttext(File modelPath, File binaryPath) throws IOException {
        fasttextProcess = new ProcessBuilder(binaryPath.getPath(), "predict-prob", modelPath.getPath(), "-", String.valueOf(K_HIGHEST_SCORES)).start();
        fasttextIn = new BufferedReader(new InputStreamReader(fasttextProcess.getInputStream(), StandardCharsets.UTF_8));
        fasttextOut = new BufferedWriter(new OutputStreamWriter(fasttextProcess.getOutputStream(), StandardCharsets.UTF_8));
    }

    /**
     * Returns the entry with the largest value. For an empty map the key is {@code null}
     * and the value is {@code -1.0}.
     */
    private Map.Entry<String, Double> getHighestScoringResult(Map<String, Double> probs) {
        String result = null;
        double max = -1.0;
        for (Map.Entry<String, Double> entry : probs.entrySet()) {
            if (entry.getValue() > max) {
                max = entry.getValue();
                result = entry.getKey();
            }
        }
        return new AbstractMap.SimpleImmutableEntry<String, Double>(result, max);
    }

    /**
     * Sends the text (line breaks replaced by spaces) to the fastText process and parses
     * the answer line, which is expected to consist of alternating label and probability
     * tokens separated by single spaces.
     *
     * @param text the text to classify
     * @param additionalLanguageCodes codes to keep even if the language is not supported
     * @return probabilities by language code, restricted to detectable languages
     * @throws IOException if communicating with the process fails or it returns no output
     * @throws RuntimeException if the output has an odd number of tokens or the thread is
     *     interrupted while waiting for a retry
     */
    private Map<String, Double> runFasttext(String text, List<String> additionalLanguageCodes) throws IOException {
        String joined = text.replace("\n", " ");
        String buffer;
        // write and read under one lock so that a response belongs to the request just sent
        synchronized (this) {
            fasttextOut.write(joined);
            fasttextOut.newLine();
            fasttextOut.flush();
            buffer = fasttextIn.readLine();
            if (buffer == null) {
                logger.warn("fasttextIn.readLine() returned null, trying again after short delay for input '{}'", text);
                try {
                    Thread.sleep(10L);
                } catch (InterruptedException e) {
                    // keep the interrupt visible to callers further up the stack
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e);
                }
                buffer = fasttextIn.readLine();
                if (buffer == null) {
                    logger.warn("fasttextIn.readLine() returned null again");
                    throw new IOException("fasttext returned no output for input '" + text + "'");
                }
            }
        }
        String[] values = buffer.split(" ");
        if (values.length % 2 != 0) {
            logger.error("Error while parsing fasttext output '{}'", buffer);
            throw new RuntimeException("Error while parsing fasttext output: " + buffer);
        }
        Map<String, Double> probabilities = new HashMap<>();
        for (int i = 0; i < values.length; i += 2) {
            String lang = values[i];
            // the language code is whatever follows the last "__" of the label
            String langCode = lang.substring(lang.lastIndexOf("__") + 2);
            double probValue = Double.parseDouble(values[i + 1]);
            if (canLanguageBeDetected(langCode, additionalLanguageCodes)) {
                probabilities.put(langCode, probValue);
            }
        }
        return probabilities;
    }

    /**
     * Runs the n-gram detector and returns language code and probability of its first
     * result, or {@code null} if it reports none.
     */
    @Nullable
    private Map.Entry<String, Double> detectLanguageCode(String text) {
        List<com.optimaize.langdetect.DetectedLanguage> candidates = languageDetector.getProbabilities(text);
        if (candidates.isEmpty()) {
            return null;
        }
        com.optimaize.langdetect.DetectedLanguage best = candidates.get(0);
        String code = best.getLocale().getLanguage();
        double prob = best.getProbability();
        return new AbstractMap.SimpleImmutableEntry<String, Double>(code, prob);
    }

    /**
     * Text filter that cuts off an e-mail signature, i.e. the first "-- " separator line
     * and everything that follows it.
     */
    class RemoveEMailSignatureFilter
    implements TextFilter {
        RemoveEMailSignatureFilter() {
        }

        @Override
        public String filter(CharSequence text) {
            return SIGNATURE.matcher(text.toString()).replaceFirst("");
        }
    }
}

