/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.text.clustering;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.attribute.Processing;
import org.carrot2.shaded.guava.common.base.Function;
import org.carrot2.shaded.guava.common.base.Predicate;
import org.carrot2.shaded.guava.common.collect.HashMultiset;
import org.carrot2.shaded.guava.common.collect.ImmutableList;
import org.carrot2.shaded.guava.common.collect.ImmutableListMultimap;
import org.carrot2.shaded.guava.common.collect.Iterators;
import org.carrot2.shaded.guava.common.collect.Lists;
import org.carrot2.shaded.guava.common.collect.Maps;
import org.carrot2.shaded.guava.common.collect.Multimaps;
import org.carrot2.shaded.guava.common.collect.Multiset;
import org.carrot2.text.clustering.IMonolingualClusteringAlgorithm;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.Output;
import org.carrot2.util.attribute.Required;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Clusters a list of documents that may be written in different languages by delegating the
 * actual clustering to an {@link IMonolingualClusteringAlgorithm} and then combining the
 * results according to the configured {@link LanguageAggregationStrategy}.
 *
 * <p>Besides the returned clusters, each call to {@link #process(List, IMonolingualClusteringAlgorithm)}
 * fills in the {@link #languageCounts} and {@link #majorityLanguage} output attributes.
 */
@Bindable(prefix="MultilingualClustering")
public class MultilingualClustering {
    /** Name of the attribute group all attributes of this class belong to. */
    private static final String MULTILINGUAL_CLUSTERING = "Multilingual clustering";
    private static final Logger logger = LoggerFactory.getLogger(MultilingualClustering.class);

    /** Determines how clusters produced for the individual languages are combined. */
    @Input
    @Processing
    @Attribute
    @Required
    @Group(value=MULTILINGUAL_CLUSTERING)
    @Level(value=AttributeLevel.MEDIUM)
    public LanguageAggregationStrategy languageAggregationStrategy = LanguageAggregationStrategy.FLATTEN_MAJOR_LANGUAGE;

    /**
     * Language passed to the clustering algorithm for documents whose language is
     * {@code null}; also the fallback when no document declares a language in the
     * {@link LanguageAggregationStrategy#CLUSTER_IN_MAJORITY_LANGUAGE} strategy.
     */
    @Input
    @Processing
    @Attribute
    @Required
    @Group(value=MULTILINGUAL_CLUSTERING)
    @Level(value=AttributeLevel.MEDIUM)
    public LanguageCode defaultLanguage = LanguageCode.ENGLISH;

    /**
     * Number of documents per language seen during the last call to {@code process}. Keys are
     * the ISO codes of the languages; documents without a language are counted under the
     * empty string.
     */
    @Output
    @Processing
    @Attribute
    @Group(value=MULTILINGUAL_CLUSTERING)
    @Level(value=AttributeLevel.MEDIUM)
    public Map<String, Integer> languageCounts;

    /**
     * ISO code of the most frequent non-null document language. Only set by the
     * {@link LanguageAggregationStrategy#CLUSTER_IN_MAJORITY_LANGUAGE} strategy; an empty
     * string otherwise, or when no document declares a language.
     */
    @Output
    @Processing
    @Attribute
    @Group(value=MULTILINGUAL_CLUSTERING)
    @Level(value=AttributeLevel.MEDIUM)
    public String majorityLanguage = "";

    /**
     * Clusters the provided documents using the given monolingual algorithm and aggregates
     * the results according to {@link #languageAggregationStrategy}.
     *
     * @param documents documents to cluster
     * @param algorithm algorithm invoked to cluster documents assuming a single language
     * @return the aggregated clusters; a new empty list when {@code documents} is empty
     */
    public List<Cluster> process(List<Document> documents, IMonolingualClusteringAlgorithm algorithm) {
        // Reset both output attributes so that a reused instance never reports values
        // left over from a previous run.
        this.languageCounts = Maps.newHashMap();
        this.majorityLanguage = "";
        if (documents.isEmpty()) {
            return Lists.newArrayList();
        }
        if (LanguageAggregationStrategy.CLUSTER_IN_MAJORITY_LANGUAGE == this.languageAggregationStrategy) {
            return this.clusterInMajorityLanguage(documents, algorithm);
        }

        Map<LanguageCode, Cluster> clustersByLanguage = this.clusterByLanguage(documents, algorithm);
        List<Cluster> clusters = Lists.newArrayList(clustersByLanguage.values());
        boolean multipleLanguages = clustersByLanguage.size() > 1;

        // With a single language there is nothing to aggregate, so the result is always flattened.
        if (!multipleLanguages || LanguageAggregationStrategy.FLATTEN_ALL == this.languageAggregationStrategy) {
            return this.flattenAll(documents, clusters, multipleLanguages);
        }

        Collections.sort(clusters, Collections.reverseOrder(Cluster.BY_SIZE_COMPARATOR));
        if (LanguageAggregationStrategy.FLATTEN_MAJOR_LANGUAGE == this.languageAggregationStrategy) {
            return this.flattenMajorLanguage(clusters);
        }
        return clusters;
    }

    /**
     * Collects the subclusters of all language clusters into one flat list, skipping
     * subclusters flagged as "other topics", and finally calls
     * {@link Cluster#appendOtherTopics} with the complete document list.
     *
     * @param documents all input documents
     * @param languageClusters one cluster per language
     * @param sort whether to sort the flattened list with
     *        {@code Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR}
     */
    private List<Cluster> flattenAll(List<Document> documents, List<Cluster> languageClusters, boolean sort) {
        List<Cluster> flattenedClusters = Lists.newArrayList();
        for (Cluster languageCluster : languageClusters) {
            for (Cluster subcluster : languageCluster.getSubclusters()) {
                if (!subcluster.isOtherTopics()) {
                    flattenedClusters.add(subcluster);
                }
            }
        }
        if (sort) {
            Collections.sort(flattenedClusters, Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR);
        }
        Cluster.appendOtherTopics(documents, flattenedClusters);
        return flattenedClusters;
    }

    /**
     * Promotes the subclusters of the first language cluster that has any subclusters to the
     * top level and groups the remaining language clusters under an "Other Languages" cluster.
     * If no language cluster has subclusters, the input list is returned unchanged.
     *
     * @param clusters language clusters in the order in which they should be examined; the
     *        promoted cluster is removed from this list
     */
    private List<Cluster> flattenMajorLanguage(List<Cluster> clusters) {
        Cluster majorLanguageCluster = null;
        Iterator<Cluster> iterator = clusters.iterator();
        while (iterator.hasNext()) {
            Cluster candidate = iterator.next();
            if (!candidate.getSubclusters().isEmpty()) {
                majorLanguageCluster = candidate;
                // The remaining clusters become subclusters of "Other Languages".
                iterator.remove();
                break;
            }
        }
        if (majorLanguageCluster == null) {
            return clusters;
        }

        List<Cluster> flattenedClusters = Lists.newArrayList();
        flattenedClusters.addAll(majorLanguageCluster.getSubclusters());
        Cluster otherLanguages = new Cluster("Other Languages", new Document[0]);
        otherLanguages.addSubclusters(clusters);
        flattenedClusters.add(otherLanguages);
        return flattenedClusters;
    }

    /**
     * Groups documents by language, clusters each group separately and wraps the result for
     * each language in a dedicated parent cluster. Also records the number of documents per
     * language in {@link #languageCounts}.
     *
     * @return parent clusters keyed by language; documents without a language are keyed by
     *         {@code null}
     */
    private Map<LanguageCode, Cluster> clusterByLanguage(List<Document> documents, IMonolingualClusteringAlgorithm algorithm) {
        // Index by the language's enum name; the empty string stands for "no language".
        ImmutableListMultimap<String, Document> documentsByLanguage = Multimaps.index(documents, new Function<Document, String>(){

            @Override
            public String apply(Document document) {
                LanguageCode language = document.getLanguage();
                return language != null ? language.name() : "";
            }
        });

        Map<LanguageCode, Cluster> clusters = Maps.newHashMap();
        for (String language : documentsByLanguage.keySet()) {
            ImmutableList<Document> languageDocuments = documentsByLanguage.get(language);
            LanguageCode languageCode = language.isEmpty() ? null : LanguageCode.valueOf(language);
            Cluster languageCluster = new Cluster(languageCode != null ? languageCode.toString() : "Unknown Language", new Document[0]);
            this.languageCounts.put(languageCode != null ? languageCode.getIsoCode() : "", languageDocuments.size());

            // Documents without a language are clustered assuming the default language.
            LanguageCode currentLanguage = languageCode != null ? languageCode : this.defaultLanguage;
            logger.debug("Performing monolingual clustering in: {}", currentLanguage);
            List<Cluster> clustersForLanguage = algorithm.process(languageDocuments, currentLanguage);

            boolean onlyOtherTopics = clustersForLanguage.size() == 1 && clustersForLanguage.get(0).isOtherTopics();
            if (clustersForLanguage.isEmpty() || onlyOtherTopics) {
                // No meaningful clusters: attach the documents directly to the language cluster.
                languageCluster.addDocuments(languageDocuments);
            } else {
                languageCluster.addSubclusters(clustersForLanguage);
            }
            clusters.put(languageCode, languageCluster);
        }
        return clusters;
    }

    /**
     * Clusters all documents in one pass, assuming they are written in the most frequent
     * non-null language among them, or in {@link #defaultLanguage} if no document declares a
     * language. Fills in {@link #languageCounts} and {@link #majorityLanguage}.
     */
    private List<Cluster> clusterInMajorityLanguage(List<Document> documents, IMonolingualClusteringAlgorithm algorithm) {
        Multiset<LanguageCode> counts = HashMultiset.create();
        for (Document document : documents) {
            counts.add(document.getLanguage());
        }

        LanguageCode majority = this.defaultLanguage;
        int maxCount = 0;
        for (Multiset.Entry<LanguageCode> entry : counts.entrySet()) {
            LanguageCode language = entry.getElement();
            int count = entry.getCount();
            if (language != null && count > maxCount) {
                maxCount = count;
                majority = language;
                this.majorityLanguage = language.getIsoCode();
            }
            this.languageCounts.put(language != null ? language.getIsoCode() : "", count);
        }

        logger.debug("Performing clustering in majority language: {}", majority);
        List<Cluster> clusters = algorithm.process(documents, majority);
        Cluster.appendOtherTopics(documents, clusters);
        return clusters;
    }

    /**
     * Strategies for combining clusters created for the individual languages.
     */
    public static enum LanguageAggregationStrategy {
        /** Subclusters of all language clusters are merged into a single flat list. */
        FLATTEN_ALL("Flatten clusters from all languages"),
        /**
         * Subclusters of one language cluster are moved to the top level; the other language
         * clusters are grouped under an "Other Languages" cluster.
         */
        FLATTEN_MAJOR_LANGUAGE("Flatten clusters from the majority language"),
        /** Each language keeps its own parent cluster. */
        FLATTEN_NONE("Dedicated parent cluster for each language"),
        /** All documents are clustered together in the language of the majority. */
        CLUSTER_IN_MAJORITY_LANGUAGE("Cluster all documents assuming the language of the majority");

        /** Human-readable description returned by {@link #toString()}. */
        private final String label;

        private LanguageAggregationStrategy(String label) {
            this.label = label;
        }

        @Override
        public String toString() {
            return this.label;
        }
    }
}

