/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.clustering.kmeans;

import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.cursors.IntCursor;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import com.carrotsearch.hppc.sorting.IndirectComparator;
import com.carrotsearch.hppc.sorting.IndirectSort;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.ProcessingComponentBase;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.CommonAttributes;
import org.carrot2.core.attribute.Init;
import org.carrot2.core.attribute.Internal;
import org.carrot2.core.attribute.Processing;
import org.carrot2.mahout.math.function.Functions;
import org.carrot2.mahout.math.matrix.DoubleMatrix1D;
import org.carrot2.mahout.math.matrix.DoubleMatrix2D;
import org.carrot2.mahout.math.matrix.impl.DenseDoubleMatrix1D;
import org.carrot2.mahout.math.matrix.impl.DenseDoubleMatrix2D;
import org.carrot2.shaded.guava.common.collect.Lists;
import org.carrot2.text.clustering.IMonolingualClusteringAlgorithm;
import org.carrot2.text.clustering.MultilingualClustering;
import org.carrot2.text.preprocessing.LabelFormatter;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipeline;
import org.carrot2.text.preprocessing.pipeline.IPreprocessingPipeline;
import org.carrot2.text.vsm.ReducedVectorSpaceModelContext;
import org.carrot2.text.vsm.TermDocumentMatrixBuilder;
import org.carrot2.text.vsm.TermDocumentMatrixReducer;
import org.carrot2.text.vsm.VectorSpaceModelContext;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.Output;
import org.carrot2.util.attribute.Required;
import org.carrot2.util.attribute.constraint.ImplementingClasses;
import org.carrot2.util.attribute.constraint.IntRange;

@Bindable(prefix="BisectingKMeansClusteringAlgorithm", inherit={CommonAttributes.class})
public class BisectingKMeansClusteringAlgorithm
extends ProcessingComponentBase
implements IClusteringAlgorithm {
    /**
     * Name of the attribute group for the k-means specific settings. The constant is not
     * referenced by the visible code; the {@code @Group} annotations below carry the same
     * literal inline.
     */
    private static final String GROUP_KMEANS = "K-means";
    /**
     * Documents to cluster (required input, bound under the key {@code "documents"}).
     * {@link #process()} temporarily replaces this list with a per-language subset
     * while {@link #cluster(LanguageCode)} runs.
     */
    @Processing
    @Input
    @Required
    @Internal
    @Attribute(key="documents", inherit=true)
    public List<Document> documents;
    /**
     * Clusters produced by the last run (output, bound under the key {@code "clusters"}).
     */
    @Processing
    @Output
    @Internal
    @Attribute(key="clusters", inherit=true)
    public List<Cluster> clusters = null;
    /**
     * Number of clusters at which bisecting stops: {@link #cluster(LanguageCode)} keeps
     * splitting while it has fewer raw clusters than this value. Constrained to be at
     * least 2; defaults to 25. Twice this value is also passed to the matrix reducer
     * when dimensionality reduction is applied.
     */
    @Processing
    @Input
    @Attribute
    @IntRange(min=2)
    @Group(value="Clusters")
    @Level(value=AttributeLevel.BASIC)
    @Label(value="Cluster count")
    public int clusterCount = 25;
    /**
     * Upper bound on the number of k-means iterations performed by a single split.
     * Constrained to be at least 1; defaults to 15.
     */
    @Processing
    @Input
    @Attribute
    @IntRange(min=1)
    @Group(value="K-means")
    @Level(value=AttributeLevel.BASIC)
    @Label(value="Maximum iterations")
    public int maxIterations = 15;
    /**
     * When {@code true} and the number of documents exceeds {@code clusterCount * 2},
     * clustering runs on the reduced coefficient matrix instead of the full
     * term-document matrix. Defaults to {@code true}.
     */
    @Processing
    @Input
    @Attribute
    @Group(value="K-means")
    @Level(value=AttributeLevel.BASIC)
    @Label(value="Use dimensionality reduction")
    public boolean useDimensionalityReduction = true;
    /**
     * Number of partitions requested from each split. Constrained to the range 2..10;
     * defaults to 2. A cluster is only split further while it holds more than
     * {@code partitionCount * 2} documents.
     */
    @Processing
    @Input
    @Attribute
    @IntRange(min=2, max=10)
    @Group(value="K-means")
    @Level(value=AttributeLevel.BASIC)
    @Label(value="Partition count")
    public int partitionCount = 2;
    /**
     * Number of top-weighted centroid terms used to pick the label threshold for a
     * cluster. Constrained to the range 1..10; defaults to 3. Every term whose weight
     * reaches the threshold becomes a label, so ties can yield more labels than this.
     */
    @Processing
    @Input
    @Attribute
    @IntRange(min=1, max=10)
    @Group(value="Clusters")
    @Level(value=AttributeLevel.BASIC)
    @Label(value="Label count")
    public int labelCount = 3;
    /**
     * Pipeline used to preprocess the documents of one language before clustering.
     * Bound at initialization time; defaults to a {@link BasicPreprocessingPipeline}.
     */
    @Init
    @Input
    @Attribute
    @Internal
    @ImplementingClasses(classes={}, strict=false)
    @Level(value=AttributeLevel.ADVANCED)
    public IPreprocessingPipeline preprocessingPipeline = new BasicPreprocessingPipeline();
    /** Builds the term-document and term-phrase matrices for the vector space model. */
    public final TermDocumentMatrixBuilder matrixBuilder = new TermDocumentMatrixBuilder();
    /** Reduces the term-document matrix when dimensionality reduction is enabled. */
    public final TermDocumentMatrixReducer matrixReducer = new TermDocumentMatrixReducer();
    /**
     * Label formatter instance. The visible code only calls the static
     * {@code LabelFormatter.format(...)}; this instance is not referenced directly here.
     */
    public final LabelFormatter labelFormatter = new LabelFormatter();
    /** Drives per-language clustering; {@link #process()} delegates to it. */
    public final MultilingualClustering multilingualClustering = new MultilingualClustering();
    /**
     * Orders document-index lists from the largest to the smallest, so that the first
     * element of a sorted list is always the next candidate for bisecting.
     */
    private static final Comparator<IntArrayList> BY_SIZE_DESCENDING = new Comparator<IntArrayList>() {
        @Override
        public int compare(IntArrayList o1, IntArrayList o2) {
            // Arguments are swapped to obtain descending order. Integer.compare is used
            // instead of subtraction, which is the overflow-safe comparator idiom.
            return Integer.compare(o2.size(), o1.size());
        }
    };

    /**
     * Clusters {@link #documents} and stores the outcome in {@link #clusters}.
     *
     * <p>The work is delegated to {@link #multilingualClustering}, which calls back once
     * per language. Because {@link #cluster(LanguageCode)} reads its input from the
     * {@link #documents} field, the callback temporarily points that field at the
     * per-language subset. The original list is restored afterwards, also when
     * clustering fails with an exception. As the method mutates the component's own
     * fields while running, a single instance must not be processed concurrently.
     *
     * @throws ProcessingException declared by the processing component contract
     */
    @Override
    public void process() throws ProcessingException {
        final List<Document> originalDocuments = this.documents;
        try {
            this.clusters = this.multilingualClustering.process(this.documents, new IMonolingualClusteringAlgorithm() {
                @Override
                public List<Cluster> process(List<Document> documents, LanguageCode language) {
                    BisectingKMeansClusteringAlgorithm.this.documents = documents;
                    BisectingKMeansClusteringAlgorithm.this.cluster(language);
                    return BisectingKMeansClusteringAlgorithm.this.clusters;
                }
            });
        } finally {
            // Always undo the per-language swap so a failed run does not leave the
            // component holding only a subset of the caller's documents.
            this.documents = originalDocuments;
        }
    }

    /**
     * Clusters the current contents of {@link #documents}, treating them as documents
     * written in {@code language}, and stores the outcome in {@link #clusters}.
     *
     * <p>Steps:
     * <ol>
     * <li>Preprocess the documents and select feature words: the most frequent original
     * word of every stem, unless its type bits intersect the exclusion mask.</li>
     * <li>If the preprocessing context reports labels, build the vector space model,
     * optionally reduce it, and bisect: split all documents once, then repeatedly split
     * the largest cluster that can still be divided until {@link #clusterCount} raw
     * clusters exist or no candidate is left.</li>
     * <li>Turn every raw cluster with more than one document into a {@link Cluster}
     * labelled by {@code getLabels}.</li>
     * <li>Sort the clusters and hand them, together with all documents, to
     * {@code Cluster.appendOtherTopics}.</li>
     * </ol>
     *
     * @param language language passed to the preprocessing pipeline
     */
    protected void cluster(LanguageCode language) {
        // Words whose type has any of these bits set are not used as features.
        // NOTE(review): presumably a combination of tokenizer type/flag constants
        // (e.g. common word, query word, numeric) -- confirm against the tokenizer API.
        final int excludedWordTypeMask = 0x3002;

        final PreprocessingContext context = this.preprocessingPipeline.preprocess(this.documents, null, language);

        final int[] stemsMfow = context.allStems.mostFrequentOriginalWordIndex;
        final short[] wordsType = context.allWords.type;
        final IntArrayList featureIndices = new IntArrayList(stemsMfow.length);
        for (int wordIndex : stemsMfow) {
            if ((wordsType[wordIndex] & excludedWordTypeMask) == 0) {
                featureIndices.add(wordIndex);
            }
        }
        context.allLabels.featureIndex = featureIndices.toArray();
        context.allLabels.firstPhraseIndex = -1;

        this.clusters = Lists.newArrayList();
        if (context.hasLabels()) {
            final VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(context);
            final ReducedVectorSpaceModelContext reducedVsmContext = new ReducedVectorSpaceModelContext(vsmContext);
            this.matrixBuilder.buildTermDocumentMatrix(vsmContext);
            this.matrixBuilder.buildTermPhraseMatrix(vsmContext);

            // Labels are derived from matrix rows, so the stem -> row map is inverted.
            final IntIntHashMap rowToStemIndex = new IntIntHashMap();
            for (IntIntCursor c : vsmContext.stemToRowIndex) {
                rowToStemIndex.put(c.value, c.key);
            }

            // Cluster on the reduced representation only when there are clearly more
            // documents than the requested number of dimensions.
            final int reducedDimensions = this.clusterCount * 2;
            final DoubleMatrix2D tdMatrix;
            if (this.useDimensionalityReduction && reducedDimensions < context.documents.size()) {
                this.matrixReducer.reduce(reducedVsmContext, reducedDimensions);
                tdMatrix = reducedVsmContext.coefficientMatrix.viewDice();
            } else {
                tdMatrix = vsmContext.termDocumentMatrix;
            }

            // Start from a single group holding every column (document) of the matrix.
            final IntArrayList columns = new IntArrayList(tdMatrix.columns());
            for (int c = 0; c < tdMatrix.columns(); ++c) {
                columns.add(c);
            }

            final List<IntArrayList> rawClusters = Lists.newArrayList();
            rawClusters.addAll(this.split(this.partitionCount, tdMatrix, columns, this.maxIterations));
            Collections.sort(rawClusters, BY_SIZE_DESCENDING);

            // Bisect the largest cluster; if it cannot be divided, try the next largest.
            int candidate = 0;
            while (rawClusters.size() < this.clusterCount && candidate < rawClusters.size()) {
                final IntArrayList largest = rawClusters.get(candidate);
                if (largest.size() <= this.partitionCount * 2) {
                    // The list is sorted by size, so all remaining clusters are too small as well.
                    break;
                }
                final List<IntArrayList> parts = this.split(this.partitionCount, tdMatrix, largest, this.maxIterations);
                if (parts.size() > 1) {
                    rawClusters.remove(candidate);
                    rawClusters.addAll(parts);
                    Collections.sort(rawClusters, BY_SIZE_DESCENDING);
                    candidate = 0;
                } else {
                    ++candidate;
                }
            }

            for (IntArrayList rawCluster : rawClusters) {
                if (rawCluster.size() <= 1) {
                    // Single-document groups are not reported as clusters.
                    continue;
                }
                final Cluster cluster = new Cluster();
                // Labels always come from the unreduced term-document matrix, whose rows are real terms.
                cluster.addPhrases(this.getLabels(rawCluster, vsmContext.termDocumentMatrix, rowToStemIndex, context.allStems.mostFrequentOriginalWordIndex, context.allWords.image));
                for (int j = 0; j < rawCluster.size(); ++j) {
                    cluster.addDocuments(this.documents.get(rawCluster.get(j)));
                }
                this.clusters.add(cluster);
            }
        }
        Collections.sort(this.clusters, Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR);
        Cluster.appendOtherTopics(this.documents, this.clusters);
    }

    /**
     * Derives labels for one cluster from the terms that weigh most in its documents.
     *
     * <p>The term vectors of the cluster's documents are summed, and the weight of the
     * {@link #labelCount}-th heaviest term becomes the threshold. Every term whose summed
     * weight reaches the threshold is formatted as a label, in matrix row order. Ties at
     * the threshold are all kept, so more than {@link #labelCount} labels may be returned.
     *
     * @param documents column indices of the cluster's documents in {@code termDocumentMatrix}
     * @param termDocumentMatrix matrix with one row per term and one column per document
     * @param rowToStemIndex maps a matrix row to a stem index
     * @param mostFrequentOriginalWordIndex maps a stem index to a word index
     * @param wordImage character images of words, indexed by word index
     * @return the formatted labels; empty when the matrix has no rows
     */
    private List<String> getLabels(IntArrayList documents, DoubleMatrix2D termDocumentMatrix, IntIntHashMap rowToStemIndex, int[] mostFrequentOriginalWordIndex, char[][] wordImage) {
        final int termCount = termDocumentMatrix.rows();
        final List<String> labels = Lists.newArrayListWithCapacity(this.labelCount);
        if (termCount == 0) {
            // Without terms there is no threshold element to look up below.
            return labels;
        }

        // Sum of the documents' term vectors. Only the relative order of the weights
        // matters here, so the sum is not divided by the number of documents.
        final DoubleMatrix1D centroid = new DenseDoubleMatrix1D(termCount);
        for (IntCursor d : documents) {
            centroid.assign(termDocumentMatrix.viewColumn(d.value), Functions.PLUS);
        }

        // Row indices ordered by ascending weight.
        final int[] order = IndirectSort.mergesort(0, termCount, new IndirectComparator() {
            @Override
            public int compare(int a, int b) {
                final double valueA = centroid.get(a);
                final double valueB = centroid.get(b);
                return valueA < valueB ? -1 : (valueA > valueB ? 1 : 0);
            }
        });

        final int thresholdPosition = order.length - Math.min(this.labelCount, order.length);
        final double minValueForLabel = centroid.get(order[thresholdPosition]);
        for (int row = 0; row < termCount; ++row) {
            if (centroid.getQuick(row) >= minValueForLabel) {
                final char[] image = wordImage[mostFrequentOriginalWordIndex[rowToStemIndex.get(row)]];
                labels.add(LabelFormatter.format(new char[][]{image}, new boolean[]{false}, false));
            }
        }
        return labels;
    }

    /**
     * Divides the given columns of {@code input} into at most {@code partitions} groups
     * using k-means with dot-product similarity.
     *
     * <p>Columns start out distributed round-robin. Each iteration recomputes every
     * group's centroid as the mean of its columns and then reassigns each column to the
     * centroid with the highest dot product (the lowest partition index wins ties). The
     * loop stops after {@code iterations} rounds or as soon as an assignment repeats.
     *
     * <p>A group that holds no columns gets an all-zero centroid. Dividing by its size
     * would produce NaN, and a NaN centroid in partition 0 would defeat every
     * {@code <} comparison and pull all columns into that partition.
     *
     * @param partitions number of groups to aim for
     * @param input matrix whose columns are the items to cluster
     * @param columns indices of the columns of {@code input} to divide
     * @param iterations maximum number of k-means iterations
     * @return the non-empty groups, each holding column indices of {@code input}
     */
    private List<IntArrayList> split(int partitions, DoubleMatrix2D input, IntArrayList columns, int iterations) {
        // Column p of the working copy corresponds to column selectedToInput[p] of input.
        final int[] selectedToInput = columns.toArray();
        final DoubleMatrix2D selected = input.viewSelection(null, selectedToInput).copy();
        final int rowCount = selected.rows();
        final int columnCount = selected.columns();

        // Initial assignment: round-robin over the partitions.
        List<IntArrayList> result = emptyPartitions(partitions, columnCount);
        for (int c = 0; c < columnCount; ++c) {
            result.get(c % partitions).add(c);
        }

        // The centroids need no seeding: every entry is written at the start of each
        // iteration before it is read. Seeding from the first `partitions` columns
        // would also be out of range when there are fewer columns than partitions.
        final DoubleMatrix2D centroids = new DenseDoubleMatrix2D(rowCount, partitions);
        final DoubleMatrix2D similarities = new DenseDoubleMatrix2D(partitions, columnCount);

        for (int it = 0; it < iterations; ++it) {
            // Update step: centroid = mean of the member columns (zero for an empty group).
            for (int p = 0; p < partitions; ++p) {
                final IntArrayList members = result.get(p);
                final int memberCount = members.size();
                for (int r = 0; r < rowCount; ++r) {
                    double sum = 0.0;
                    for (int m = 0; m < memberCount; ++m) {
                        sum += selected.get(r, members.get(m));
                    }
                    centroids.setQuick(r, p, memberCount == 0 ? 0.0 : sum / (double) memberCount);
                }
            }

            // Assignment step: similarities = centroids^T * selected.
            final List<IntArrayList> previousResult = result;
            result = emptyPartitions(partitions, columnCount);
            centroids.zMult(selected, similarities, 1.0, 0.0, true, false);
            for (int c = 0; c < columnCount; ++c) {
                int best = 0;
                double bestSimilarity = similarities.get(0, c);
                for (int p = 1; p < partitions; ++p) {
                    final double similarity = similarities.get(p, c);
                    if (bestSimilarity < similarity) {
                        bestSimilarity = similarity;
                        best = p;
                    }
                }
                result.get(best).add(c);
            }

            if (previousResult.equals(result)) {
                // Assignment is stable; further iterations would not change it.
                break;
            }
        }

        // Drop empty groups and translate working-copy indices back to input columns.
        for (Iterator<IntArrayList> groups = result.iterator(); groups.hasNext(); ) {
            final IntArrayList members = groups.next();
            if (members.isEmpty()) {
                groups.remove();
                continue;
            }
            for (int j = 0; j < members.size(); ++j) {
                members.set(j, selectedToInput[members.get(j)]);
            }
        }
        return result;
    }

    /** Creates {@code count} empty index lists, each pre-sized for {@code capacity} entries. */
    private static List<IntArrayList> emptyPartitions(int count, int capacity) {
        final List<IntArrayList> lists = new ArrayList<IntArrayList>();
        for (int i = 0; i < count; ++i) {
            lists.add(new IntArrayList(capacity));
        }
        return lists;
    }
}

