/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.ml.aggs.categorization;

import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.BytesRefHash;
import org.elasticsearch.common.util.LongArray;
import org.elasticsearch.common.util.ObjectArray;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.aggregations.AggregationExecutionContext;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.CardinalityUpperBound;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.bucket.DeferableBucketAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.lookup.Source;
import org.elasticsearch.search.lookup.SourceFilter;
import org.elasticsearch.search.lookup.SourceProvider;
import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationBytesRefHash;
import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationPartOfSpeechDictionary;
import org.elasticsearch.xpack.ml.aggs.categorization.InternalCategorizationAggregation;
import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategorizer;
import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategory;
import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer;

/**
 * Bucket aggregator that groups the text values of a source field into categories.
 *
 * <p>For every collected document the configured field is extracted from the document source,
 * each extracted value is run through a {@link CategorizationAnalyzer}, and the resulting token
 * stream is handed to a {@link TokenListCategorizer}. One categorizer is kept per owning bucket
 * ordinal, and every category it reports is mapped to a bucket ordinal through
 * {@link LongKeyedBucketOrds}.
 */
public class CategorizeTextAggregator extends DeferableBucketAggregator {
    private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
    private final SourceProvider sourceProvider;
    private final SourceFilter sourceFilter;
    private final MappedFieldType fieldType;
    private final CategorizationAnalyzer analyzer;
    private final String sourceFieldName;
    /** One categorizer per owning bucket ordinal; not final because growing may return a new array. */
    private ObjectArray<TokenListCategorizer> categorizers;
    private final int similarityThreshold;
    private final LongKeyedBucketOrds bucketOrds;
    private final CategorizationBytesRefHash bytesRefHash;
    private final CategorizationPartOfSpeechDictionary partOfSpeechDictionary;

    /**
     * Creates the aggregator and resolves the analyzer used to tokenize field values.
     *
     * @param name                         aggregation name
     * @param factories                    sub-aggregation factories
     * @param context                      aggregation context supplying source lookup, analyzers and big arrays
     * @param parent                       parent aggregator
     * @param sourceFieldName              path of the field to extract from the document source
     * @param fieldType                    mapped field type, used for its name and to render {@link BytesRef} values
     * @param bucketCountThresholds        thresholds; the shard size caps the buckets built per owning ordinal
     * @param similarityThreshold          similarity threshold; divided by 100 before being given to each categorizer
     * @param categorizationAnalyzerConfig analyzer configuration, or {@code null} to use the standard
     *                                     categorization analyzer
     * @param metadata                     aggregation metadata
     * @throws IOException              propagated from the superclass constructor or analyzer construction
     * @throws IllegalArgumentException if the configuration names an analyzer that the context cannot resolve
     */
    protected CategorizeTextAggregator(
        String name,
        AggregatorFactories factories,
        AggregationContext context,
        Aggregator parent,
        String sourceFieldName,
        MappedFieldType fieldType,
        TermsAggregator.BucketCountThresholds bucketCountThresholds,
        int similarityThreshold,
        CategorizationAnalyzerConfig categorizationAnalyzerConfig,
        Map<String, Object> metadata
    ) throws IOException {
        super(name, factories, context, parent, metadata);
        this.sourceProvider = context.lookup();
        this.sourceFieldName = sourceFieldName;
        // Only the categorized field is needed from the source, so filter everything else out.
        this.sourceFilter = new SourceFilter(new String[] { sourceFieldName }, Strings.EMPTY_ARRAY);
        this.fieldType = fieldType;
        // Build the default configuration only when the caller did not supply one.
        CategorizationAnalyzerConfig analyzerConfig = categorizationAnalyzerConfig != null
            ? categorizationAnalyzerConfig
            : CategorizationAnalyzerConfig.buildStandardCategorizationAnalyzer(List.of());
        String analyzerName = analyzerConfig.getAnalyzer();
        if (analyzerName != null) {
            Analyzer globalAnalyzer = context.getNamedAnalyzer(analyzerName);
            if (globalAnalyzer == null) {
                throw new IllegalArgumentException("Failed to find global analyzer [" + analyzerName + "]");
            }
            this.analyzer = new CategorizationAnalyzer(globalAnalyzer, false);
        } else {
            this.analyzer = new CategorizationAnalyzer(
                context.buildCustomAnalyzer(
                    context.getIndexSettings(),
                    false,
                    analyzerConfig.getTokenizer(),
                    analyzerConfig.getCharFilters(),
                    analyzerConfig.getTokenFilters()
                ),
                true
            );
        }
        this.categorizers = context.bigArrays().newObjectArray(1L);
        this.similarityThreshold = similarityThreshold;
        this.bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), CardinalityUpperBound.MANY);
        this.bucketCountThresholds = bucketCountThresholds;
        this.bytesRefHash = new CategorizationBytesRefHash(new BytesRefHash(2048L, context.bigArrays()));
        this.partOfSpeechDictionary = CategorizationPartOfSpeechDictionary.getInstance();
    }

    /** Releases the analyzer, the token hash, the bucket ordinals and the categorizer array. */
    @Override
    protected void doClose() {
        super.doClose();
        Releasables.close(this.analyzer, this.bytesRefHash, this.bucketOrds, this.categorizers);
    }

    /**
     * Builds one {@link InternalCategorizationAggregation} per requested owning bucket ordinal.
     *
     * @param ordsToCollect owning bucket ordinals to build results for
     * @return one result per entry of {@code ordsToCollect}, in the same order
     * @throws IOException propagated from building the sub-aggregations
     */
    @Override
    public InternalAggregation[] buildAggregations(LongArray ordsToCollect) throws IOException {
        try (
            ObjectArray<InternalCategorizationAggregation.Bucket[]> topBucketsPerOrd = this.bigArrays()
                .newObjectArray(ordsToCollect.size())
        ) {
            for (long ordIdx = 0L; ordIdx < ordsToCollect.size(); ++ordIdx) {
                long ord = ordsToCollect.get(ordIdx);
                // An owning ordinal that never collected a document has no categorizer.
                TokenListCategorizer categorizer = ord < this.categorizers.size() ? this.categorizers.get(ord) : null;
                if (categorizer == null) {
                    topBucketsPerOrd.set(ordIdx, new InternalCategorizationAggregation.Bucket[0]);
                    continue;
                }
                // Buckets are registered under the owning bucket ordinal (see processTokenStream), so the
                // count must be looked up by that ordinal and not by the position within ordsToCollect.
                int size = (int) Math.min(this.bucketOrds.bucketsInOrd(ord), (long) this.bucketCountThresholds.getShardSize());
                topBucketsPerOrd.set(ordIdx, categorizer.toOrderedBuckets(size));
            }
            this.buildSubAggsForAllBuckets(
                topBucketsPerOrd,
                InternalCategorizationAggregation.Bucket::getBucketOrd,
                InternalCategorizationAggregation.Bucket::setAggregations
            );
            InternalAggregation[] results = new InternalAggregation[Math.toIntExact(ordsToCollect.size())];
            for (int ordIdx = 0; ordIdx < results.length; ++ordIdx) {
                results[ordIdx] = new InternalCategorizationAggregation(
                    this.name,
                    this.bucketCountThresholds.getRequiredSize(),
                    this.bucketCountThresholds.getMinDocCount(),
                    this.similarityThreshold,
                    this.metadata(),
                    Arrays.asList(topBucketsPerOrd.get(ordIdx))
                );
            }
            return results;
        }
    }

    /** Returns a result without buckets that carries this aggregation's settings. */
    @Override
    public InternalAggregation buildEmptyAggregation() {
        return new InternalCategorizationAggregation(
            this.name,
            this.bucketCountThresholds.getRequiredSize(),
            this.bucketCountThresholds.getMinDocCount(),
            this.similarityThreshold,
            this.metadata()
        );
    }

    /**
     * Returns the per-leaf collector that categorizes the source field values of each collected document.
     *
     * @param aggCtx execution context providing the leaf reader context
     * @param sub    collector of the sub-aggregations
     */
    @Override
    protected LeafBucketCollector getLeafCollector(final AggregationExecutionContext aggCtx, final LeafBucketCollector sub) {
        return new LeafBucketCollectorBase(sub, null) {

            /** Looks up (or lazily creates) the categorizer of the owning bucket and categorizes the document. */
            @Override
            public void collect(int doc, long owningBucketOrd) throws IOException {
                categorizers = bigArrays().grow(categorizers, owningBucketOrd + 1L);
                TokenListCategorizer categorizer = categorizers.get(owningBucketOrd);
                if (categorizer == null) {
                    categorizer = new TokenListCategorizer(bytesRefHash, partOfSpeechDictionary, (float) similarityThreshold / 100.0f);
                    // Account for the new categorizer's memory with the request circuit breaker.
                    addRequestCircuitBreakerBytes(categorizer.ramBytesUsed());
                    categorizers.set(owningBucketOrd, categorizer);
                }
                collectFromSource(doc, owningBucketOrd, categorizer);
            }

            /** Extracts every raw value of the field from the document source and categorizes each one. */
            private void collectFromSource(int doc, long owningBucketOrd, TokenListCategorizer categorizer) throws IOException {
                Source source = sourceProvider.getSource(aggCtx.getLeafReaderContext(), doc).filter(sourceFilter);
                Iterator<String> values = Iterators.map(
                    XContentMapValues.extractRawValues(sourceFieldName, source.source()).iterator(),
                    obj -> {
                        if (obj instanceof BytesRef) {
                            return fieldType.valueForDisplay(obj).toString();
                        }
                        return obj == null ? null : obj.toString();
                    }
                );
                while (values.hasNext()) {
                    String value = values.next();
                    if (value == null) {
                        // The mapping above yields null for a null raw value; there is nothing to
                        // analyze, and dereferencing it below would fail with a NullPointerException.
                        continue;
                    }
                    try (TokenStream ts = analyzer.tokenStream(fieldType.name(), value)) {
                        processTokenStream(owningBucketOrd, ts, value.length(), doc, categorizer);
                    }
                }
            }

            /**
             * Categorizes one token stream and collects the document into the bucket of the resulting category.
             * Does nothing further when the categorizer returns no category.
             */
            private void processTokenStream(long owningBucketOrd, TokenStream ts, int unfilteredLength, int doc, TokenListCategorizer categorizer)
                throws IOException {
                long previousSize = categorizer.ramBytesUsed();
                TokenListCategory category = categorizer.computeCategory(ts, unfilteredLength, (long) docCountProvider.getDocCount(doc));
                if (category == null) {
                    return;
                }
                // Report the change in the categorizer's memory footprint to the request circuit breaker.
                long sizeDiff = categorizer.ramBytesUsed() - previousSize;
                addRequestCircuitBreakerBytes(sizeDiff);
                long bucketOrd = bucketOrds.add(owningBucketOrd, (long) category.getId());
                if (bucketOrd < 0L) {
                    // A negative result encodes an already existing bucket as (-1 - ordinal).
                    collectExistingBucket(sub, doc, -1L - bucketOrd);
                } else {
                    category.setBucketOrd(bucketOrd);
                    collectBucket(sub, doc, bucketOrd);
                }
            }
        };
    }
}

