Skip to content

Commit

Permalink
Propagate DocValueFormat to all terms aggs. elastic#17646
Browse files Browse the repository at this point in the history
TL;DR This commit should not have any impact on terms aggs, it will just make
supporting ipv6 easier.

Currently only the numeric terms aggs propagate the DocValueFormat instance since
we use numerics to represent also dates or ip addresses. Since string terms aggs
are only used for text/keyword/string fields, they do not use the format and just
call toUt8String(). However when we support ipv6, ip addresses as well will be
encoded in sorted doc values (just like strings) so we will need to use the
DocValueFormat to format the keys.
  • Loading branch information
jpountz committed Apr 11, 2016
1 parent b08d453 commit 13c1efd
Show file tree
Hide file tree
Showing 15 changed files with 102 additions and 93 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.LongHash;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
Expand Down Expand Up @@ -51,12 +52,13 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
private final SignificanceHeuristic significanceHeuristic;

public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories,
ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, BucketCountThresholds bucketCountThresholds,
IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext, Aggregator parent,
ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, DocValueFormat format,
BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude,
AggregationContext aggregationContext, Aggregator parent,
SignificanceHeuristic significanceHeuristic, SignificantTermsAggregatorFactory termsAggFactory,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {

super(name, factories, valuesSource, null, bucketCountThresholds, includeExclude, aggregationContext, parent,
super(name, factories, valuesSource, null, format, bucketCountThresholds, includeExclude, aggregationContext, parent,
SubAggCollectionMode.DEPTH_FIRST, false, pipelineAggregators, metaData);
this.significanceHeuristic = significanceHeuristic;
this.termsAggFactory = termsAggFactory;
Expand Down Expand Up @@ -159,11 +161,11 @@ public static class WithHash extends GlobalOrdinalsSignificantTermsAggregator {
private final LongHash bucketOrds;

public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude,
DocValueFormat format, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude,
AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggFactory, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
super(name, factories, valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, significanceHeuristic,
super(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, aggregationContext, parent, significanceHeuristic,
termsAggFactory, pipelineAggregators, metaData);
bucketOrds = new LongHash(1, aggregationContext.bigArrays());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
Expand Down Expand Up @@ -49,12 +50,12 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
protected final SignificantTermsAggregatorFactory termsAggFactory;
private final SignificanceHeuristic significanceHeuristic;

public SignificantStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,
public SignificantStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format,
BucketCountThresholds bucketCountThresholds, IncludeExclude.StringFilter includeExclude, AggregationContext aggregationContext,
Aggregator parent, SignificanceHeuristic significanceHeuristic, SignificantTermsAggregatorFactory termsAggFactory,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {

super(name, factories, valuesSource, null, bucketCountThresholds, includeExclude, aggregationContext, parent,
super(name, factories, valuesSource, null, format, bucketCountThresholds, includeExclude, aggregationContext, parent,
SubAggCollectionMode.DEPTH_FIRST, false, pipelineAggregators, metaData);
this.significanceHeuristic = significanceHeuristic;
this.termsAggFactory = termsAggFactory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException;
Expand All @@ -32,6 +31,7 @@
import org.elasticsearch.common.lucene.index.FreqTermsEnum;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
Expand Down Expand Up @@ -205,7 +205,7 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare
}
}
assert execution != null;
return execution.create(name, factories, valuesSource, bucketCountThresholds, includeExclude, context, parent,
return execution.create(name, factories, valuesSource, config.format(), bucketCountThresholds, includeExclude, context, parent,
significanceHeuristic, this, pipelineAggregators, metaData);
}

Expand Down Expand Up @@ -237,46 +237,45 @@ public enum ExecutionMode {
MAP(new ParseField("map")) {

@Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter();
return new SignificantStringTermsAggregator(name, factories, valuesSource, bucketCountThresholds, filter,
return new SignificantStringTermsAggregator(name, factories, valuesSource, format, bucketCountThresholds, filter,
aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
}

},
GLOBAL_ORDINALS(new ParseField("global_ordinals")) {

@Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource;
IndexSearcher indexSearcher = aggregationContext.searchContext().searcher();
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
return new GlobalOrdinalsSignificantTermsAggregator(name, factories,
(ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, filter, aggregationContext, parent,
significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
(ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter,
aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
}

},
GLOBAL_ORDINALS_HASH(new ParseField("global_ordinals_hash")) {

@Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories,
(ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, filter, aggregationContext, parent,
significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
(ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter,
aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
}
};

Expand All @@ -295,7 +294,7 @@ public static ExecutionMode fromString(String value, ParseFieldMatcher parseFiel
this.parseField = parseField;
}

abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.elasticsearch.search.aggregations.bucket.terms;

import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregation;
Expand All @@ -35,15 +36,15 @@ abstract class AbstractStringTermsAggregator extends TermsAggregator {
protected final boolean showTermDocCountError;

public AbstractStringTermsAggregator(String name, AggregatorFactories factories, AggregationContext context, Aggregator parent,
Terms.Order order, BucketCountThresholds bucketCountThresholds, SubAggCollectionMode subAggCollectMode,
Terms.Order order, DocValueFormat format, BucketCountThresholds bucketCountThresholds, SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
super(name, factories, context, parent, bucketCountThresholds, order, subAggCollectMode, pipelineAggregators, metaData);
super(name, factories, context, parent, bucketCountThresholds, order, format, subAggCollectMode, pipelineAggregators, metaData);
this.showTermDocCountError = showTermDocCountError;
}

@Override
public InternalAggregation buildEmptyAggregation() {
return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(),
return new StringTerms(name, order, format, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(),
bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket> emptyList(), showTermDocCountError, 0, 0,
pipelineAggregators(), metaData());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
*/
package org.elasticsearch.search.aggregations.bucket.terms;

import org.elasticsearch.Version;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
Expand Down Expand Up @@ -152,17 +151,14 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
}
}

private DocValueFormat format;

DoubleTerms() {
} // for serialization

public DoubleTerms(String name, Terms.Order order, DocValueFormat format, int requiredSize, int shardSize,
long minDocCount, List<? extends InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError,
long otherDocCount, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
super(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError, otherDocCount, pipelineAggregators,
super(name, order, format, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError, otherDocCount, pipelineAggregators,
metaData);
this.format = format;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.fielddata.AbstractRandomAccessOrds;
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalMapping;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregation;
Expand Down Expand Up @@ -71,13 +72,12 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
protected RandomAccessOrds globalOrds;

public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource,
Terms.Order order, BucketCountThresholds bucketCountThresholds,
IncludeExclude.OrdinalsFilter includeExclude,
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
super(name, factories, aggregationContext, parent, order, bucketCountThresholds, collectionMode, showTermDocCountError,
pipelineAggregators,
metaData);
Terms.Order order, DocValueFormat format, BucketCountThresholds bucketCountThresholds,
IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext, Aggregator parent,
SubAggCollectionMode collectionMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
super(name, factories, aggregationContext, parent, order, format, bucketCountThresholds, collectionMode, showTermDocCountError,
pipelineAggregators, metaData);
this.valuesSource = valuesSource;
this.includeExclude = includeExclude;
}
Expand Down Expand Up @@ -187,7 +187,7 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOE
survivingBucketOrds[i] = bucket.bucketOrd;
BytesRef scratch = new BytesRef();
copy(globalOrds.lookupOrd(bucket.globalOrd), scratch);
list[i] = new StringTerms.Bucket(scratch, bucket.docCount, null, showTermDocCountError, 0);
list[i] = new StringTerms.Bucket(scratch, bucket.docCount, null, showTermDocCountError, 0, format);
list[i].bucketOrd = bucket.bucketOrd;
otherDocCount -= list[i].docCount;
}
Expand All @@ -201,7 +201,7 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOE
bucket.docCountError = 0;
}

return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(),
return new StringTerms(name, order, format, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(),
bucketCountThresholds.getMinDocCount(), Arrays.asList(list), showTermDocCountError, 0, otherDocCount, pipelineAggregators(),
metaData());
}
Expand Down Expand Up @@ -266,12 +266,12 @@ public static class WithHash extends GlobalOrdinalsStringTermsAggregator {

private final LongHash bucketOrds;

public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource,
Terms.Order order, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext,
Aggregator parent, SubAggCollectionMode collectionMode,
public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, Terms.Order order,
DocValueFormat format, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude,
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
throws IOException {
super(name, factories, valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, collectionMode,
throws IOException {
super(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, aggregationContext, parent, collectionMode,
showTermDocCountError, pipelineAggregators, metaData);
bucketOrds = new LongHash(1, aggregationContext.bigArrays());
}
Expand Down Expand Up @@ -340,11 +340,11 @@ public static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
private RandomAccessOrds segmentOrds;

public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource,
Terms.Order order,
Terms.Order order, DocValueFormat format,
BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent,
SubAggCollectionMode collectionMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
super(name, factories, valuesSource, order, bucketCountThresholds, null, aggregationContext, parent, collectionMode,
super(name, factories, valuesSource, order, format, bucketCountThresholds, null, aggregationContext, parent, collectionMode,
showTermDocCountError, pipelineAggregators, metaData);
assert factories == null || factories.countAggregators() == 0;
this.segmentDocCounts = context.bigArrays().newIntArray(1, true);
Expand Down
Loading

0 comments on commit 13c1efd

Please sign in to comment.