Simplify AllEntries, AllField and AllFieldMapper:
  * Create one AllField field per field eligible for _all.
  * Add a positionIncrementGap (with a size of 100, not configurable) between
    each entry in order to distinguish fields when doing phrase queries on _all
    (see the sketch below).
jimczi committed Apr 8, 2016
1 parent 0d8e399 commit c565718
Showing 14 changed files with 368 additions and 548 deletions.
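
Why the gap matters: below is a minimal, self-contained Lucene sketch (an editor's illustration, not code from this commit; the analyzer, field name, and sample values are hypothetical, and Lucene 5.x-era APIs are assumed) showing how a position increment gap of 100 keeps a phrase query from matching across the boundary between two values indexed into the same field.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.store.RAMDirectory;

public class PositionGapDemo {
    public static void main(String[] args) throws Exception {
        // An analyzer that leaves a gap of 100 positions between successive
        // values of the same field, mirroring the non-configurable gap above.
        Analyzer analyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                return new TokenStreamComponents(new WhitespaceTokenizer());
            }

            @Override
            public int getPositionIncrementGap(String fieldName) {
                return 100;
            }
        };

        try (RAMDirectory dir = new RAMDirectory();
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            Document doc = new Document();
            // Two source fields contributing to the same "_all"-like field.
            doc.add(new TextField("all", "quick brown", Field.Store.NO));
            doc.add(new TextField("all", "fox jumps", Field.Store.NO));
            writer.addDocument(doc);
            writer.commit();

            IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
            // "brown" ends at position 1 and "fox" starts at position 102, so a
            // phrase straddling the two entries finds no adjacent positions.
            System.out.println(searcher.count(new PhraseQuery("all", "brown", "fox"))); // 0
            // A phrase inside a single entry still matches.
            System.out.println(searcher.count(new PhraseQuery("all", "quick", "brown"))); // 1
        }
    }
}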
3 changes: 0 additions & 3 deletions buildSrc/src/main/resources/checkstyle_suppressions.xml
@@ -343,7 +343,6 @@
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]io[/\\]Channels.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]joda[/\\]Joda.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]lucene[/\\]Lucene.java" checks="LineLength" />
-<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]lucene[/\\]all[/\\]AllTermQuery.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]lucene[/\\]index[/\\]ElasticsearchDirectoryReader.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]lucene[/\\]index[/\\]FilterableTermsEnum.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]lucene[/\\]index[/\\]FreqTermsEnum.java" checks="LineLength" />
@@ -485,7 +484,6 @@
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]geo[/\\]GeoPointFieldMapper.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]geo[/\\]GeoPointFieldMapperLegacy.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]geo[/\\]GeoShapeFieldMapper.java" checks="LineLength" />
-<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]internal[/\\]AllFieldMapper.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]internal[/\\]FieldNamesFieldMapper.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]internal[/\\]IdFieldMapper.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]internal[/\\]IndexFieldMapper.java" checks="LineLength" />
@@ -1068,7 +1066,6 @@
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]DynamicMappingTests.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]FieldTypeTestCase.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]MapperServiceTests.java" checks="LineLength" />
-<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]all[/\\]SimpleAllMapperTests.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]binary[/\\]BinaryMappingTests.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]boost[/\\]CustomBoostMappingTests.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]boost[/\\]FieldLevelBoostTests.java" checks="LineLength" />
185 changes: 7 additions & 178 deletions core/src/main/java/org/elasticsearch/common/lucene/all/AllEntries.java
@@ -19,39 +19,24 @@
 
 package org.elasticsearch.common.lucene.all;
 
-import org.elasticsearch.common.io.FastCharArrayWriter;
-import org.elasticsearch.common.io.FastStringReader;
-
-import java.io.IOException;
-import java.io.Reader;
 import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
-import java.util.Set;
 
 /**
  *
  */
-public class AllEntries extends Reader {
-
+public class AllEntries {
     public static class Entry {
         private final String name;
-        private final FastStringReader reader;
-        private final int startOffset;
+        private final String value;
         private final float boost;
 
-        public Entry(String name, FastStringReader reader, int startOffset, float boost) {
+        public Entry(String name, String value, float boost) {
             this.name = name;
-            this.reader = reader;
-            this.startOffset = startOffset;
+            this.value = value;
             this.boost = boost;
         }
 
-        public int startOffset() {
-            return startOffset;
-        }
-
         public String name() {
             return this.name;
         }
@@ -60,179 +45,23 @@ public float boost() {
             return this.boost;
         }
 
-        public FastStringReader reader() {
-            return this.reader;
+        public String value() {
+            return this.value;
         }
     }
 
     private final List<Entry> entries = new ArrayList<>();
 
-    private Entry current;
-
-    private Iterator<Entry> it;
-
-    private boolean itsSeparatorTime = false;
-
-    private boolean customBoost = false;
-
     public void addText(String name, String text, float boost) {
-        if (boost != 1.0f) {
-            customBoost = true;
-        }
-        final int lastStartOffset;
-        if (entries.isEmpty()) {
-            lastStartOffset = -1;
-        } else {
-            final Entry last = entries.get(entries.size() - 1);
-            lastStartOffset = last.startOffset() + last.reader().length();
-        }
-        final int startOffset = lastStartOffset + 1; // +1 because we insert a space between tokens
-        Entry entry = new Entry(name, new FastStringReader(text), startOffset, boost);
+        Entry entry = new Entry(name, text, boost);
         entries.add(entry);
     }
 
-    public boolean customBoost() {
-        return customBoost;
-    }
-
     public void clear() {
        this.entries.clear();
-        this.current = null;
-        this.it = null;
-        itsSeparatorTime = false;
    }
 
-    @Override
-    public void reset() {
-        try {
-            for (Entry entry : entries) {
-                entry.reader().reset();
-            }
-        } catch (IOException e) {
-            throw new IllegalStateException("should not happen");
-        }
-        it = entries.iterator();
-        if (it.hasNext()) {
-            current = it.next();
-            itsSeparatorTime = true;
-        }
-    }
-
-
-    public String buildText() {
-        reset();
-        FastCharArrayWriter writer = new FastCharArrayWriter();
-        for (Entry entry : entries) {
-            writer.append(entry.reader());
-            writer.append(' ');
-        }
-        reset();
-        return writer.toString();
-    }
-
     public List<Entry> entries() {
         return this.entries;
     }
-
-    public Set<String> fields() {
-        Set<String> fields = new HashSet<>();
-        for (Entry entry : entries) {
-            fields.add(entry.name());
-        }
-        return fields;
-    }
-
-    // compute the boost for a token with the given startOffset
-    public float boost(int startOffset) {
-        if (!entries.isEmpty()) {
-            int lo = 0, hi = entries.size() - 1;
-            while (lo <= hi) {
-                final int mid = (lo + hi) >>> 1;
-                final int midOffset = entries.get(mid).startOffset();
-                if (startOffset < midOffset) {
-                    hi = mid - 1;
-                } else {
-                    lo = mid + 1;
-                }
-            }
-            final int index = Math.max(0, hi); // protection against broken token streams
-            assert entries.get(index).startOffset() <= startOffset;
-            assert index == entries.size() - 1 || entries.get(index + 1).startOffset() > startOffset;
-            return entries.get(index).boost();
-        }
-        return 1.0f;
-    }
-
-    @Override
-    public int read(char[] cbuf, int off, int len) throws IOException {
-        if (current == null) {
-            return -1;
-        }
-        if (customBoost) {
-            int result = current.reader().read(cbuf, off, len);
-            if (result == -1) {
-                if (itsSeparatorTime) {
-                    itsSeparatorTime = false;
-                    cbuf[off] = ' ';
-                    return 1;
-                }
-                itsSeparatorTime = true;
-                // close(); No need to close, we work on in mem readers
-                if (it.hasNext()) {
-                    current = it.next();
-                } else {
-                    current = null;
-                }
-                return read(cbuf, off, len);
-            }
-            return result;
-        } else {
-            int read = 0;
-            while (len > 0) {
-                int result = current.reader().read(cbuf, off, len);
-                if (result == -1) {
-                    if (it.hasNext()) {
-                        current = it.next();
-                    } else {
-                        current = null;
-                        if (read == 0) {
-                            return -1;
-                        }
-                        return read;
-                    }
-                    cbuf[off++] = ' ';
-                    read++;
-                    len--;
-                } else {
-                    read += result;
-                    off += result;
-                    len -= result;
-                }
-            }
-            return read;
-        }
-    }
-
-    @Override
-    public void close() {
-        if (current != null) {
-            // no need to close, these are readers on strings
-            current = null;
-        }
-    }
-
-
-    @Override
-    public boolean ready() throws IOException {
-        return (current != null) && current.reader().ready();
-    }
-
-    @Override
-    public String toString() {
-        StringBuilder sb = new StringBuilder();
-        for (Entry entry : entries) {
-            sb.append(entry.name()).append(',');
-        }
-        return sb.toString();
-    }
 }
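
After this hunk, AllEntries is reduced to a plain container of (name, value, boost) entries; all the Reader plumbing above is gone. A rough usage sketch under that assumption (field names and values are illustrative, not from the diff):

AllEntries entries = new AllEntries();
entries.addText("title", "quick brown", 1.0f);
entries.addText("body", "fox jumps", 2.0f); // per-field boost kept per entry
for (AllEntries.Entry entry : entries.entries()) {
    String field = entry.name();  // source field name
    String text = entry.value();  // raw text, no offset bookkeeping
    float boost = entry.boost();  // consumed later by AllField
}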
core/src/main/java/org/elasticsearch/common/lucene/all/AllField.java
@@ -24,60 +24,26 @@
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.IndexOptions;
-import org.elasticsearch.ElasticsearchException;
-
-import java.io.IOException;
-import java.io.Reader;
 
-/**
- *
- */
 public class AllField extends Field {
+    private final float boost;
 
-    private final AllEntries allEntries;
-
-    private final Analyzer analyzer;
-
-    public AllField(String name, AllEntries allEntries, Analyzer analyzer, FieldType fieldType) {
-        super(name, fieldType);
-        this.allEntries = allEntries;
-        this.analyzer = analyzer;
-    }
-
-    @Override
-    public String stringValue() {
-        if (fieldType().stored()) {
-            return allEntries.buildText();
-        }
-        return null;
-    }
-
-    @Override
-    public Reader readerValue() {
-        return null;
-    }
-
-    /** Returns the {@link AllEntries} containing the original text fields for the document. */
-    public AllEntries getAllEntries() {
-        return allEntries;
-    }
+    public AllField(String name, String value, float boost, FieldType fieldType) {
+        super(name, value, fieldType);
+        this.boost = boost;
+    }
 
     @Override
     public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
-        try {
-            allEntries.reset(); // reset the all entries, just in case it was read already
-            if (allEntries.customBoost() && fieldType().indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
-                // TODO: we should be able to reuse "previous" if its instanceof AllTokenStream?
-                // but we need to be careful this optimization is safe (and tested)...
-
-                // AllTokenStream maps boost to 4-byte payloads, so we only need to use it any field had non-default (!= 1.0f) boost and if
-                // positions are indexed:
-                return AllTokenStream.allTokenStream(name, allEntries, analyzer);
-            } else {
-                return analyzer.tokenStream(name, allEntries);
-            }
-        } catch (IOException e) {
-            throw new ElasticsearchException("Failed to create token stream");
+        TokenStream ts = analyzer.tokenStream(name(), stringValue());
+        if (boost != 1.0f && fieldType().indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
+            // TODO: we should be able to reuse "previous" if its instanceof AllTokenStream?
+            // but we need to be careful this optimization is safe (and tested)...
+
+            // AllTokenStream maps boost to 4-byte payloads, so we only need to use it any field had non-default (!= 1.0f) boost and if
+            // positions are indexed:
+            return new AllTokenStream(ts, boost);
        }
+        return ts;
     }
 }
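
Read together with the first bullet of the commit message ("one AllField field per field eligible for _all"), the mapper-side flow implied by the new constructor is roughly the following sketch; document and allFieldType are hypothetical stand-ins for the parse context and the _all field type, not names from this diff:

// One AllField per source field, each carrying its own boost, instead of a
// single field fed by the concatenated AllEntries reader.
for (AllEntries.Entry entry : allEntries.entries()) {
    document.add(new AllField("_all", entry.value(), entry.boost(), allFieldType));
}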
core/src/main/java/org/elasticsearch/common/lucene/all/AllTermQuery.java
@@ -43,7 +43,6 @@
 import org.apache.lucene.search.similarities.Similarity.SimWeight;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.SmallFloat;
-import org.apache.lucene.util.ToStringUtils;
 
 import java.io.IOException;
 import java.util.Set;
@@ -129,7 +128,8 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOException {
             SimScorer docScorer = similarity.simScorer(stats, context);
             Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
             Explanation termScoreExplanation = docScorer.explain(doc, freqExplanation);
-            Explanation payloadBoostExplanation = Explanation.match(scorer.payloadBoost(), "payloadBoost=" + scorer.payloadBoost());
+            Explanation payloadBoostExplanation =
+                Explanation.match(scorer.payloadBoost(), "payloadBoost=" + scorer.payloadBoost());
             return Explanation.match(
                     score,
                     "weight(" + getQuery() + " in " + doc + ") ["
@@ -193,7 +193,8 @@ float payloadBoost() throws IOException {
                        // TODO: for bw compat only, remove this in 6.0
                        boost = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
                    } else {
-                        throw new IllegalStateException("Payloads are expected to have a length of 1 or 4 but got: " + payload);
+                        throw new IllegalStateException("Payloads are expected to have a length of 1 or 4 but got: "
+                                + payload);
                    }
                    payloadBoost += boost;
                }
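
For reference, payloadBoost() accepts two payload layouts: a 1-byte SmallFloat encoding (the current format) and a 4-byte PayloadHelper encoding kept for backward compatibility until 6.0. A hedged sketch of the full branch, with the 1-byte decode assumed from the SmallFloat import rather than shown in this hunk:

float boost;
if (payload.length == 1) {
    // assumed: current 1-byte encoding, the inverse of SmallFloat.floatToByte315
    boost = SmallFloat.byte315ToFloat(payload.bytes[payload.offset]);
} else if (payload.length == 4) {
    // TODO: for bw compat only, remove this in 6.0
    boost = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
} else {
    throw new IllegalStateException("Payloads are expected to have a length of 1 or 4 but got: "
            + payload);
}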