Simplify AllEntries, AllField and AllFieldMapper:
  * Create one AllField field per field eligible for _all.
  * Add a positionIncrementGap (with a size of 100, not configurable) between
    each entry in order to distinguish fields when doing phrase queries on _all
    (see the sketch below).
jimczi committed Apr 8, 2016
1 parent 0d8e399 commit c565718
Showing 14 changed files with 368 additions and 548 deletions.
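
Why the gap matters: below is a minimal, self-contained Lucene sketch (an editor's illustration, not code from this commit; the analyzer, field name, and sample values are hypothetical, and Lucene 5.x-era APIs are assumed) showing how a position increment gap of 100 keeps a phrase query from matching across the boundary between two values indexed into the same field.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.store.RAMDirectory;

public class PositionGapDemo {
    public static void main(String[] args) throws Exception {
        // An analyzer that leaves a gap of 100 positions between successive
        // values of the same field, mirroring the non-configurable gap above.
        Analyzer analyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                return new TokenStreamComponents(new WhitespaceTokenizer());
            }

            @Override
            public int getPositionIncrementGap(String fieldName) {
                return 100;
            }
        };

        try (RAMDirectory dir = new RAMDirectory();
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            Document doc = new Document();
            // Two source fields contributing to the same "_all"-like field.
            doc.add(new TextField("all", "quick brown", Field.Store.NO));
            doc.add(new TextField("all", "fox jumps", Field.Store.NO));
            writer.addDocument(doc);
            writer.commit();

            IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
            // "brown" ends at position 1 and "fox" starts at position 102, so a
            // phrase straddling the two entries finds no adjacent positions.
            System.out.println(searcher.count(new PhraseQuery("all", "brown", "fox"))); // 0
            // A phrase inside a single entry still matches.
            System.out.println(searcher.count(new PhraseQuery("all", "quick", "brown"))); // 1
        }
    }
}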
3 changes: 0 additions & 3 deletions buildSrc/src/main/resources/checkstyle_suppressions.xml
@@ -343,7 +343,6 @@
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]io[/\\]Channels.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]joda[/\\]Joda.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]lucene[/\\]Lucene.java" checks="LineLength" />
-<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]lucene[/\\]all[/\\]AllTermQuery.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]lucene[/\\]index[/\\]ElasticsearchDirectoryReader.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]lucene[/\\]index[/\\]FilterableTermsEnum.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]lucene[/\\]index[/\\]FreqTermsEnum.java" checks="LineLength" />
@@ -485,7 +484,6 @@
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]geo[/\\]GeoPointFieldMapper.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]geo[/\\]GeoPointFieldMapperLegacy.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]geo[/\\]GeoShapeFieldMapper.java" checks="LineLength" />
-<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]internal[/\\]AllFieldMapper.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]internal[/\\]FieldNamesFieldMapper.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]internal[/\\]IdFieldMapper.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]internal[/\\]IndexFieldMapper.java" checks="LineLength" />
@@ -1068,7 +1066,6 @@
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]DynamicMappingTests.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]FieldTypeTestCase.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]MapperServiceTests.java" checks="LineLength" />
-<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]all[/\\]SimpleAllMapperTests.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]binary[/\\]BinaryMappingTests.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]boost[/\\]CustomBoostMappingTests.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]boost[/\\]FieldLevelBoostTests.java" checks="LineLength" />
185 changes: 7 additions & 178 deletions core/src/main/java/org/elasticsearch/common/lucene/all/AllEntries.java
@@ -19,39 +19,24 @@
 
 package org.elasticsearch.common.lucene.all;
 
-import org.elasticsearch.common.io.FastCharArrayWriter;
-import org.elasticsearch.common.io.FastStringReader;
-
-import java.io.IOException;
-import java.io.Reader;
 import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
-import java.util.Set;
 
 /**
  *
  */
-public class AllEntries extends Reader {
-
+public class AllEntries {
     public static class Entry {
         private final String name;
-        private final FastStringReader reader;
-        private final int startOffset;
+        private final String value;
         private final float boost;
 
-        public Entry(String name, FastStringReader reader, int startOffset, float boost) {
+        public Entry(String name, String value, float boost) {
             this.name = name;
-            this.reader = reader;
-            this.startOffset = startOffset;
+            this.value = value;
             this.boost = boost;
         }
 
-        public int startOffset() {
-            return startOffset;
-        }
-
         public String name() {
             return this.name;
         }
@@ -60,179 +45,23 @@ public float boost() {
             return this.boost;
         }
 
-        public FastStringReader reader() {
-            return this.reader;
+        public String value() {
+            return this.value;
         }
     }
 
     private final List<Entry> entries = new ArrayList<>();
 
-    private Entry current;
-
-    private Iterator<Entry> it;
-
-    private boolean itsSeparatorTime = false;
-
-    private boolean customBoost = false;
-
     public void addText(String name, String text, float boost) {
-        if (boost != 1.0f) {
-            customBoost = true;
-        }
-        final int lastStartOffset;
-        if (entries.isEmpty()) {
-            lastStartOffset = -1;
-        } else {
-            final Entry last = entries.get(entries.size() - 1);
-            lastStartOffset = last.startOffset() + last.reader().length();
-        }
-        final int startOffset = lastStartOffset + 1; // +1 because we insert a space between tokens
-        Entry entry = new Entry(name, new FastStringReader(text), startOffset, boost);
+        Entry entry = new Entry(name, text, boost);
         entries.add(entry);
     }
 
-    public boolean customBoost() {
-        return customBoost;
-    }
-
     public void clear() {
        this.entries.clear();
-        this.current = null;
-        this.it = null;
-        itsSeparatorTime = false;
    }
 
-    @Override
-    public void reset() {
-        try {
-            for (Entry entry : entries) {
-                entry.reader().reset();
-            }
-        } catch (IOException e) {
-            throw new IllegalStateException("should not happen");
-        }
-        it = entries.iterator();
-        if (it.hasNext()) {
-            current = it.next();
-            itsSeparatorTime = true;
-        }
-    }
-
-
-    public String buildText() {
-        reset();
-        FastCharArrayWriter writer = new FastCharArrayWriter();
-        for (Entry entry : entries) {
-            writer.append(entry.reader());
-            writer.append(' ');
-        }
-        reset();
-        return writer.toString();
-    }
-
     public List<Entry> entries() {
         return this.entries;
     }
-
-    public Set<String> fields() {
-        Set<String> fields = new HashSet<>();
-        for (Entry entry : entries) {
-            fields.add(entry.name());
-        }
-        return fields;
-    }
-
-    // compute the boost for a token with the given startOffset
-    public float boost(int startOffset) {
-        if (!entries.isEmpty()) {
-            int lo = 0, hi = entries.size() - 1;
-            while (lo <= hi) {
-                final int mid = (lo + hi) >>> 1;
-                final int midOffset = entries.get(mid).startOffset();
-                if (startOffset < midOffset) {
-                    hi = mid - 1;
-                } else {
-                    lo = mid + 1;
-                }
-            }
-            final int index = Math.max(0, hi); // protection against broken token streams
-            assert entries.get(index).startOffset() <= startOffset;
-            assert index == entries.size() - 1 || entries.get(index + 1).startOffset() > startOffset;
-            return entries.get(index).boost();
-        }
-        return 1.0f;
-    }
-
-    @Override
-    public int read(char[] cbuf, int off, int len) throws IOException {
-        if (current == null) {
-            return -1;
-        }
-        if (customBoost) {
-            int result = current.reader().read(cbuf, off, len);
-            if (result == -1) {
-                if (itsSeparatorTime) {
-                    itsSeparatorTime = false;
-                    cbuf[off] = ' ';
-                    return 1;
-                }
-                itsSeparatorTime = true;
-                // close(); No need to close, we work on in mem readers
-                if (it.hasNext()) {
-                    current = it.next();
-                } else {
-                    current = null;
-                }
-                return read(cbuf, off, len);
-            }
-            return result;
-        } else {
-            int read = 0;
-            while (len > 0) {
-                int result = current.reader().read(cbuf, off, len);
-                if (result == -1) {
-                    if (it.hasNext()) {
-                        current = it.next();
-                    } else {
-                        current = null;
-                        if (read == 0) {
-                            return -1;
-                        }
-                        return read;
-                    }
-                    cbuf[off++] = ' ';
-                    read++;
-                    len--;
-                } else {
-                    read += result;
-                    off += result;
-                    len -= result;
-                }
-            }
-            return read;
-        }
-    }
-
-    @Override
-    public void close() {
-        if (current != null) {
-            // no need to close, these are readers on strings
-            current = null;
-        }
-    }
-
-
-    @Override
-    public boolean ready() throws IOException {
-        return (current != null) && current.reader().ready();
-    }
-
-    @Override
-    public String toString() {
-        StringBuilder sb = new StringBuilder();
-        for (Entry entry : entries) {
-            sb.append(entry.name()).append(',');
-        }
-        return sb.toString();
-    }
 }
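
After this hunk, AllEntries is reduced to a plain container of (name, value, boost) entries; all the Reader plumbing above is gone. A rough usage sketch under that assumption (field names and values are illustrative, not from the diff):

AllEntries entries = new AllEntries();
entries.addText("title", "quick brown", 1.0f);
entries.addText("body", "fox jumps", 2.0f); // per-field boost kept per entry
for (AllEntries.Entry entry : entries.entries()) {
    String field = entry.name();  // source field name
    String text = entry.value();  // raw text, no offset bookkeeping
    float boost = entry.boost();  // consumed later by AllField
}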
core/src/main/java/org/elasticsearch/common/lucene/all/AllField.java
@@ -24,60 +24,26 @@
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.IndexOptions;
-import org.elasticsearch.ElasticsearchException;
-
-import java.io.IOException;
-import java.io.Reader;
 
-/**
- *
- */
 public class AllField extends Field {
+    private final float boost;
 
-    private final AllEntries allEntries;
-
-    private final Analyzer analyzer;
-
-    public AllField(String name, AllEntries allEntries, Analyzer analyzer, FieldType fieldType) {
-        super(name, fieldType);
-        this.allEntries = allEntries;
-        this.analyzer = analyzer;
-    }
-
-    @Override
-    public String stringValue() {
-        if (fieldType().stored()) {
-            return allEntries.buildText();
-        }
-        return null;
-    }
-
-    @Override
-    public Reader readerValue() {
-        return null;
-    }
-
-    /** Returns the {@link AllEntries} containing the original text fields for the document. */
-    public AllEntries getAllEntries() {
-        return allEntries;
-    }
+    public AllField(String name, String value, float boost, FieldType fieldType) {
+        super(name, value, fieldType);
+        this.boost = boost;
+    }
 
     @Override
     public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
-        try {
-            allEntries.reset(); // reset the all entries, just in case it was read already
-            if (allEntries.customBoost() && fieldType().indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
-                // TODO: we should be able to reuse "previous" if its instanceof AllTokenStream?
-                // but we need to be careful this optimization is safe (and tested)...
-
-                // AllTokenStream maps boost to 4-byte payloads, so we only need to use it any field had non-default (!= 1.0f) boost and if
-                // positions are indexed:
-                return AllTokenStream.allTokenStream(name, allEntries, analyzer);
-            } else {
-                return analyzer.tokenStream(name, allEntries);
-            }
-        } catch (IOException e) {
-            throw new ElasticsearchException("Failed to create token stream");
+        TokenStream ts = analyzer.tokenStream(name(), stringValue());
+        if (boost != 1.0f && fieldType().indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
+            // TODO: we should be able to reuse "previous" if its instanceof AllTokenStream?
+            // but we need to be careful this optimization is safe (and tested)...
+
+            // AllTokenStream maps boost to 4-byte payloads, so we only need to use it any field had non-default (!= 1.0f) boost and if
+            // positions are indexed:
+            return new AllTokenStream(ts, boost);
        }
+        return ts;
     }
 }
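
Read together with the first bullet of the commit message ("one AllField field per field eligible for _all"), the mapper-side flow implied by the new constructor is roughly the following sketch; document and allFieldType are hypothetical stand-ins for the parse context and the _all field type, not names from this diff:

// One AllField per source field, each carrying its own boost, instead of a
// single field fed by the concatenated AllEntries reader.
for (AllEntries.Entry entry : allEntries.entries()) {
    document.add(new AllField("_all", entry.value(), entry.boost(), allFieldType));
}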
core/src/main/java/org/elasticsearch/common/lucene/all/AllTermQuery.java
@@ -43,7 +43,6 @@
 import org.apache.lucene.search.similarities.Similarity.SimWeight;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.SmallFloat;
-import org.apache.lucene.util.ToStringUtils;
 
 import java.io.IOException;
 import java.util.Set;
@@ -129,7 +128,8 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOException {
             SimScorer docScorer = similarity.simScorer(stats, context);
             Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
             Explanation termScoreExplanation = docScorer.explain(doc, freqExplanation);
-            Explanation payloadBoostExplanation = Explanation.match(scorer.payloadBoost(), "payloadBoost=" + scorer.payloadBoost());
+            Explanation payloadBoostExplanation =
+                Explanation.match(scorer.payloadBoost(), "payloadBoost=" + scorer.payloadBoost());
             return Explanation.match(
                     score,
                     "weight(" + getQuery() + " in " + doc + ") ["
@@ -193,7 +193,8 @@ float payloadBoost() throws IOException {
                        // TODO: for bw compat only, remove this in 6.0
                        boost = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
                    } else {
-                        throw new IllegalStateException("Payloads are expected to have a length of 1 or 4 but got: " + payload);
+                        throw new IllegalStateException("Payloads are expected to have a length of 1 or 4 but got: "
+                                + payload);
                    }
                    payloadBoost += boost;
                }
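
For reference, payloadBoost() accepts two payload layouts: a 1-byte SmallFloat encoding (the current format) and a 4-byte PayloadHelper encoding kept for backward compatibility until 6.0. A hedged sketch of the full branch, with the 1-byte decode assumed from the SmallFloat import rather than shown in this hunk:

float boost;
if (payload.length == 1) {
    // assumed: current 1-byte encoding, the inverse of SmallFloat.floatToByte315
    boost = SmallFloat.byte315ToFloat(payload.bytes[payload.offset]);
} else if (payload.length == 4) {
    // TODO: for bw compat only, remove this in 6.0
    boost = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
} else {
    throw new IllegalStateException("Payloads are expected to have a length of 1 or 4 but got: "
            + payload);
}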