diff --git a/Gopkg.lock b/Gopkg.lock index 92f8b99481a..e1b9e240c9e 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -74,6 +74,14 @@ revision = "03a43f93cd29dc549e6d9b11892795c206f9c38c" version = "v1.20.1" +[[projects]] + digest = "1:29add35a36be0ef65639a201df2b86e4e3d3b3b2e5eac4e35e02642b23b22405" + name = "github.com/VictoriaMetrics/fastcache" + packages = ["."] + pruneopts = "NUT" + revision = "4d94f266cd3cecbcd97eaebee9e3d6d8cf918643" + version = "v1.4.6" + [[projects]] branch = "master" digest = "1:f3793f8a708522400cef1dba23385e901aede5519f68971fd69938ef330b07a1" @@ -101,6 +109,14 @@ revision = "4b2b341e8d7715fae06375aa633dbb6e91b3fb46" version = "v1.0.0" +[[projects]] + digest = "1:0c31fa2fb2c809d61d640e28cc400087fe205df6ec9623dd1eb91a7de8d4f5d6" + name = "github.com/cespare/xxhash" + packages = ["."] + pruneopts = "NUT" + revision = "de209a9ffae3256185a6bb135d1a0ada7b2b5f09" + version = "v2.1.0" + [[projects]] branch = "master" digest = "1:e48c63e818c67fbf3d7afe20bba33134ab1a5bf384847385384fd027652a5a96" @@ -1027,6 +1043,7 @@ "github.com/Knetic/govaluate", "github.com/Shopify/sarama", "github.com/Shopify/sarama/mocks", + "github.com/VictoriaMetrics/fastcache", "github.com/davecgh/go-spew/spew", "github.com/fsouza/go-dockerclient", "github.com/go-kit/kit/metrics", diff --git a/Gopkg.toml b/Gopkg.toml index 54d1851a235..db5904661a6 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -188,3 +188,7 @@ noverify = [ [[constraint]] branch = "master" name = "github.com/hyperledger/fabric-chaincode-go" + +[[constraint]] + name = "github.com/VictoriaMetrics/fastcache" + version = "1.4.6" diff --git a/core/ledger/kvledger/txmgmt/statedb/cache.go b/core/ledger/kvledger/txmgmt/statedb/cache.go new file mode 100644 index 00000000000..fe9ec7adc74 --- /dev/null +++ b/core/ledger/kvledger/txmgmt/statedb/cache.go @@ -0,0 +1,37 @@ +/* +Copyright IBM Corp. All Rights Reserved. + +SPDX-License-Identifier: Apache-2.0 +*/ + +package statedb + +import ( + "github.com/VictoriaMetrics/fastcache" +) + +// Cache holds both the system and user cache +type Cache struct { + sysCache *fastcache.Cache + usrCache *fastcache.Cache +} + +// New creates a Cache. The cache consists of both system state cache (for lscc, _lifecycle) +// and user state cache (for all user deployed chaincodes). The size of the +// system state cache is 64 MB, by default. The size of the user state cache, in terms of MB, is +// specified via usrCacheSize parameter. Note that the fastcache allocates memory +// only in the multiples of 32 MB (due to 512 buckets & an equal number of 64 KB chunks per bucket). +// If the usrCacheSize is not a multiple of 32 MB, the fastcache would round the size +// to the next multiple of 32 MB. +func New(usrCacheSize int) *Cache { + cache := &Cache{} + // By default, 64 MB is allocated for the system cache + cache.sysCache = fastcache.New(64 * 1024 * 1024) + + // User passed size is used to allocate memory for the user cache + if usrCacheSize <= 0 { + return cache + } + cache.usrCache = fastcache.New(usrCacheSize * 1024 * 1024) + return cache +} diff --git a/core/ledger/kvledger/txmgmt/statedb/cache_test.go b/core/ledger/kvledger/txmgmt/statedb/cache_test.go new file mode 100644 index 00000000000..8522df66aee --- /dev/null +++ b/core/ledger/kvledger/txmgmt/statedb/cache_test.go @@ -0,0 +1,30 @@ +/* +Copyright IBM Corp. All Rights Reserved. 
+ +SPDX-License-Identifier: Apache-2.0 +*/ + +package statedb + +import ( + "testing" + + "github.com/VictoriaMetrics/fastcache" + "github.com/stretchr/testify/assert" +) + +func TestNewCache(t *testing.T) { + cache := New(10) + expectedCache := &Cache{ + sysCache: fastcache.New(64 * 1024 * 1024), + usrCache: fastcache.New(10 * 1024 * 1024), + } + assert.Equal(t, expectedCache, cache) + + cache = New(0) + expectedCache = &Cache{ + sysCache: fastcache.New(64 * 1024 * 1024), + usrCache: nil, + } + assert.Equal(t, expectedCache, cache) +} diff --git a/vendor/github.com/VictoriaMetrics/fastcache/LICENSE b/vendor/github.com/VictoriaMetrics/fastcache/LICENSE new file mode 100644 index 00000000000..9a8145e5834 --- /dev/null +++ b/vendor/github.com/VictoriaMetrics/fastcache/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2018 VictoriaMetrics + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/vendor/github.com/VictoriaMetrics/fastcache/bigcache.go b/vendor/github.com/VictoriaMetrics/fastcache/bigcache.go new file mode 100644 index 00000000000..148945842d8 --- /dev/null +++ b/vendor/github.com/VictoriaMetrics/fastcache/bigcache.go @@ -0,0 +1,152 @@ +package fastcache + +import ( + "sync" + "sync/atomic" + + "github.com/cespare/xxhash" +) + +// maxSubvalueLen is the maximum size of subvalue chunk. +// +// - 16 bytes are for subkey encoding +// - 4 bytes are for len(key)+len(value) encoding inside fastcache +// - 1 byte is implementation detail of fastcache +const maxSubvalueLen = chunkSize - 16 - 4 - 1 + +// maxKeyLen is the maximum size of key. +// +// - 16 bytes are for (hash + valueLen) +// - 4 bytes are for len(key)+len(subkey) +// - 1 byte is implementation detail of fastcache +const maxKeyLen = chunkSize - 16 - 4 - 1 + +// SetBig sets (k, v) to c where len(v) may exceed 64KB. +// +// GetBig must be used for reading stored values. +// +// The stored entry may be evicted at any time either due to cache +// overflow or due to unlikely hash collision. +// Pass higher maxBytes value to New if the added items disappear +// frequently. +// +// It is safe to store entries smaller than 64KB with SetBig. +// +// k and v contents may be modified after returning from SetBig. +func (c *Cache) SetBig(k, v []byte) { + atomic.AddUint64(&c.bigStats.SetBigCalls, 1) + if len(k) > maxKeyLen { + atomic.AddUint64(&c.bigStats.TooBigKeyErrors, 1) + return + } + valueLen := len(v) + valueHash := xxhash.Sum64(v) + + // Split v into chunks with up to 64Kb each. 
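+	// Each subvalue is stored under a 16-byte subkey built from
+	// (xxhash(v), chunk index); the metavalue stored under k at the end
+	// records (xxhash(v), len(v)) so GetBig can locate and verify the chunks.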
+ subkey := getSubkeyBuf() + var i uint64 + for len(v) > 0 { + subkey.B = marshalUint64(subkey.B[:0], valueHash) + subkey.B = marshalUint64(subkey.B, uint64(i)) + i++ + subvalueLen := maxSubvalueLen + if len(v) < subvalueLen { + subvalueLen = len(v) + } + subvalue := v[:subvalueLen] + v = v[subvalueLen:] + c.Set(subkey.B, subvalue) + } + + // Write metavalue, which consists of valueHash and valueLen. + subkey.B = marshalUint64(subkey.B[:0], valueHash) + subkey.B = marshalUint64(subkey.B, uint64(valueLen)) + c.Set(k, subkey.B) + putSubkeyBuf(subkey) +} + +// GetBig searches for the value for the given k, appends it to dst +// and returns the result. +// +// GetBig returns only values stored via SetBig. It doesn't work +// with values stored via other methods. +// +// k contents may be modified after returning from GetBig. +func (c *Cache) GetBig(dst, k []byte) []byte { + atomic.AddUint64(&c.bigStats.GetBigCalls, 1) + subkey := getSubkeyBuf() + defer putSubkeyBuf(subkey) + + // Read and parse metavalue + subkey.B = c.Get(subkey.B[:0], k) + if len(subkey.B) == 0 { + // Nothing found. + return dst + } + if len(subkey.B) != 16 { + atomic.AddUint64(&c.bigStats.InvalidMetavalueErrors, 1) + return dst + } + valueHash := unmarshalUint64(subkey.B) + valueLen := unmarshalUint64(subkey.B[8:]) + + // Collect result from chunks. + dstLen := len(dst) + if n := dstLen + int(valueLen) - cap(dst); n > 0 { + dst = append(dst[:cap(dst)], make([]byte, n)...) + } + dst = dst[:dstLen] + var i uint64 + for uint64(len(dst)-dstLen) < valueLen { + subkey.B = marshalUint64(subkey.B[:0], valueHash) + subkey.B = marshalUint64(subkey.B, uint64(i)) + i++ + dstNew := c.Get(dst, subkey.B) + if len(dstNew) == len(dst) { + // Cannot find subvalue + return dst[:dstLen] + } + dst = dstNew + } + + // Verify the obtained value. + v := dst[dstLen:] + if uint64(len(v)) != valueLen { + atomic.AddUint64(&c.bigStats.InvalidValueLenErrors, 1) + return dst[:dstLen] + } + h := xxhash.Sum64(v) + if h != valueHash { + atomic.AddUint64(&c.bigStats.InvalidValueHashErrors, 1) + return dst[:dstLen] + } + return dst +} + +func getSubkeyBuf() *bytesBuf { + v := subkeyPool.Get() + if v == nil { + return &bytesBuf{} + } + return v.(*bytesBuf) +} + +func putSubkeyBuf(bb *bytesBuf) { + bb.B = bb.B[:0] + subkeyPool.Put(bb) +} + +var subkeyPool sync.Pool + +type bytesBuf struct { + B []byte +} + +func marshalUint64(dst []byte, u uint64) []byte { + return append(dst, byte(u>>56), byte(u>>48), byte(u>>40), byte(u>>32), byte(u>>24), byte(u>>16), byte(u>>8), byte(u)) +} + +func unmarshalUint64(src []byte) uint64 { + _ = src[7] + return uint64(src[0])<<56 | uint64(src[1])<<48 | uint64(src[2])<<40 | uint64(src[3])<<32 | uint64(src[4])<<24 | uint64(src[5])<<16 | uint64(src[6])<<8 | uint64(src[7]) +} diff --git a/vendor/github.com/VictoriaMetrics/fastcache/fastcache.go b/vendor/github.com/VictoriaMetrics/fastcache/fastcache.go new file mode 100644 index 00000000000..3192cbdea62 --- /dev/null +++ b/vendor/github.com/VictoriaMetrics/fastcache/fastcache.go @@ -0,0 +1,384 @@ +package fastcache + +import ( + "fmt" + "sync" + "sync/atomic" + + "github.com/cespare/xxhash" +) + +const bucketsCount = 512 + +const chunkSize = 64 * 1024 + +const bucketSizeBits = 40 + +const maxBucketSize uint64 = 1 << bucketSizeBits + +// Stats represents cache stats. +// +// Use Cache.UpdateStats for obtaining fresh stats from the cache. +type Stats struct { + // GetCalls is the number of Get calls. + GetCalls uint64 + + // SetCalls is the number of Set calls. 
+ SetCalls uint64 + + // Misses is the number of cache misses. + Misses uint64 + + // Collisions is the number of cache collisions. + // + // Usually the number of collisions must be close to zero. + // High number of collisions suggest something wrong with cache. + Collisions uint64 + + // Corruptions is the number of detected corruptions of the cache. + // + // Corruptions may occur when corrupted cache is loaded from file. + Corruptions uint64 + + // EntriesCount is the current number of entries in the cache. + EntriesCount uint64 + + // BytesSize is the current size of the cache in bytes. + BytesSize uint64 + + // BigStats contains stats for GetBig/SetBig methods. + BigStats +} + +// Reset resets s, so it may be re-used again in Cache.UpdateStats. +func (s *Stats) Reset() { + *s = Stats{} +} + +// BigStats contains stats for GetBig/SetBig methods. +type BigStats struct { + // GetBigCalls is the number of GetBig calls. + GetBigCalls uint64 + + // SetBigCalls is the number of SetBig calls. + SetBigCalls uint64 + + // TooBigKeyErrors is the number of calls to SetBig with too big key. + TooBigKeyErrors uint64 + + // InvalidMetavalueErrors is the number of calls to GetBig resulting + // to invalid metavalue. + InvalidMetavalueErrors uint64 + + // InvalidValueLenErrors is the number of calls to GetBig resulting + // to a chunk with invalid length. + InvalidValueLenErrors uint64 + + // InvalidValueHashErrors is the number of calls to GetBig resulting + // to a chunk with invalid hash value. + InvalidValueHashErrors uint64 +} + +func (bs *BigStats) reset() { + atomic.StoreUint64(&bs.GetBigCalls, 0) + atomic.StoreUint64(&bs.SetBigCalls, 0) + atomic.StoreUint64(&bs.TooBigKeyErrors, 0) + atomic.StoreUint64(&bs.InvalidMetavalueErrors, 0) + atomic.StoreUint64(&bs.InvalidValueLenErrors, 0) + atomic.StoreUint64(&bs.InvalidValueHashErrors, 0) +} + +// Cache is a fast thread-safe inmemory cache optimized for big number +// of entries. +// +// It has much lower impact on GC comparing to a simple `map[string][]byte`. +// +// Use New or LoadFromFile* for creating new cache instance. +// Concurrent goroutines may call any Cache methods on the same cache instance. +// +// Call Reset when the cache is no longer needed. This reclaims the allocated +// memory. +type Cache struct { + buckets [bucketsCount]bucket + + bigStats BigStats +} + +// New returns new cache with the given maxBytes capacity in bytes. +// +// maxBytes must be smaller than the available RAM size for the app, +// since the cache holds data in memory. +// +// If maxBytes is less than 32MB, then the minimum cache capacity is 32MB. +func New(maxBytes int) *Cache { + if maxBytes <= 0 { + panic(fmt.Errorf("maxBytes must be greater than 0; got %d", maxBytes)) + } + var c Cache + maxBucketBytes := uint64((maxBytes + bucketsCount - 1) / bucketsCount) + for i := range c.buckets[:] { + c.buckets[i].Init(maxBucketBytes) + } + return &c +} + +// Set stores (k, v) in the cache. +// +// Get must be used for reading the stored entry. +// +// The stored entry may be evicted at any time either due to cache +// overflow or due to unlikely hash collision. +// Pass higher maxBytes value to New if the added items disappear +// frequently. +// +// (k, v) entries with summary size exceeding 64KB aren't stored in the cache. +// SetBig can be used for storing entries exceeding 64KB. +// +// k and v contents may be modified after returning from Set. 
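+//
+// Internally, k is hashed with xxhash and the entry lands in one of the
+// 512 fixed buckets selected by the hash, so a Set only locks one bucket
+// rather than the whole cache.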
+func (c *Cache) Set(k, v []byte) { + h := xxhash.Sum64(k) + idx := h % bucketsCount + c.buckets[idx].Set(k, v, h) +} + +// Get appends value by the key k to dst and returns the result. +// +// Get allocates new byte slice for the returned value if dst is nil. +// +// Get returns only values stored in c via Set. +// +// k contents may be modified after returning from Get. +func (c *Cache) Get(dst, k []byte) []byte { + h := xxhash.Sum64(k) + idx := h % bucketsCount + return c.buckets[idx].Get(dst, k, h) +} + +// Del deletes value for the given k from the cache. +// +// k contents may be modified after returning from Del. +func (c *Cache) Del(k []byte) { + h := xxhash.Sum64(k) + idx := h % bucketsCount + c.buckets[idx].Del(h) +} + +// Reset removes all the items from the cache. +func (c *Cache) Reset() { + for i := range c.buckets[:] { + c.buckets[i].Reset() + } + c.bigStats.reset() +} + +// UpdateStats adds cache stats to s. +// +// Call s.Reset before calling UpdateStats if s is re-used. +func (c *Cache) UpdateStats(s *Stats) { + for i := range c.buckets[:] { + c.buckets[i].UpdateStats(s) + } + s.GetBigCalls += atomic.LoadUint64(&c.bigStats.GetBigCalls) + s.SetBigCalls += atomic.LoadUint64(&c.bigStats.SetBigCalls) + s.TooBigKeyErrors += atomic.LoadUint64(&c.bigStats.TooBigKeyErrors) + s.InvalidMetavalueErrors += atomic.LoadUint64(&c.bigStats.InvalidMetavalueErrors) + s.InvalidValueLenErrors += atomic.LoadUint64(&c.bigStats.InvalidValueLenErrors) + s.InvalidValueHashErrors += atomic.LoadUint64(&c.bigStats.InvalidValueHashErrors) +} + +type bucket struct { + mu sync.RWMutex + + // chunks is a ring buffer with encoded (k, v) pairs. + // It consists of 64KB chunks. + chunks [][]byte + + // m maps hash(k) to idx of (k, v) pair in chunks. + m map[uint64]uint64 + + // idx points to chunks for writing the next (k, v) pair. + idx uint64 + + // gen is the generation of chunks. 
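+	// gen is incremented (skipping 0) each time the ring buffer wraps
+	// around, which lets Get detect index entries in m that still point
+	// into chunks that have since been overwritten.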
+ gen uint64 + + getCalls uint64 + setCalls uint64 + misses uint64 + collisions uint64 + corruptions uint64 +} + +func (b *bucket) Init(maxBytes uint64) { + if maxBytes >= maxBucketSize { + panic(fmt.Errorf("too big maxBytes=%d; should be smaller than %d", maxBytes, maxBucketSize)) + } + maxChunks := (maxBytes + chunkSize - 1) / chunkSize + b.chunks = make([][]byte, maxChunks) + b.m = make(map[uint64]uint64) + b.Reset() +} + +func (b *bucket) Reset() { + b.mu.Lock() + chunks := b.chunks + for i := range chunks { + putChunk(chunks[i]) + chunks[i] = nil + } + bm := b.m + for k := range bm { + delete(bm, k) + } + b.idx = 0 + b.gen = 1 + atomic.StoreUint64(&b.getCalls, 0) + atomic.StoreUint64(&b.setCalls, 0) + atomic.StoreUint64(&b.misses, 0) + atomic.StoreUint64(&b.collisions, 0) + atomic.StoreUint64(&b.corruptions, 0) + b.mu.Unlock() +} + +func (b *bucket) Clean() { + b.mu.Lock() + bGen := b.gen + bIdx := b.idx + bm := b.m + for k, v := range bm { + gen := v >> bucketSizeBits + idx := v & ((1 << bucketSizeBits) - 1) + if gen == bGen && idx < bIdx || gen+1 == bGen && idx >= bIdx { + continue + } + delete(bm, k) + } + b.mu.Unlock() +} + +func (b *bucket) UpdateStats(s *Stats) { + s.GetCalls += atomic.LoadUint64(&b.getCalls) + s.SetCalls += atomic.LoadUint64(&b.setCalls) + s.Misses += atomic.LoadUint64(&b.misses) + s.Collisions += atomic.LoadUint64(&b.collisions) + s.Corruptions += atomic.LoadUint64(&b.corruptions) + + b.mu.RLock() + s.EntriesCount += uint64(len(b.m)) + for _, chunk := range b.chunks { + s.BytesSize += uint64(cap(chunk)) + } + b.mu.RUnlock() +} + +func (b *bucket) Set(k, v []byte, h uint64) { + setCalls := atomic.AddUint64(&b.setCalls, 1) + if setCalls%(1<<14) == 0 { + b.Clean() + } + + if len(k) >= (1<<16) || len(v) >= (1<<16) { + // Too big key or value - its length cannot be encoded + // with 2 bytes (see below). Skip the entry. + return + } + var kvLenBuf [4]byte + kvLenBuf[0] = byte(uint16(len(k)) >> 8) + kvLenBuf[1] = byte(len(k)) + kvLenBuf[2] = byte(uint16(len(v)) >> 8) + kvLenBuf[3] = byte(len(v)) + kvLen := uint64(len(kvLenBuf) + len(k) + len(v)) + if kvLen >= chunkSize { + // Do not store too big keys and values, since they do not + // fit a chunk. + return + } + + b.mu.Lock() + idx := b.idx + idxNew := idx + kvLen + chunkIdx := idx / chunkSize + chunkIdxNew := idxNew / chunkSize + if chunkIdxNew > chunkIdx { + if chunkIdxNew >= uint64(len(b.chunks)) { + idx = 0 + idxNew = kvLen + chunkIdx = 0 + b.gen++ + if b.gen == 0 { + b.gen = 1 + } + } else { + idx = chunkIdxNew * chunkSize + idxNew = idx + kvLen + chunkIdx = chunkIdxNew + } + b.chunks[chunkIdx] = b.chunks[chunkIdx][:0] + } + chunk := b.chunks[chunkIdx] + if chunk == nil { + chunk = getChunk() + chunk = chunk[:0] + } + chunk = append(chunk, kvLenBuf[:]...) + chunk = append(chunk, k...) + chunk = append(chunk, v...) + b.chunks[chunkIdx] = chunk + b.m[h] = idx | (b.gen << bucketSizeBits) + b.idx = idxNew + b.mu.Unlock() +} + +func (b *bucket) Get(dst, k []byte, h uint64) []byte { + atomic.AddUint64(&b.getCalls, 1) + found := false + b.mu.RLock() + v := b.m[h] + if v > 0 { + gen := v >> bucketSizeBits + idx := v & ((1 << bucketSizeBits) - 1) + if gen == b.gen && idx < b.idx || gen+1 == b.gen && idx >= b.idx { + chunkIdx := idx / chunkSize + if chunkIdx >= uint64(len(b.chunks)) { + // Corrupted data during the load from file. Just skip it. + atomic.AddUint64(&b.corruptions, 1) + goto end + } + chunk := b.chunks[chunkIdx] + idx %= chunkSize + if idx+4 >= chunkSize { + // Corrupted data during the load from file. 
Just skip it. + atomic.AddUint64(&b.corruptions, 1) + goto end + } + kvLenBuf := chunk[idx : idx+4] + keyLen := (uint64(kvLenBuf[0]) << 8) | uint64(kvLenBuf[1]) + valLen := (uint64(kvLenBuf[2]) << 8) | uint64(kvLenBuf[3]) + idx += 4 + if idx+keyLen+valLen >= chunkSize { + // Corrupted data during the load from file. Just skip it. + atomic.AddUint64(&b.corruptions, 1) + goto end + } + if string(k) == string(chunk[idx:idx+keyLen]) { + idx += keyLen + dst = append(dst, chunk[idx:idx+valLen]...) + found = true + } else { + atomic.AddUint64(&b.collisions, 1) + } + } + } +end: + b.mu.RUnlock() + if !found { + atomic.AddUint64(&b.misses, 1) + } + return dst +} + +func (b *bucket) Del(h uint64) { + b.mu.Lock() + delete(b.m, h) + b.mu.Unlock() +} diff --git a/vendor/github.com/VictoriaMetrics/fastcache/file.go b/vendor/github.com/VictoriaMetrics/fastcache/file.go new file mode 100644 index 00000000000..12e36e7c3e8 --- /dev/null +++ b/vendor/github.com/VictoriaMetrics/fastcache/file.go @@ -0,0 +1,397 @@ +package fastcache + +import ( + "encoding/binary" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "regexp" + "runtime" + + "github.com/golang/snappy" +) + +// SaveToFile atomically saves cache data to the given filePath using a single +// CPU core. +// +// SaveToFile may be called concurrently with other operations on the cache. +// +// The saved data may be loaded with LoadFromFile*. +// +// See also SaveToFileConcurrent for faster saving to file. +func (c *Cache) SaveToFile(filePath string) error { + return c.SaveToFileConcurrent(filePath, 1) +} + +// SaveToFileConcurrent saves cache data to the given filePath using concurrency +// CPU cores. +// +// SaveToFileConcurrent may be called concurrently with other operations +// on the cache. +// +// The saved data may be loaded with LoadFromFile*. +// +// See also SaveToFile. +func (c *Cache) SaveToFileConcurrent(filePath string, concurrency int) error { + // Create dir if it doesn't exist. + dir := filepath.Dir(filePath) + if _, err := os.Stat(dir); err != nil { + if !os.IsNotExist(err) { + return fmt.Errorf("cannot stat %q: %s", dir, err) + } + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("cannot create dir %q: %s", dir, err) + } + } + + // Save cache data into a temporary directory. + tmpDir, err := ioutil.TempDir(dir, "fastcache.tmp.") + if err != nil { + return fmt.Errorf("cannot create temporary dir inside %q: %s", dir, err) + } + defer func() { + if tmpDir != "" { + _ = os.RemoveAll(tmpDir) + } + }() + gomaxprocs := runtime.GOMAXPROCS(-1) + if concurrency <= 0 || concurrency > gomaxprocs { + concurrency = gomaxprocs + } + if err := c.save(tmpDir, concurrency); err != nil { + return fmt.Errorf("cannot save cache data to temporary dir %q: %s", tmpDir, err) + } + + // Remove old filePath contents, since os.Rename may return + // error if filePath dir exists. + if err := os.RemoveAll(filePath); err != nil { + return fmt.Errorf("cannot remove old contents at %q: %s", filePath, err) + } + if err := os.Rename(tmpDir, filePath); err != nil { + return fmt.Errorf("cannot move temporary dir %q to %q: %s", tmpDir, filePath, err) + } + tmpDir = "" + return nil +} + +// LoadFromFile loads cache data from the given filePath. +// +// See SaveToFile* for saving cache data to file. +func LoadFromFile(filePath string) (*Cache, error) { + return load(filePath, 0) +} + +// LoadFromFileOrNew tries loading cache data from the given filePath. 
+// +// The function falls back to creating new cache with the given maxBytes +// capacity if error occurs during loading the cache from file. +func LoadFromFileOrNew(filePath string, maxBytes int) *Cache { + c, err := load(filePath, maxBytes) + if err == nil { + return c + } + return New(maxBytes) +} + +func (c *Cache) save(dir string, workersCount int) error { + if err := saveMetadata(c, dir); err != nil { + return err + } + + // Save buckets by workersCount concurrent workers. + workCh := make(chan int, workersCount) + results := make(chan error) + for i := 0; i < workersCount; i++ { + go func(workerNum int) { + results <- saveBuckets(c.buckets[:], workCh, dir, workerNum) + }(i) + } + // Feed workers with work + for i := range c.buckets[:] { + workCh <- i + } + close(workCh) + + // Read results. + var err error + for i := 0; i < workersCount; i++ { + result := <-results + if result != nil && err != nil { + err = result + } + } + return err +} + +func load(filePath string, maxBytes int) (*Cache, error) { + maxBucketChunks, err := loadMetadata(filePath) + if err != nil { + return nil, err + } + if maxBytes > 0 { + maxBucketBytes := uint64((maxBytes + bucketsCount - 1) / bucketsCount) + expectedBucketChunks := (maxBucketBytes + chunkSize - 1) / chunkSize + if maxBucketChunks != expectedBucketChunks { + return nil, fmt.Errorf("cache file %s contains maxBytes=%d; want %d", filePath, maxBytes, expectedBucketChunks*chunkSize*bucketsCount) + } + } + + // Read bucket files from filePath dir. + d, err := os.Open(filePath) + if err != nil { + return nil, fmt.Errorf("cannot open %q: %s", filePath, err) + } + defer func() { + _ = d.Close() + }() + fis, err := d.Readdir(-1) + if err != nil { + return nil, fmt.Errorf("cannot read files from %q: %s", filePath, err) + } + results := make(chan error) + workersCount := 0 + var c Cache + for _, fi := range fis { + fn := fi.Name() + if fi.IsDir() || !dataFileRegexp.MatchString(fn) { + continue + } + workersCount++ + go func(dataPath string) { + results <- loadBuckets(c.buckets[:], dataPath, maxBucketChunks) + }(filePath + "/" + fn) + } + err = nil + for i := 0; i < workersCount; i++ { + result := <-results + if result != nil && err == nil { + err = result + } + } + if err != nil { + return nil, err + } + return &c, nil +} + +func saveMetadata(c *Cache, dir string) error { + metadataPath := dir + "/metadata.bin" + metadataFile, err := os.Create(metadataPath) + if err != nil { + return fmt.Errorf("cannot create %q: %s", metadataPath, err) + } + defer func() { + _ = metadataFile.Close() + }() + maxBucketChunks := uint64(cap(c.buckets[0].chunks)) + if err := writeUint64(metadataFile, maxBucketChunks); err != nil { + return fmt.Errorf("cannot write maxBucketChunks=%d to %q: %s", maxBucketChunks, metadataPath, err) + } + return nil +} + +func loadMetadata(dir string) (uint64, error) { + metadataPath := dir + "/metadata.bin" + metadataFile, err := os.Open(metadataPath) + if err != nil { + return 0, fmt.Errorf("cannot open %q: %s", metadataPath, err) + } + defer func() { + _ = metadataFile.Close() + }() + maxBucketChunks, err := readUint64(metadataFile) + if err != nil { + return 0, fmt.Errorf("cannot read maxBucketChunks from %q: %s", metadataPath, err) + } + return maxBucketChunks, nil +} + +var dataFileRegexp = regexp.MustCompile(`^data\.\d+\.bin$`) + +func saveBuckets(buckets []bucket, workCh <-chan int, dir string, workerNum int) error { + dataPath := fmt.Sprintf("%s/data.%d.bin", dir, workerNum) + dataFile, err := os.Create(dataPath) + if err != nil { + 
return fmt.Errorf("cannot create %q: %s", dataPath, err) + } + defer func() { + _ = dataFile.Close() + }() + zw := snappy.NewBufferedWriter(dataFile) + for bucketNum := range workCh { + if err := writeUint64(zw, uint64(bucketNum)); err != nil { + return fmt.Errorf("cannot write bucketNum=%d to %q: %s", bucketNum, dataPath, err) + } + if err := buckets[bucketNum].Save(zw); err != nil { + return fmt.Errorf("cannot save bucket[%d] to %q: %s", bucketNum, dataPath, err) + } + } + if err := zw.Close(); err != nil { + return fmt.Errorf("cannot close snappy.Writer for %q: %s", dataPath, err) + } + return nil +} + +func loadBuckets(buckets []bucket, dataPath string, maxChunks uint64) error { + dataFile, err := os.Open(dataPath) + if err != nil { + return fmt.Errorf("cannot open %q: %s", dataPath, err) + } + defer func() { + _ = dataFile.Close() + }() + zr := snappy.NewReader(dataFile) + for { + bucketNum, err := readUint64(zr) + if err == io.EOF { + // Reached the end of file. + return nil + } + if bucketNum >= uint64(len(buckets)) { + return fmt.Errorf("unexpected bucketNum read from %q: %d; must be smaller than %d", dataPath, bucketNum, len(buckets)) + } + if err := buckets[bucketNum].Load(zr, maxChunks); err != nil { + return fmt.Errorf("cannot load bucket[%d] from %q: %s", bucketNum, dataPath, err) + } + } +} + +func (b *bucket) Save(w io.Writer) error { + b.Clean() + + b.mu.RLock() + defer b.mu.RUnlock() + + // Store b.idx, b.gen and b.m to w. + + bIdx := b.idx + bGen := b.gen + chunksLen := 0 + for _, chunk := range b.chunks { + if chunk == nil { + break + } + chunksLen++ + } + kvs := make([]byte, 0, 2*8*len(b.m)) + var u64Buf [8]byte + for k, v := range b.m { + binary.LittleEndian.PutUint64(u64Buf[:], k) + kvs = append(kvs, u64Buf[:]...) + binary.LittleEndian.PutUint64(u64Buf[:], v) + kvs = append(kvs, u64Buf[:]...) + } + + if err := writeUint64(w, bIdx); err != nil { + return fmt.Errorf("cannot write b.idx: %s", err) + } + if err := writeUint64(w, bGen); err != nil { + return fmt.Errorf("cannot write b.gen: %s", err) + } + if err := writeUint64(w, uint64(len(kvs))/2/8); err != nil { + return fmt.Errorf("cannot write len(b.m): %s", err) + } + if _, err := w.Write(kvs); err != nil { + return fmt.Errorf("cannot write b.m: %s", err) + } + + // Store b.chunks to w. 
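+	// Layout: the count of non-nil chunks, then each of those chunks
+	// written at its full chunkSize capacity.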
+ if err := writeUint64(w, uint64(chunksLen)); err != nil { + return fmt.Errorf("cannot write len(b.chunks): %s", err) + } + for chunkIdx := 0; chunkIdx < chunksLen; chunkIdx++ { + chunk := b.chunks[chunkIdx][:chunkSize] + if _, err := w.Write(chunk); err != nil { + return fmt.Errorf("cannot write b.chunks[%d]: %s", chunkIdx, err) + } + } + + return nil +} + +func (b *bucket) Load(r io.Reader, maxChunks uint64) error { + bIdx, err := readUint64(r) + if err != nil { + return fmt.Errorf("cannot read b.idx: %s", err) + } + bGen, err := readUint64(r) + if err != nil { + return fmt.Errorf("cannot read b.gen: %s", err) + } + kvsLen, err := readUint64(r) + if err != nil { + return fmt.Errorf("cannot read len(b.m): %s", err) + } + kvsLen *= 2 * 8 + kvs := make([]byte, kvsLen) + if _, err := io.ReadFull(r, kvs); err != nil { + return fmt.Errorf("cannot read b.m: %s", err) + } + m := make(map[uint64]uint64, kvsLen/2/8) + for len(kvs) > 0 { + k := binary.LittleEndian.Uint64(kvs) + kvs = kvs[8:] + v := binary.LittleEndian.Uint64(kvs) + kvs = kvs[8:] + m[k] = v + } + + maxBytes := maxChunks * chunkSize + if maxBytes >= maxBucketSize { + return fmt.Errorf("too big maxBytes=%d; should be smaller than %d", maxBytes, maxBucketSize) + } + chunks := make([][]byte, maxChunks) + chunksLen, err := readUint64(r) + if err != nil { + return fmt.Errorf("cannot read len(b.chunks): %s", err) + } + if chunksLen > uint64(maxChunks) { + return fmt.Errorf("chunksLen=%d cannot exceed maxChunks=%d", chunksLen, maxChunks) + } + currChunkIdx := bIdx / chunkSize + if currChunkIdx > 0 && currChunkIdx >= chunksLen { + return fmt.Errorf("too big bIdx=%d; should be smaller than %d", bIdx, chunksLen * chunkSize) + } + for chunkIdx := uint64(0); chunkIdx < chunksLen; chunkIdx++ { + chunk := getChunk() + if _, err := io.ReadFull(r, chunk); err != nil { + return fmt.Errorf("cannot read b.chunks[%d]: %s", chunkIdx, err) + } + chunks[chunkIdx] = chunk + } + // Adjust len for the chunk pointed by currChunkIdx. + if chunksLen > 0 { + chunkLen := bIdx % chunkSize + chunks[currChunkIdx] = chunks[currChunkIdx][:chunkLen] + } + + b.mu.Lock() + for _, chunk := range b.chunks { + putChunk(chunk) + } + b.chunks = chunks + b.m = m + b.idx = bIdx + b.gen = bGen + b.mu.Unlock() + + return nil +} + +func writeUint64(w io.Writer, u uint64) error { + var u64Buf [8]byte + binary.LittleEndian.PutUint64(u64Buf[:], u) + _, err := w.Write(u64Buf[:]) + return err +} + +func readUint64(r io.Reader) (uint64, error) { + var u64Buf [8]byte + if _, err := io.ReadFull(r, u64Buf[:]); err != nil { + return 0, err + } + u := binary.LittleEndian.Uint64(u64Buf[:]) + return u, nil +} diff --git a/vendor/github.com/VictoriaMetrics/fastcache/malloc_heap.go b/vendor/github.com/VictoriaMetrics/fastcache/malloc_heap.go new file mode 100644 index 00000000000..79a71832adb --- /dev/null +++ b/vendor/github.com/VictoriaMetrics/fastcache/malloc_heap.go @@ -0,0 +1,11 @@ +// +build appengine windows + +package fastcache + +func getChunk() []byte { + return make([]byte, chunkSize) +} + +func putChunk(chunk []byte) { + // No-op. 
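+	// Heap-allocated chunks are left to the garbage collector; only the
+	// mmap-based implementation in malloc_mmap.go recycles chunks through
+	// a free list.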
+} diff --git a/vendor/github.com/VictoriaMetrics/fastcache/malloc_mmap.go b/vendor/github.com/VictoriaMetrics/fastcache/malloc_mmap.go new file mode 100644 index 00000000000..424b79b43ac --- /dev/null +++ b/vendor/github.com/VictoriaMetrics/fastcache/malloc_mmap.go @@ -0,0 +1,52 @@ +// +build !appengine,!windows + +package fastcache + +import ( + "fmt" + "sync" + "syscall" + "unsafe" +) + +const chunksPerAlloc = 1024 + +var ( + freeChunks []*[chunkSize]byte + freeChunksLock sync.Mutex +) + +func getChunk() []byte { + freeChunksLock.Lock() + if len(freeChunks) == 0 { + // Allocate offheap memory, so GOGC won't take into account cache size. + // This should reduce free memory waste. + data, err := syscall.Mmap(-1, 0, chunkSize*chunksPerAlloc, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_ANON|syscall.MAP_PRIVATE) + if err != nil { + panic(fmt.Errorf("cannot allocate %d bytes via mmap: %s", chunkSize*chunksPerAlloc, err)) + } + for len(data) > 0 { + p := (*[chunkSize]byte)(unsafe.Pointer(&data[0])) + freeChunks = append(freeChunks, p) + data = data[chunkSize:] + } + } + n := len(freeChunks) - 1 + p := freeChunks[n] + freeChunks[n] = nil + freeChunks = freeChunks[:n] + freeChunksLock.Unlock() + return p[:] +} + +func putChunk(chunk []byte) { + if chunk == nil { + return + } + chunk = chunk[:chunkSize] + p := (*[chunkSize]byte)(unsafe.Pointer(&chunk[0])) + + freeChunksLock.Lock() + freeChunks = append(freeChunks, p) + freeChunksLock.Unlock() +} diff --git a/vendor/github.com/cespare/xxhash/LICENSE.txt b/vendor/github.com/cespare/xxhash/LICENSE.txt new file mode 100644 index 00000000000..24b53065f40 --- /dev/null +++ b/vendor/github.com/cespare/xxhash/LICENSE.txt @@ -0,0 +1,22 @@ +Copyright (c) 2016 Caleb Spare + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/cespare/xxhash/xxhash.go b/vendor/github.com/cespare/xxhash/xxhash.go new file mode 100644 index 00000000000..db0b35fbe39 --- /dev/null +++ b/vendor/github.com/cespare/xxhash/xxhash.go @@ -0,0 +1,236 @@ +// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described +// at http://cyan4973.github.io/xxHash/. +package xxhash + +import ( + "encoding/binary" + "errors" + "math/bits" +) + +const ( + prime1 uint64 = 11400714785074694791 + prime2 uint64 = 14029467366897019727 + prime3 uint64 = 1609587929392839161 + prime4 uint64 = 9650029242287828579 + prime5 uint64 = 2870177450012600261 +) + +// NOTE(caleb): I'm using both consts and vars of the primes. 
Using consts where +// possible in the Go code is worth a small (but measurable) performance boost +// by avoiding some MOVQs. Vars are needed for the asm and also are useful for +// convenience in the Go code in a few places where we need to intentionally +// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the +// result overflows a uint64). +var ( + prime1v = prime1 + prime2v = prime2 + prime3v = prime3 + prime4v = prime4 + prime5v = prime5 +) + +// Digest implements hash.Hash64. +type Digest struct { + v1 uint64 + v2 uint64 + v3 uint64 + v4 uint64 + total uint64 + mem [32]byte + n int // how much of mem is used +} + +// New creates a new Digest that computes the 64-bit xxHash algorithm. +func New() *Digest { + var d Digest + d.Reset() + return &d +} + +// Reset clears the Digest's state so that it can be reused. +func (d *Digest) Reset() { + d.v1 = prime1v + prime2 + d.v2 = prime2 + d.v3 = 0 + d.v4 = -prime1v + d.total = 0 + d.n = 0 +} + +// Size always returns 8 bytes. +func (d *Digest) Size() int { return 8 } + +// BlockSize always returns 32 bytes. +func (d *Digest) BlockSize() int { return 32 } + +// Write adds more data to d. It always returns len(b), nil. +func (d *Digest) Write(b []byte) (n int, err error) { + n = len(b) + d.total += uint64(n) + + if d.n+n < 32 { + // This new data doesn't even fill the current block. + copy(d.mem[d.n:], b) + d.n += n + return + } + + if d.n > 0 { + // Finish off the partial block. + copy(d.mem[d.n:], b) + d.v1 = round(d.v1, u64(d.mem[0:8])) + d.v2 = round(d.v2, u64(d.mem[8:16])) + d.v3 = round(d.v3, u64(d.mem[16:24])) + d.v4 = round(d.v4, u64(d.mem[24:32])) + b = b[32-d.n:] + d.n = 0 + } + + if len(b) >= 32 { + // One or more full blocks left. + nw := writeBlocks(d, b) + b = b[nw:] + } + + // Store any remaining partial block. + copy(d.mem[:], b) + d.n = len(b) + + return +} + +// Sum appends the current hash to b and returns the resulting slice. +func (d *Digest) Sum(b []byte) []byte { + s := d.Sum64() + return append( + b, + byte(s>>56), + byte(s>>48), + byte(s>>40), + byte(s>>32), + byte(s>>24), + byte(s>>16), + byte(s>>8), + byte(s), + ) +} + +// Sum64 returns the current hash. +func (d *Digest) Sum64() uint64 { + var h uint64 + + if d.total >= 32 { + v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4 + h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) + h = mergeRound(h, v1) + h = mergeRound(h, v2) + h = mergeRound(h, v3) + h = mergeRound(h, v4) + } else { + h = d.v3 + prime5 + } + + h += d.total + + i, end := 0, d.n + for ; i+8 <= end; i += 8 { + k1 := round(0, u64(d.mem[i:i+8])) + h ^= k1 + h = rol27(h)*prime1 + prime4 + } + if i+4 <= end { + h ^= uint64(u32(d.mem[i:i+4])) * prime1 + h = rol23(h)*prime2 + prime3 + i += 4 + } + for i < end { + h ^= uint64(d.mem[i]) * prime5 + h = rol11(h) * prime1 + i++ + } + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return h +} + +const ( + magic = "xxh\x06" + marshaledSize = len(magic) + 8*5 + 32 +) + +// MarshalBinary implements the encoding.BinaryMarshaler interface. +func (d *Digest) MarshalBinary() ([]byte, error) { + b := make([]byte, 0, marshaledSize) + b = append(b, magic...) + b = appendUint64(b, d.v1) + b = appendUint64(b, d.v2) + b = appendUint64(b, d.v3) + b = appendUint64(b, d.v4) + b = appendUint64(b, d.total) + b = append(b, d.mem[:d.n]...) + b = b[:len(b)+len(d.mem)-d.n] + return b, nil +} + +// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface. 
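+//
+// The input must match the layout produced by MarshalBinary: the magic
+// string, five uint64 fields (v1..v4 and total) and the 32-byte mem buffer.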
+func (d *Digest) UnmarshalBinary(b []byte) error { + if len(b) < len(magic) || string(b[:len(magic)]) != magic { + return errors.New("xxhash: invalid hash state identifier") + } + if len(b) != marshaledSize { + return errors.New("xxhash: invalid hash state size") + } + b = b[len(magic):] + b, d.v1 = consumeUint64(b) + b, d.v2 = consumeUint64(b) + b, d.v3 = consumeUint64(b) + b, d.v4 = consumeUint64(b) + b, d.total = consumeUint64(b) + copy(d.mem[:], b) + b = b[len(d.mem):] + d.n = int(d.total % uint64(len(d.mem))) + return nil +} + +func appendUint64(b []byte, x uint64) []byte { + var a [8]byte + binary.LittleEndian.PutUint64(a[:], x) + return append(b, a[:]...) +} + +func consumeUint64(b []byte) ([]byte, uint64) { + x := u64(b) + return b[8:], x +} + +func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) } +func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) } + +func round(acc, input uint64) uint64 { + acc += input * prime2 + acc = rol31(acc) + acc *= prime1 + return acc +} + +func mergeRound(acc, val uint64) uint64 { + val = round(0, val) + acc ^= val + acc = acc*prime1 + prime4 + return acc +} + +func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) } +func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) } +func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) } +func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) } +func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) } +func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) } +func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) } +func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) } diff --git a/vendor/github.com/cespare/xxhash/xxhash_amd64.go b/vendor/github.com/cespare/xxhash/xxhash_amd64.go new file mode 100644 index 00000000000..35318d7c46c --- /dev/null +++ b/vendor/github.com/cespare/xxhash/xxhash_amd64.go @@ -0,0 +1,13 @@ +// +build !appengine +// +build gc +// +build !purego + +package xxhash + +// Sum64 computes the 64-bit xxHash digest of b. +// +//go:noescape +func Sum64(b []byte) uint64 + +//go:noescape +func writeBlocks(*Digest, []byte) int diff --git a/vendor/github.com/cespare/xxhash/xxhash_amd64.s b/vendor/github.com/cespare/xxhash/xxhash_amd64.s new file mode 100644 index 00000000000..d580e32aed4 --- /dev/null +++ b/vendor/github.com/cespare/xxhash/xxhash_amd64.s @@ -0,0 +1,215 @@ +// +build !appengine +// +build gc +// +build !purego + +#include "textflag.h" + +// Register allocation: +// AX h +// CX pointer to advance through b +// DX n +// BX loop end +// R8 v1, k1 +// R9 v2 +// R10 v3 +// R11 v4 +// R12 tmp +// R13 prime1v +// R14 prime2v +// R15 prime4v + +// round reads from and advances the buffer pointer in CX. +// It assumes that R13 has prime1v and R14 has prime2v. +#define round(r) \ + MOVQ (CX), R12 \ + ADDQ $8, CX \ + IMULQ R14, R12 \ + ADDQ R12, r \ + ROLQ $31, r \ + IMULQ R13, r + +// mergeRound applies a merge round on the two registers acc and val. +// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v. +#define mergeRound(acc, val) \ + IMULQ R14, val \ + ROLQ $31, val \ + IMULQ R13, val \ + XORQ val, acc \ + IMULQ R13, acc \ + ADDQ R15, acc + +// func Sum64(b []byte) uint64 +TEXT ·Sum64(SB), NOSPLIT, $0-32 + // Load fixed primes. + MOVQ ·prime1v(SB), R13 + MOVQ ·prime2v(SB), R14 + MOVQ ·prime4v(SB), R15 + + // Load slice. + MOVQ b_base+0(FP), CX + MOVQ b_len+8(FP), DX + LEAQ (CX)(DX*1), BX + + // The first loop limit will be len(b)-32. 
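+	// (After this, BX = &b[0] + len(b) - 32; blockLoop runs while CX <= BX,
+	// i.e. while at least 32 bytes remain.)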
+ SUBQ $32, BX + + // Check whether we have at least one block. + CMPQ DX, $32 + JLT noBlocks + + // Set up initial state (v1, v2, v3, v4). + MOVQ R13, R8 + ADDQ R14, R8 + MOVQ R14, R9 + XORQ R10, R10 + XORQ R11, R11 + SUBQ R13, R11 + + // Loop until CX > BX. +blockLoop: + round(R8) + round(R9) + round(R10) + round(R11) + + CMPQ CX, BX + JLE blockLoop + + MOVQ R8, AX + ROLQ $1, AX + MOVQ R9, R12 + ROLQ $7, R12 + ADDQ R12, AX + MOVQ R10, R12 + ROLQ $12, R12 + ADDQ R12, AX + MOVQ R11, R12 + ROLQ $18, R12 + ADDQ R12, AX + + mergeRound(AX, R8) + mergeRound(AX, R9) + mergeRound(AX, R10) + mergeRound(AX, R11) + + JMP afterBlocks + +noBlocks: + MOVQ ·prime5v(SB), AX + +afterBlocks: + ADDQ DX, AX + + // Right now BX has len(b)-32, and we want to loop until CX > len(b)-8. + ADDQ $24, BX + + CMPQ CX, BX + JG fourByte + +wordLoop: + // Calculate k1. + MOVQ (CX), R8 + ADDQ $8, CX + IMULQ R14, R8 + ROLQ $31, R8 + IMULQ R13, R8 + + XORQ R8, AX + ROLQ $27, AX + IMULQ R13, AX + ADDQ R15, AX + + CMPQ CX, BX + JLE wordLoop + +fourByte: + ADDQ $4, BX + CMPQ CX, BX + JG singles + + MOVL (CX), R8 + ADDQ $4, CX + IMULQ R13, R8 + XORQ R8, AX + + ROLQ $23, AX + IMULQ R14, AX + ADDQ ·prime3v(SB), AX + +singles: + ADDQ $4, BX + CMPQ CX, BX + JGE finalize + +singlesLoop: + MOVBQZX (CX), R12 + ADDQ $1, CX + IMULQ ·prime5v(SB), R12 + XORQ R12, AX + + ROLQ $11, AX + IMULQ R13, AX + + CMPQ CX, BX + JL singlesLoop + +finalize: + MOVQ AX, R12 + SHRQ $33, R12 + XORQ R12, AX + IMULQ R14, AX + MOVQ AX, R12 + SHRQ $29, R12 + XORQ R12, AX + IMULQ ·prime3v(SB), AX + MOVQ AX, R12 + SHRQ $32, R12 + XORQ R12, AX + + MOVQ AX, ret+24(FP) + RET + +// writeBlocks uses the same registers as above except that it uses AX to store +// the d pointer. + +// func writeBlocks(d *Digest, b []byte) int +TEXT ·writeBlocks(SB), NOSPLIT, $0-40 + // Load fixed primes needed for round. + MOVQ ·prime1v(SB), R13 + MOVQ ·prime2v(SB), R14 + + // Load slice. + MOVQ b_base+8(FP), CX + MOVQ b_len+16(FP), DX + LEAQ (CX)(DX*1), BX + SUBQ $32, BX + + // Load vN from d. + MOVQ d+0(FP), AX + MOVQ 0(AX), R8 // v1 + MOVQ 8(AX), R9 // v2 + MOVQ 16(AX), R10 // v3 + MOVQ 24(AX), R11 // v4 + + // We don't need to check the loop condition here; this function is + // always called with at least one block of data to process. +blockLoop: + round(R8) + round(R9) + round(R10) + round(R11) + + CMPQ CX, BX + JLE blockLoop + + // Copy vN back to d. + MOVQ R8, 0(AX) + MOVQ R9, 8(AX) + MOVQ R10, 16(AX) + MOVQ R11, 24(AX) + + // The number of bytes written is CX minus the old base pointer. + SUBQ b_base+8(FP), CX + MOVQ CX, ret+32(FP) + + RET diff --git a/vendor/github.com/cespare/xxhash/xxhash_other.go b/vendor/github.com/cespare/xxhash/xxhash_other.go new file mode 100644 index 00000000000..4a5a821603e --- /dev/null +++ b/vendor/github.com/cespare/xxhash/xxhash_other.go @@ -0,0 +1,76 @@ +// +build !amd64 appengine !gc purego + +package xxhash + +// Sum64 computes the 64-bit xxHash digest of b. +func Sum64(b []byte) uint64 { + // A simpler version would be + // d := New() + // d.Write(b) + // return d.Sum64() + // but this is faster, particularly for small inputs. 
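+	// The structure below mirrors the amd64 assembly: 32-byte rounds first,
+	// then the tail is mixed in 8-, 4- and 1-byte steps before the final
+	// avalanche.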
+ + n := len(b) + var h uint64 + + if n >= 32 { + v1 := prime1v + prime2 + v2 := prime2 + v3 := uint64(0) + v4 := -prime1v + for len(b) >= 32 { + v1 = round(v1, u64(b[0:8:len(b)])) + v2 = round(v2, u64(b[8:16:len(b)])) + v3 = round(v3, u64(b[16:24:len(b)])) + v4 = round(v4, u64(b[24:32:len(b)])) + b = b[32:len(b):len(b)] + } + h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) + h = mergeRound(h, v1) + h = mergeRound(h, v2) + h = mergeRound(h, v3) + h = mergeRound(h, v4) + } else { + h = prime5 + } + + h += uint64(n) + + i, end := 0, len(b) + for ; i+8 <= end; i += 8 { + k1 := round(0, u64(b[i:i+8:len(b)])) + h ^= k1 + h = rol27(h)*prime1 + prime4 + } + if i+4 <= end { + h ^= uint64(u32(b[i:i+4:len(b)])) * prime1 + h = rol23(h)*prime2 + prime3 + i += 4 + } + for ; i < end; i++ { + h ^= uint64(b[i]) * prime5 + h = rol11(h) * prime1 + } + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return h +} + +func writeBlocks(d *Digest, b []byte) int { + v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4 + n := len(b) + for len(b) >= 32 { + v1 = round(v1, u64(b[0:8:len(b)])) + v2 = round(v2, u64(b[8:16:len(b)])) + v3 = round(v3, u64(b[16:24:len(b)])) + v4 = round(v4, u64(b[24:32:len(b)])) + b = b[32:len(b):len(b)] + } + d.v1, d.v2, d.v3, d.v4 = v1, v2, v3, v4 + return n - len(b) +} diff --git a/vendor/github.com/cespare/xxhash/xxhash_safe.go b/vendor/github.com/cespare/xxhash/xxhash_safe.go new file mode 100644 index 00000000000..fc9bea7a31f --- /dev/null +++ b/vendor/github.com/cespare/xxhash/xxhash_safe.go @@ -0,0 +1,15 @@ +// +build appengine + +// This file contains the safe implementations of otherwise unsafe-using code. + +package xxhash + +// Sum64String computes the 64-bit xxHash digest of s. +func Sum64String(s string) uint64 { + return Sum64([]byte(s)) +} + +// WriteString adds more data to d. It always returns len(s), nil. +func (d *Digest) WriteString(s string) (n int, err error) { + return d.Write([]byte(s)) +} diff --git a/vendor/github.com/cespare/xxhash/xxhash_unsafe.go b/vendor/github.com/cespare/xxhash/xxhash_unsafe.go new file mode 100644 index 00000000000..53bf76efbc2 --- /dev/null +++ b/vendor/github.com/cespare/xxhash/xxhash_unsafe.go @@ -0,0 +1,46 @@ +// +build !appengine + +// This file encapsulates usage of unsafe. +// xxhash_safe.go contains the safe implementations. + +package xxhash + +import ( + "reflect" + "unsafe" +) + +// Notes: +// +// See https://groups.google.com/d/msg/golang-nuts/dcjzJy-bSpw/tcZYBzQqAQAJ +// for some discussion about these unsafe conversions. +// +// In the future it's possible that compiler optimizations will make these +// unsafe operations unnecessary: https://golang.org/issue/2205. +// +// Both of these wrapper functions still incur function call overhead since they +// will not be inlined. We could write Go/asm copies of Sum64 and Digest.Write +// for strings to squeeze out a bit more speed. Mid-stack inlining should +// eventually fix this. + +// Sum64String computes the 64-bit xxHash digest of s. +// It may be faster than Sum64([]byte(s)) by avoiding a copy. +func Sum64String(s string) uint64 { + var b []byte + bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data + bh.Len = len(s) + bh.Cap = len(s) + return Sum64(b) +} + +// WriteString adds more data to d. It always returns len(s), nil. +// It may be faster than Write([]byte(s)) by avoiding a copy. 
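+// The byte slice aliases the string's backing array and is only read,
+// never modified.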
+func (d *Digest) WriteString(s string) (n int, err error) { + var b []byte + bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data + bh.Len = len(s) + bh.Cap = len(s) + return d.Write(b) +}
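
The patch itself carries no usage example, so here is a minimal, self-contained sketch of how the vendored API added above behaves. It is illustrative only and not part of the change: the `statedb.Cache` fields stay unexported, so callers outside the `statedb` package would exercise fastcache directly as shown.

```go
package main

import (
	"fmt"

	"github.com/VictoriaMetrics/fastcache"
)

func main() {
	// Capacity is given in bytes; fastcache rounds it up to a multiple of
	// 32 MB (512 buckets x 64 KB chunks), which is why the new statedb
	// wrapper documents the same granularity. statedb.New(128) builds a
	// 64 MB system cache plus a 128 MB user cache on top of this API.
	c := fastcache.New(64 * 1024 * 1024)

	// Entries whose key+value fit in a single 64 KB chunk go through Set/Get.
	c.Set([]byte("key"), []byte("value"))
	fmt.Printf("%s\n", c.Get(nil, []byte("key"))) // value

	// Larger values are split into ~64 KB subvalues by SetBig and
	// reassembled (and hash-verified) by GetBig.
	big := make([]byte, 1<<20)
	c.SetBig([]byte("big"), big)
	fmt.Println(len(c.GetBig(nil, []byte("big")))) // 1048576

	// Stats are accumulated across all buckets.
	var s fastcache.Stats
	c.UpdateStats(&s)
	fmt.Println(s.EntriesCount, s.BytesSize)
}
```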