Sync with FB version. Added:

- better selection of training sets for PQ and preprocessing
- GPU parameter object (see the sketch after this list)
- IndexIDMap fixed (default constructor added)
- fixed redo bug in clustering
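A minimal sketch of the new GPU parameter object, based on the gpu/GpuAutoTune.cpp and benchmark hunks below; resources and cpu_flat_index are assumed to be an existing faiss::gpu::GpuResources* and faiss::IndexFlat* (an illustration of the pattern, not code from the commit):

    // new style: GPU options live in a config struct instead of
    // positional constructor arguments
    faiss::gpu::GpuIndexFlatConfig config;
    config.device = 0;               // which GPU the index lives on
    config.useFloat16 = false;       // keep stored vectors in float32
    config.storeTransposed = false;  // layout option added in this commit

    faiss::gpu::GpuIndexFlat *gpu_index =
        new faiss::gpu::GpuIndexFlat(resources, cpu_flat_index, config);

    // old style (removed): new GpuIndexFlat(resources, device, useFloat16, ifl);

The Python benchmarks below use the same pattern through faiss.GpuIndexFlatConfig.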
mdouze committed Mar 20, 2017
1 parent acb9385 commit c507707
Showing 41 changed files with 2,454 additions and 1,009 deletions.
12 changes: 6 additions & 6 deletions Clustering.cpp
@@ -104,14 +104,13 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
int(nx), d, k, nredo, niter);



idx_t * assign = new idx_t[nx];
float * dis = new float[nx];

float best_err = 1e50;
double t_search_tot = 0;
if (verbose) {
printf(" Preprocessing in %5g s\n",
printf(" Preprocessing in %.2f s\n",
(getmillisecs() - t0)/1000.);
}
t0 = getmillisecs();
@@ -149,7 +148,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
if (!index.is_trained)
index.train (k, cur_centroids.data());

- FAISS_ASSERT (index.ntotal == 0 );
+ FAISS_ASSERT (index.ntotal == 0);
index.add (k, cur_centroids.data());
float err = 0;
for (int i = 0; i < niter; i++) {
@@ -183,16 +182,17 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
index.train (k, cur_centroids.data());

assert (index.ntotal == 0);
- index.add (k, centroids.data());
+ index.add (k, cur_centroids.data());
}
if (verbose) printf("\n");
if (nredo > 1) {
if (err < best_err) {
if (verbose)
printf ("Keep new clusters\n");
centroids = cur_centroids;
printf ("Objective improved: keep new clusters\n");
centroids = buf_centroids;
best_err = err;
}
index.reset ();
}
}
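For context, the hunk above restores the usual redo-and-keep-best behaviour: each of the nredo restarts trains into a fresh buffer, and only the run with the lowest objective replaces the stored centroids before the index is reset for the next restart. A simplified sketch of that pattern, not the library's exact code (run_one_kmeans is a hypothetical helper):

    #include <cstddef>
    #include <limits>
    #include <vector>

    // hypothetical helper: trains k centroids of dimension d on nx points
    // and returns the final quantization error
    float run_one_kmeans (size_t nx, const float *x, size_t k, size_t d,
                          float *centroids_out);

    std::vector<float> best_of_nredo (size_t nx, const float *x,
                                      size_t k, size_t d, int nredo) {
        std::vector<float> centroids;                       // best run so far
        float best_err = std::numeric_limits<float>::max();
        for (int redo = 0; redo < nredo; redo++) {
            std::vector<float> cur_centroids (k * d);
            float err = run_one_kmeans (nx, x, k, d, cur_centroids.data());
            if (err < best_err) {
                // the fixed bug: the old code re-added the stale centroids
                // buffer to the index instead of the freshly trained one
                best_err = err;
                centroids = cur_centroids;
            }
        }
        return centroids;
    }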

1 change: 0 additions & 1 deletion Clustering.h
@@ -26,7 +26,6 @@ struct ClusteringParameters {
int niter; ///< clustering iterations
int nredo; ///< redo clustering this many times and keep best


bool verbose;
bool spherical; ///< do we want normalized centroids?
bool update_index; ///< update index after each iteration?
8 changes: 6 additions & 2 deletions IndexIVFPQ.cpp
@@ -82,8 +82,11 @@ void IndexIVFPQ::train_residual (idx_t n, const float *x)

void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
{
- idx_t ntrain = pq.ksub * 64;
- if(n > ntrain) n = ntrain;
+ const float * x_in = x;

+ x = fvecs_maybe_subsample (
+     d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub,
+     x, verbose, pq.cp.seed);

const float *trainset;
if (by_residual) {
@@ -132,6 +135,7 @@ void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
precompute_table ();
}

+ if (x_in != x) delete [] x;
}
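The calls above follow the subsampling pattern used throughout this commit: remember the original pointer, let fvecs_maybe_subsample() either return it unchanged or allocate a random subset (updating n in place), and free the subset at the end. A caller-side sketch; the signature is inferred from the calls in this diff and the training step is a placeholder:

    #include <cstddef>

    // signature inferred from the calls in this commit; the real
    // declaration lives in the library's utils header
    const float * fvecs_maybe_subsample (size_t d, size_t *n, size_t nmax,
                                         const float *x, bool verbose,
                                         long seed);

    void train_with_cap (size_t d, size_t n, const float *x) {
        const float *x_in = x;

        // returns x unchanged if n <= nmax, otherwise a newly allocated
        // random subset of nmax vectors; n is updated in place
        x = fvecs_maybe_subsample (d, &n, /*nmax=*/256 * 256, x,
                                   /*verbose=*/true, /*seed=*/1234);

        // ... placeholder: run the actual training on the n remaining vectors ...

        if (x_in != x) delete [] x;   // free only if a subset was allocated
    }

This caps the training cost (e.g. at pq.cp.max_points_per_centroid * pq.ksub points for the PQ above) without changing behaviour when the training set is already small enough.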


9 changes: 2 additions & 7 deletions Makefile
@@ -94,8 +94,6 @@ AutoTune.o: AutoTune.cpp AutoTune.h Index.h FaissAssert.h utils.h Heap.h \
IndexFlat.h VectorTransform.h IndexLSH.h IndexPQ.h ProductQuantizer.h \
Clustering.h PolysemousTraining.h IndexIVF.h IndexIVFPQ.h MetaIndexes.h
AuxIndexStructures.o: AuxIndexStructures.cpp AuxIndexStructures.h Index.h
- BinaryCode.o: BinaryCode.cpp BinaryCode.h VectorTransform.h Index.h \
- FaissAssert.h hamming.h Heap.h
Clustering.o: Clustering.cpp Clustering.h Index.h utils.h Heap.h \
FaissAssert.h IndexFlat.h
hamming.o: hamming.cpp hamming.h Heap.h FaissAssert.h
@@ -105,20 +103,16 @@ IndexFlat.o: IndexFlat.cpp IndexFlat.h Index.h utils.h Heap.h \
FaissAssert.h
index_io.o: index_io.cpp index_io.h FaissAssert.h IndexFlat.h Index.h \
VectorTransform.h IndexLSH.h IndexPQ.h ProductQuantizer.h Clustering.h \
- Heap.h PolysemousTraining.h IndexIVF.h IndexIVFPQ.h
+ Heap.h PolysemousTraining.h IndexIVF.h IndexIVFPQ.h MetaIndexes.h
IndexIVF.o: IndexIVF.cpp IndexIVF.h Index.h Clustering.h Heap.h utils.h \
hamming.h FaissAssert.h IndexFlat.h AuxIndexStructures.h
IndexIVFPQ.o: IndexIVFPQ.cpp IndexIVFPQ.h IndexIVF.h Index.h Clustering.h \
Heap.h IndexPQ.h ProductQuantizer.h PolysemousTraining.h utils.h \
IndexFlat.h hamming.h FaissAssert.h AuxIndexStructures.h
IndexLSH.o: IndexLSH.cpp IndexLSH.h Index.h VectorTransform.h utils.h \
Heap.h hamming.h FaissAssert.h
- IndexNested.o: IndexNested.cpp IndexNested.h IndexIVF.h Index.h \
- Clustering.h Heap.h IndexIVFPQ.h IndexPQ.h ProductQuantizer.h \
- PolysemousTraining.h IndexFlat.h FaissAssert.h
IndexPQ.o: IndexPQ.cpp IndexPQ.h Index.h ProductQuantizer.h Clustering.h \
Heap.h PolysemousTraining.h FaissAssert.h hamming.h

MetaIndexes.o: MetaIndexes.cpp MetaIndexes.h Index.h FaissAssert.h Heap.h
PolysemousTraining.o: PolysemousTraining.cpp PolysemousTraining.h \
ProductQuantizer.h Clustering.h Index.h Heap.h utils.h hamming.h \
@@ -131,6 +125,7 @@ VectorTransform.o: VectorTransform.cpp VectorTransform.h Index.h utils.h \
Heap.h FaissAssert.h IndexPQ.h ProductQuantizer.h Clustering.h \
PolysemousTraining.h


clean:
rm -f $(LIBNAME).a $(LIBNAME).$(SHAREDEXT)* *.o \
lua/swigfaiss.so lua/swigfaiss_wrap.cxx \
2 changes: 1 addition & 1 deletion MetaIndexes.h
@@ -49,7 +49,7 @@ struct IndexIDMap : Index {
virtual void set_typename () override;

virtual ~IndexIDMap ();

+ IndexIDMap () {own_fields=false; index=nullptr; }
};

/** Index that concatenates the results from several sub-indexes
32 changes: 7 additions & 25 deletions VectorTransform.cpp
@@ -95,7 +95,7 @@ void VectorTransform::reverse_transform (
LinearTransform::LinearTransform (int d_in, int d_out,
bool have_bias):
VectorTransform (d_in, d_out), have_bias (have_bias),
- max_points_per_d (1 << 20), verbose (false)
+ verbose (false)
{}

void LinearTransform::apply_noalloc (Index::idx_t n, const float * x,
@@ -152,27 +152,6 @@ void LinearTransform::transform_transpose (idx_t n, const float * y,
if (have_bias) delete [] y;
}

- const float * LinearTransform::maybe_subsample_train_set (
-     Index::idx_t *n, const float *x)
- {
-     if (*n <= max_points_per_d * d_in) return x;
-
-     size_t n2 = max_points_per_d * d_in;
-     if (verbose) {
-         printf (" Input training set too big, sampling "
-                 "%ld / %ld vectors\n", n2, *n);
-     }
-     std::vector<int> subset (*n);
-     rand_perm (subset.data (), *n, 1234);
-     float *x_subset = new float[n2 * d_in];
-     for (long i = 0; i < n2; i++)
-         memcpy (&x_subset[i * d_in],
-                 &x[subset[i] * size_t(d_in)],
-                 sizeof (x[0]) * d_in);
-     *n = n2;
-     return x_subset;
- }


/*********************************************
* RandomRotationMatrix
@@ -228,7 +207,8 @@ void PCAMatrix::train (Index::idx_t n, const float *x)
{
const float * x_in = x;

- x = maybe_subsample_train_set(&n, x);
+ x = fvecs_maybe_subsample (d_in, (size_t*)&n,
+     max_points_per_d * d_in, x, verbose);

// compute mean
mean.clear(); mean.resize(d_in, 0.0);
@@ -461,7 +441,8 @@ OPQMatrix::OPQMatrix (int d, int M, int d2):
verbose(false)
{
is_trained = false;
- max_points_per_d = 1000;
+ // OPQ is quite expensive to train, so set this right.
+ max_train_points = 256 * 256;
}


@@ -471,7 +452,8 @@ void OPQMatrix::train (Index::idx_t n, const float *x)

const float * x_in = x;

- x = maybe_subsample_train_set (&n, x);
+ x = fvecs_maybe_subsample (d_in, (size_t*)&n,
+     max_train_points, x, verbose);

// To support d_out > d_in, we pad input vectors with 0s to d_out
size_t d = d_out <= d_in ? d_in : d_out;
11 changes: 5 additions & 6 deletions VectorTransform.h
@@ -100,13 +100,8 @@ struct LinearTransform: VectorTransform {
void transform_transpose (idx_t n, const float * y,
float *x) const;

- // ratio between # training vectors and dimension
- size_t max_points_per_d;
bool verbose;

- // subsamples training set if there are too many vectors
- const float *maybe_subsample_train_set (Index::idx_t *n, const float *x);

virtual ~LinearTransform () {}


@@ -146,6 +141,9 @@ struct PCAMatrix: LinearTransform {
/// random rotation after PCA
bool random_rotation;

+ /// ratio between # training vectors and dimension
+ size_t max_points_per_d;

/// try to distribute output eigenvectors in this many bins
int balanced_bins;

@@ -191,8 +189,9 @@ struct OPQMatrix: LinearTransform {
int niter; ///< Number of outer training iterations
int niter_pq; ///< Number of training iterations for the PQ
int niter_pq_0; ///< same, for the first outer iteration

/// if there are too many training points, resample
- int max_points_per_d;
+ size_t max_train_points;
bool verbose;

/// if d2 != -1, output vectors of this dimension
5 changes: 4 additions & 1 deletion benchs/bench_gpu_sift1m.py
@@ -52,7 +52,10 @@ def fvecs_read(fname):

print "============ Exact search"

- index = faiss.GpuIndexFlatL2(res, 0, d, False)
+ flat_config = faiss.GpuIndexFlatConfig()
+ flat_config.device = 0

+ index = faiss.GpuIndexFlatL2(res, d, flat_config)

print "add vectors to index"

11 changes: 8 additions & 3 deletions benchs/kmeans_mnist.py
@@ -55,12 +55,17 @@ def train_kmeans(x, k, ngpu):

res = [faiss.StandardGpuResources() for i in range(ngpu)]

- useFloat16 = False
+ flat_config = []
+ for i in range(ngpu):
+     cfg = faiss.GpuIndexFlatConfig()
+     cfg.useFloat16 = False
+     cfg.device = i
+     flat_config.append(cfg)

if ngpu == 1:
- index = faiss.GpuIndexFlatL2(res[0], 0, d, useFloat16)
+ index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
else:
- indexes = [faiss.GpuIndexFlatL2(res[i], i, d, useFloat16)
+ indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
for i in range(ngpu)]
index = faiss.IndexProxy()
for sub_index in indexes:
8 changes: 7 additions & 1 deletion gpu/GpuAutoTune.cpp
@@ -65,6 +65,7 @@ GpuClonerOptions::GpuClonerOptions():
useFloat16(false),
usePrecomputed(true),
reserveVecs(0),
+ storeTransposed(false),
verbose(0)
{}

@@ -79,7 +80,12 @@ struct ToGpuCloner: faiss::Cloner, GpuClonerOptions {

Index *clone_Index(const Index *index) override {
if(auto ifl = dynamic_cast<const IndexFlat *>(index)) {
- return new GpuIndexFlat(resources, device, useFloat16, ifl);
+ GpuIndexFlatConfig config;
+ config.device = device;
+ config.useFloat16 = useFloat16;
+ config.storeTransposed = storeTransposed;

+ return new GpuIndexFlat(resources, ifl, config);
} else if(auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
GpuIndexIVFFlat *res =
new GpuIndexIVFFlat(resources,
2 changes: 2 additions & 0 deletions gpu/GpuAutoTune.h
@@ -40,6 +40,8 @@ struct GpuClonerOptions {
bool usePrecomputed;
/// reserve vectors in the invfiles?
long reserveVecs;
+ /// For GpuIndexFlat, store data in transposed layout?
+ bool storeTransposed;
int verbose;
GpuClonerOptions ();
};
(Diffs for the remaining 30 changed files are not shown.)
