black, isort

spotify · Apr 10, 2023 · 2dfb09d · 2dfb09d
1 parent d33c732
commit 2dfb09d
Show file tree

Hide file tree

Showing 15 changed files with 424 additions and 276 deletions.
diff --git a/test/accuracy_test.py b/test/accuracy_test.py
@@ -14,67 +14,75 @@
 
 from __future__ import print_function
 
-import h5py
-import unittest
-import random
 import os
+
+import h5py
+
 from annoy import AnnoyIndex
+
 try:
     from urllib import urlretrieve
 except ImportError:
-    from urllib.request import urlretrieve # Python 3
-import gzip
+    from urllib.request import urlretrieve  # Python 3
+
+
 
 def _get_index(dataset):
-    url = 'http://vectors.erikbern.com/%s.hdf5' % dataset
-    vectors_fn = os.path.join('test', dataset + '.hdf5')
-    index_fn = os.path.join('test', dataset + '.annoy')
+    url = "http://vectors.erikbern.com/%s.hdf5" % dataset
+    vectors_fn = os.path.join("test", dataset + ".hdf5")
+    index_fn = os.path.join("test", dataset + ".annoy")
 
     if not os.path.exists(vectors_fn):
-        print('downloading', url, '->', vectors_fn)
+        print("downloading", url, "->", vectors_fn)
         urlretrieve(url, vectors_fn)
 
-    dataset_f = h5py.File(vectors_fn, 'r')
-    distance = dataset_f.attrs['distance']
-    f = dataset_f['train'].shape[1]
+    dataset_f = h5py.File(vectors_fn, "r")
+    distance = dataset_f.attrs["distance"]
+    f = dataset_f["train"].shape[1]
     annoy = AnnoyIndex(f, distance)
 
     if not os.path.exists(index_fn):
-        print('adding items', distance, f)
-        for i, v in enumerate(dataset_f['train']):
+        print("adding items", distance, f)
+        for i, v in enumerate(dataset_f["train"]):
             annoy.add_item(i, v)
 
-        print('building index')
+        print("building index")
         annoy.build(10)
         annoy.save(index_fn)
     else:
         annoy.load(index_fn)
     return annoy, dataset_f
 
+
 def _test_index(dataset, exp_accuracy):
     annoy, dataset_f = _get_index(dataset)
 
     n, k = 0, 0
 
-    for i, v in enumerate(dataset_f['test']):
+    for i, v in enumerate(dataset_f["test"]):
         js_fast = annoy.get_nns_by_vector(v, 10, 1000)
-        js_real = dataset_f['neighbors'][i][:10]
+        js_real = dataset_f["neighbors"][i][:10]
         assert len(js_fast) == 10
         assert len(js_real) == 10
 
         n += 10
         k += len(set(js_fast).intersection(js_real))
 
     accuracy = 100.0 * k / n
-    print('%50s accuracy: %5.2f%% (expected %5.2f%%)' % (dataset, accuracy, exp_accuracy))
+    print(
+        "%50s accuracy: %5.2f%% (expected %5.2f%%)" % (dataset, accuracy, exp_accuracy)
+    )
+
+    assert accuracy > exp_accuracy - 1.0  # should be within 1%
 
-    assert accuracy > exp_accuracy - 1.0 # should be within 1%
 
 def test_glove_25():
-    _test_index('glove-25-angular', 69.00)
+    _test_index("glove-25-angular", 69.00)
+
 
 def test_nytimes_16():
-    _test_index('nytimes-16-angular', 80.00)
+    _test_index("nytimes-16-angular", 80.00)
+
 
 def test_fashion_mnist():
-    _test_index('fashion-mnist-784-euclidean', 90.00)
+    _test_index("fashion-mnist-784-euclidean", 90.00)
diff --git a/test/angular_index_test.py b/test/angular_index_test.py
@@ -12,15 +12,17 @@
 # License for the specific language governing permissions and limitations under
 # the License.
 
+import random
+
 import numpy
 import pytest
-import random
+
 from annoy import AnnoyIndex
 
 
 def test_get_nns_by_vector():
     f = 3
-    i = AnnoyIndex(f, 'angular')
+    i = AnnoyIndex(f, "angular")
     i.add_item(0, [0, 0, 1])
     i.add_item(1, [0, 1, 0])
     i.add_item(2, [1, 0, 0])
@@ -30,121 +32,134 @@ def test_get_nns_by_vector():
     assert i.get_nns_by_vector([1, 2, 3], 3) == [0, 1, 2]
     assert i.get_nns_by_vector([2, 0, 1], 3) == [2, 0, 1]
 
+
 def test_get_nns_by_item():
     f = 3
-    i = AnnoyIndex(f, 'angular')
+    i = AnnoyIndex(f, "angular")
     i.add_item(0, [2, 1, 0])
     i.add_item(1, [1, 2, 0])
     i.add_item(2, [0, 0, 1])
     i.build(10)
 
     assert i.get_nns_by_item(0, 3) == [0, 1, 2]
     assert i.get_nns_by_item(1, 3) == [1, 0, 2]
-    assert i.get_nns_by_item(2, 3) in [[2, 0, 1], [2, 1, 0]] # could be either
+    assert i.get_nns_by_item(2, 3) in [[2, 0, 1], [2, 1, 0]]  # could be either
+
 
 def test_dist():
     f = 2
-    i = AnnoyIndex(f, 'angular')
+    i = AnnoyIndex(f, "angular")
     i.add_item(0, [0, 1])
     i.add_item(1, [1, 1])
 
-    assert i.get_distance(0, 1) == pytest.approx((2 * (1.0 - 2 ** -0.5))**0.5)
+    assert i.get_distance(0, 1) == pytest.approx((2 * (1.0 - 2**-0.5)) ** 0.5)
+
 
 def test_dist_2():
     f = 2
-    i = AnnoyIndex(f, 'angular')
+    i = AnnoyIndex(f, "angular")
     i.add_item(0, [1000, 0])
     i.add_item(1, [10, 0])
 
     assert i.get_distance(0, 1) == pytest.approx(0)
 
+
 def test_dist_3():
     f = 2
-    i = AnnoyIndex(f, 'angular')
+    i = AnnoyIndex(f, "angular")
     i.add_item(0, [97, 0])
     i.add_item(1, [42, 42])
 
-    dist = ((1 - 2 ** -0.5) ** 2 + (2 ** -0.5) ** 2)**0.5
+    dist = ((1 - 2**-0.5) ** 2 + (2**-0.5) ** 2) ** 0.5
 
     assert i.get_distance(0, 1) == pytest.approx(dist)
 
+
 def test_dist_degen():
     f = 2
-    i = AnnoyIndex(f, 'angular')
+    i = AnnoyIndex(f, "angular")
     i.add_item(0, [1, 0])
     i.add_item(1, [0, 0])
 
     assert i.get_distance(0, 1) == pytest.approx(2.0**0.5)
 
+
 def test_large_index():
     # Generate pairs of random points where the pair is super close
     f = 10
-    i = AnnoyIndex(f, 'angular')
+    i = AnnoyIndex(f, "angular")
     for j in range(0, 10000, 2):
         p = [random.gauss(0, 1) for z in range(f)]
         f1 = random.random() + 1
         f2 = random.random() + 1
         x = [f1 * pi + random.gauss(0, 1e-2) for pi in p]
         y = [f2 * pi + random.gauss(0, 1e-2) for pi in p]
         i.add_item(j, x)
-        i.add_item(j+1, y)
+        i.add_item(j + 1, y)
 
     i.build(10)
     for j in range(0, 10000, 2):
-        assert i.get_nns_by_item(j, 2) == [j, j+1]
-        assert i.get_nns_by_item(j+1, 2) == [j+1, j]
+        assert i.get_nns_by_item(j, 2) == [j, j + 1]
+        assert i.get_nns_by_item(j + 1, 2) == [j + 1, j]
+
 
 def precision(n, n_trees=10, n_points=10000, n_rounds=10, search_k=100000):
     found = 0
     for r in range(n_rounds):
         # create random points at distance x from (1000, 0, 0, ...)
         f = 10
-        i = AnnoyIndex(f, 'angular')
+        i = AnnoyIndex(f, "angular")
         for j in range(n_points):
             p = [random.gauss(0, 1) for z in range(f - 1)]
-            norm = sum([pi ** 2 for pi in p]) ** 0.5
+            norm = sum([pi**2 for pi in p]) ** 0.5
             x = [1000] + [pi / norm * j for pi in p]
             i.add_item(j, x)
 
         i.build(n_trees)
 
-        nns = i.get_nns_by_vector([1000] + [0] * (f-1), n, search_k)
+        nns = i.get_nns_by_vector([1000] + [0] * (f - 1), n, search_k)
         assert nns == sorted(nns)  # should be in order
         # The number of gaps should be equal to the last item minus n-1
         found += len([x for x in nns if x < n])
 
     return 1.0 * found / (n * n_rounds)
 
+
 def test_precision_1():
     assert precision(1) >= 0.98
 
+
 def test_precision_10():
     assert precision(10) >= 0.98
 
+
 def test_precision_100():
     assert precision(100) >= 0.98
 
+
 def test_precision_1000():
     assert precision(1000) >= 0.98
 
+
 def test_load_save_get_item_vector():
     f = 3
-    i = AnnoyIndex(f, 'angular')
+    i = AnnoyIndex(f, "angular")
     i.add_item(0, [1.1, 2.2, 3.3])
     i.add_item(1, [4.4, 5.5, 6.6])
     i.add_item(2, [7.7, 8.8, 9.9])
- 
+
     numpy.testing.assert_array_almost_equal(i.get_item_vector(0), [1.1, 2.2, 3.3])
     assert i.build(10)
-    assert i.save('blah.ann')
+    assert i.save("blah.ann")
     numpy.testing.assert_array_almost_equal(i.get_item_vector(1), [4.4, 5.5, 6.6])
-    j = AnnoyIndex(f, 'angular')
-    assert j.load('blah.ann')
+    j = AnnoyIndex(f, "angular")
+    assert j.load("blah.ann")
     numpy.testing.assert_array_almost_equal(j.get_item_vector(2), [7.7, 8.8, 9.9])
 
+
 def test_get_nns_search_k():
     f = 3
-    i = AnnoyIndex(f, 'angular')
+    i = AnnoyIndex(f, "angular")
     i.add_item(0, [0, 0, 1])
     i.add_item(1, [0, 1, 0])
     i.add_item(2, [1, 0, 0])
@@ -153,10 +168,11 @@ def test_get_nns_search_k():
     assert i.get_nns_by_item(0, 3, 10) == [0, 1, 2]
     assert i.get_nns_by_vector([3, 2, 1], 3, 10) == [2, 1, 0]
 
+
 def test_include_dists():
     # Double checking issue 112
     f = 40
-    i = AnnoyIndex(f, 'angular')
+    i = AnnoyIndex(f, "angular")
     v = numpy.random.normal(size=f)
     i.add_item(0, v)
     i.add_item(1, -v)
@@ -167,19 +183,21 @@ def test_include_dists():
     assert dists[0] == pytest.approx(0.0)
     assert dists[1] == pytest.approx(2.0)
 
+
 def test_include_dists_check_ranges():
     f = 3
-    i = AnnoyIndex(f, 'angular')
+    i = AnnoyIndex(f, "angular")
     for j in range(100000):
         i.add_item(j, numpy.random.normal(size=f))
     i.build(10)
     indices, dists = i.get_nns_by_item(0, 100000, include_distances=True)
     assert max(dists) <= 2.0
     assert min(dists) == pytest.approx(0.0)
 
+
 def test_distance_consistency():
     n, f = 1000, 3
-    i = AnnoyIndex(f, 'angular')
+    i = AnnoyIndex(f, "angular")
     for j in range(n):
         i.add_item(j, numpy.random.normal(size=f))
     i.build(10)
@@ -189,40 +207,53 @@ def test_distance_consistency():
             assert dist == pytest.approx(i.get_distance(a, b))
             u = i.get_item_vector(a)
             v = i.get_item_vector(b)
-            u_norm = numpy.array(u) * numpy.dot(u, u)**-0.5
-            v_norm = numpy.array(v) * numpy.dot(v, v)**-0.5
+            u_norm = numpy.array(u) * numpy.dot(u, u) ** -0.5
+            v_norm = numpy.array(v) * numpy.dot(v, v) ** -0.5
             # cos = numpy.clip(1 - cosine(u, v), -1, 1) # scipy returns 1 - cos
-            assert dist ** 2 == pytest.approx(numpy.dot(u_norm - v_norm, u_norm - v_norm), rel=1e-2)
+            assert dist**2 == pytest.approx(
+                numpy.dot(u_norm - v_norm, u_norm - v_norm), rel=1e-2
+            )
             # self.assertAlmostEqual(dist, (2*(1 - cos))**0.5)
-            assert dist ** 2 == pytest.approx(sum([(x-y)**2 for x, y in zip(u_norm, v_norm)]), rel=1e-2)
+            assert dist**2 == pytest.approx(
+                sum([(x - y) ** 2 for x, y in zip(u_norm, v_norm)]), rel=1e-2
+            )
+
 
 def test_only_one_item():
     # reported to annoy-user by Kireet Reddy
-    idx = AnnoyIndex(100, 'angular')
+    idx = AnnoyIndex(100, "angular")
     idx.add_item(0, numpy.random.randn(100))
     idx.build(n_trees=10)
-    idx.save('foo.idx')
-    idx = AnnoyIndex(100, 'angular')
-    idx.load('foo.idx')
+    idx.save("foo.idx")
+    idx = AnnoyIndex(100, "angular")
+    idx.load("foo.idx")
     assert idx.get_n_items() == 1
-    assert idx.get_nns_by_vector(vector=numpy.random.randn(100), n=50, include_distances=False) == [0]
+    assert idx.get_nns_by_vector(
+        vector=numpy.random.randn(100), n=50, include_distances=False
+    ) == [0]
+
 
 def test_no_items():
-    idx = AnnoyIndex(100, 'angular')
+    idx = AnnoyIndex(100, "angular")
     idx.build(n_trees=10)
-    idx.save('foo.idx')
-    idx = AnnoyIndex(100, 'angular')
-    idx.load('foo.idx')
+    idx.save("foo.idx")
+    idx = AnnoyIndex(100, "angular")
+    idx.load("foo.idx")
     assert idx.get_n_items() == 0
-    assert idx.get_nns_by_vector(vector=numpy.random.randn(100), n=50, include_distances=False) == []
+    assert (
+        idx.get_nns_by_vector(
+            vector=numpy.random.randn(100), n=50, include_distances=False
+        )
+        == []
+    )
+
 
 def test_single_vector():
     # https://github.com/spotify/annoy/issues/194
-    a = AnnoyIndex(3, 'angular')
+    a = AnnoyIndex(3, "angular")
     a.add_item(0, [1, 0, 0])
     a.build(10)
-    a.save('1.ann')
+    a.save("1.ann")
     indices, dists = a.get_nns_by_vector([1, 0, 0], 3, include_distances=True)
     assert indices == [0]
     assert dists[0] ** 2 == pytest.approx(0.0)
-