-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathann_methods.py
127 lines (97 loc) · 4.13 KB
/
ann_methods.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Wrappers that expose several ANN libraries through one shared interface:
# train(vecs), add(vecs), and search(q, topk).
# The design is inspired by ann-benchmarks: https://github.com/erikbern/ann-benchmarks
import numpy as np
import annoy # pip install annoy
import falconn # pip install FALCONN
import nanopq # pip install nanopq (can be installed when rii is installed)
import nmslib # pip install nmslib
import faiss # conda install -c pytorch faiss-cpu
### If you'd like to debug, please uninstall rii and uncomment the following lines
#import sys
#sys.path.append('../../')
import rii # pip install rii
class RiiSearcher:
    """Searcher built on Rii with a nanopq product quantizer.

    Offers the same train / add / search methods as the other searcher
    classes in this file.
    """

    def __init__(self, L=5000, K=1000, M=64):
        """Store the search parameters; the index itself is built in train().

        Args:
            L: Forwarded to ``index.query`` as ``L``. The original author's
                note says to set it to None when trying another dataset.
            K: Forwarded to ``index.add_configure`` as ``nlist``. Same note:
                set it to None when trying another dataset.
            M: Forwarded to ``nanopq.PQ`` as ``M``.
        """
        self.L = L
        self.K = K
        self.M = M
        self.index = None  # replaced by a rii.Rii instance in train()

    def train(self, vecs):
        """Fit a PQ codec on ``vecs`` and wrap it in a new Rii index."""
        quantizer = nanopq.PQ(M=self.M, verbose=False)
        fitted_codec = quantizer.fit(vecs=vecs)
        self.index = rii.Rii(fine_quantizer=fitted_codec)

    def add(self, vecs):
        """Add ``vecs`` to the index, using ``self.K`` as ``nlist``.

        train() must have been called first; otherwise ``self.index`` is
        still None.
        """
        self.index.add_configure(vecs=vecs, nlist=self.K)

    def search(self, q, topk):
        """Return the ids found for query ``q``; distances are dropped."""
        found_ids, _ = self.index.query(q=q, L=self.L, topk=topk)
        return found_ids
class AnnoySearcher:
    """Searcher built on an Annoy index using the "euclidean" metric.

    Offers the same train / add / search methods as the other searcher
    classes in this file.
    """

    def __init__(self, n_trees=2000, k_search=400):
        """Store the build and query parameters; the index is made in add().

        Args:
            n_trees: Passed to ``index.build``.
            k_search: Passed to ``get_nns_by_vector`` as ``search_k``.
        """
        self.n_trees = n_trees
        self.k_search = k_search
        self.index = None  # replaced by an annoy.AnnoyIndex in add()

    def train(self, vecs):
        """Do nothing; present only so all searchers share one interface."""
        return None

    def add(self, vecs):
        """Create a new index, insert each row of ``vecs``, then build it.

        Item ids are the row positions in ``vecs`` (0, 1, 2, ...).
        """
        dim = vecs.shape[1]
        self.index = annoy.AnnoyIndex(f=dim, metric="euclidean")
        for item_id, vec in enumerate(vecs):
            self.index.add_item(item_id, vec.tolist())
        self.index.build(self.n_trees)

    def search(self, q, topk):
        """Return the ``topk`` neighbour ids Annoy reports for query ``q``."""
        query_as_list = q.tolist()
        return self.index.get_nns_by_vector(
            query_as_list, n=topk, search_k=self.k_search
        )
class FalconnSearcher:
    """Searcher built on a FALCONN LSH index over mean-centred vectors.

    Offers the same train / add / search methods as the other searcher
    classes in this file.
    """

    def __init__(self, num_probes=16):
        """Store ``num_probes``; everything else is created in add()."""
        self.num_probes = num_probes
        # All of the following are populated by add().
        self.center = None
        self.params_cp = None
        self.table = None
        self.query_object = None

    def train(self, vecs):
        """Do nothing; present only so all searchers share one interface."""
        return None

    def add(self, vecs):
        """Build the LSH table from ``vecs`` and prepare a query object.

        The mean of ``vecs`` is stored in ``self.center`` and subtracted from
        the data here and from every query in search().
        """
        num_points = vecs.shape[0]
        dimension = vecs.shape[1]

        self.center = np.mean(vecs, axis=0)  # Subtracted from data and queries

        self.params_cp = falconn.get_default_parameters(
            num_points=num_points,
            dimension=dimension,
            distance=falconn.DistanceFunction.EuclideanSquared,
            is_sufficiently_dense=True,
        )
        # Left disabled by the original author, who labelled it "Single thread
        # mode"; NOTE(review): confirm what 0 means in the FALCONN docs.
        # self.params_cp.num_setup_threads = 0

        # Number of hash bits: log2 of the dataset size, rounded to an integer.
        bit = int(np.round(np.log2(num_points)))
        # The return value is unused, so this call is relied on to update
        # self.params_cp in place.
        falconn.compute_number_of_hash_functions(bit, self.params_cp)

        self.table = falconn.LSHIndex(self.params_cp)
        centred_vecs = vecs - self.center
        self.table.setup(centred_vecs)
        self.query_object = self.table.construct_query_object()

    def search(self, q, topk):
        """Return FALCONN's ``topk`` nearest neighbours of the centred query.

        ``num_probes`` is re-applied on every call, so changing
        ``self.num_probes`` between searches takes effect immediately.
        """
        self.query_object.set_num_probes(self.num_probes)
        centred_query = q - self.center
        return self.query_object.find_k_nearest_neighbors(centred_query, topk)
class NmslibSearcher:
    """Searcher built on an nmslib index (method 'hnsw', space 'l2').

    Offers the same train / add / search methods as the other searcher
    classes in this file.
    """

    def __init__(self, post=2, efConstruction=400, efSearch=4):
        """Create the empty nmslib index and store the tuning parameters.

        Args:
            post: Passed to ``createIndex`` under the 'post' key.
            efConstruction: Passed to ``createIndex`` under 'efConstruction'.
            efSearch: Passed to ``setQueryTimeParams`` under 'efSearch'.
        """
        self.index = nmslib.init(method='hnsw', space='l2')
        self.post = post
        self.efConstruction = efConstruction
        self.efSearch = efSearch

    def train(self, vecs):
        """Do nothing; present only so all searchers share one interface."""
        return None

    def add(self, vecs):
        """Insert ``vecs`` as one batch, then build the index with progress output."""
        self.index.addDataPointBatch(vecs)
        index_params = {
            'post': self.post,
            'efConstruction': self.efConstruction,
        }
        self.index.createIndex(index_params, print_progress=True)

    def search(self, q, topk):
        """Return the ids from a ``topk`` kNN query; distances are dropped.

        The query-time parameters are re-applied on every call, so changing
        ``self.efSearch`` between searches takes effect immediately.
        """
        query_params = {'efSearch': self.efSearch}
        self.index.setQueryTimeParams(query_params)
        found_ids, _ = self.index.knnQuery(q, k=topk)
        return found_ids
class FaissSearcher:
    """Searcher built on a faiss IndexIVFPQ with an IndexFlatL2 quantizer.

    Offers the same train / add / search methods as the other searcher
    classes in this file.
    """

    def __init__(self, nlist=1000, M=64, nprobe=4):
        """Store the parameters; the faiss objects are created in train().

        Args:
            nlist: Third argument to ``faiss.IndexIVFPQ``.
            M: Fourth argument to ``faiss.IndexIVFPQ``.
            nprobe: Assigned to ``index.nprobe`` before each search.
        """
        self.nlist = nlist
        self.M = M
        self.nprobe = nprobe
        self.quantizer = None  # faiss.IndexFlatL2, created in train()
        self.index = None  # faiss.IndexIVFPQ, created in train()

    def train(self, vecs):
        """Create the quantizer and IVFPQ index for ``vecs``, then train it."""
        dim = vecs.shape[1]
        # The quantizer is stored on self rather than in a local; presumably
        # this keeps it alive for as long as the index uses it (verify).
        self.quantizer = faiss.IndexFlatL2(dim)
        # The trailing 8 is the last positional argument of IndexIVFPQ
        # (per the faiss docs, the bits per sub-quantizer code; confirm).
        self.index = faiss.IndexIVFPQ(
            self.quantizer, dim, self.nlist, self.M, 8
        )
        self.index.train(vecs)

    def add(self, vecs):
        """Add ``vecs`` to the trained index."""
        self.index.add(vecs)

    def search(self, q, topk):
        """Search for ``topk`` neighbours of ``q`` and return the label array.

        ``q`` is reshaped into a single-row batch. The label array comes back
        from faiss unflattened, so it presumably keeps a leading batch axis of
        length 1 (unlike the other searchers here; verify against callers).
        ``nprobe`` is re-applied on every call, so changing ``self.nprobe``
        between searches takes effect immediately.
        """
        self.index.nprobe = self.nprobe
        single_row_query = q.reshape(1, -1)
        _, labels = self.index.search(single_row_query, topk)
        return labels