1
2
3
4
5
6 """
7 KD tree data structure for searching N-dimensional vectors.
8
9 The KD tree data structure can be used for all kinds of searches that
10 involve N-dimensional vectors, e.g. neighbor searches (find all points
11 within a radius of a given point) or finding all point pairs in a set
12 that are within a certain radius of each other. See "Computational Geometry:
13 Algorithms and Applications" (Mark de Berg, Marc van Kreveld, Mark Overmars,
14 Otfried Schwarzkopf). Author: Thomas Hamelryck.
15 """
16
17 from numpy import sum, sqrt, dtype, array
18 from numpy.random import random
19
20 from Bio.KDTree import _CKDTree
21
23 diff=p-q
24 return sqrt(sum(diff*diff))
25
27 """ Test all fixed radius neighbor search.
28
29 Test all fixed radius neighbor search using the
30 KD tree C module.
31
32 o nr_points - number of points used in test
33 o dim - dimension of coords
34 o bucket_size - nr of points per tree node
35 o radius - radius of search (typically 0.05 or so)
36 """
37
38 kdt=_CKDTree.KDTree(dim, bucket_size)
39 coords=random((nr_points, dim))
40 kdt.set_data(coords)
41 neighbors = kdt.neighbor_search(radius)
42 r = [neighbor.radius for neighbor in neighbors]
43 if r is None:
44 l1=0
45 else:
46 l1=len(r)
47
48 neighbors = kdt.neighbor_simple_search(radius)
49 r = [neighbor.radius for neighbor in neighbors]
50 if r is None:
51 l2=0
52 else:
53 l2=len(r)
54 if l1==l2:
55 print "Passed."
56 else:
57 print "Not passed: %i != %i." % (l1, l2)
58
59 -def _test(nr_points, dim, bucket_size, radius):
60 """Test neighbor search.
61
62 Test neighbor search using the KD tree C module.
63
64 o nr_points - number of points used in test
65 o dim - dimension of coords
66 o bucket_size - nr of points per tree node
67 o radius - radius of search (typically 0.05 or so)
68 """
69
70 kdt=_CKDTree.KDTree(dim, bucket_size)
71 coords=random((nr_points, dim))
72 center=coords[0]
73 kdt.set_data(coords)
74 kdt.search_center_radius(center, radius)
75 r=kdt.get_indices()
76 if r is None:
77 l1=0
78 else:
79 l1=len(r)
80 l2=0
81
82 for i in range(0, nr_points):
83 p=coords[i]
84 if _dist(p, center)<=radius:
85 l2=l2+1
86 if l1==l2:
87 print "Passed."
88 else:
89 print "Not passed: %i != %i." % (l1, l2)
90
92 """
93 KD tree implementation (C++, SWIG python wrapper)
94
95 The KD tree data structure can be used for all kinds of searches that
96 involve N-dimensional vectors, e.g. neighbor searches (find all points
97 within a radius of a given point) or finding all point pairs in a set
98 that are within a certain radius of each other.
99
100 Reference:
101
102 Computational Geometry: Algorithms and Applications
103 Second Edition
104 Mark de Berg, Marc van Kreveld, Mark Overmars, Otfried Schwarzkopf
105 published by Springer-Verlag
106 2nd rev. ed. 2000.
107 ISBN: 3-540-65620-0
108
109 The KD tree data structure is described in chapter 5, pg. 99.
110
111 The following article made clear to me that the nodes should
112 contain more than one point (this leads to dramatic speed
113 improvements for the "all fixed radius neighbor search", see
114 below):
115
116 JL Bentley, "Kd trees for semidynamic point sets," in Sixth Annual ACM
117 Symposium on Computational Geometry, vol. 91. San Francisco, 1990
118
119 This KD implementation also performs a "all fixed radius neighbor search",
120 i.e. it can find all point pairs in a set that are within a certain radius
121 of each other. As far as I know the algorithm has not been published.
122 """
123
124 - def __init__(self, dim, bucket_size=1):
125 self.dim=dim
126 self.kdt=_CKDTree.KDTree(dim, bucket_size)
127 self.built=0
128
129
130
132 """Add the coordinates of the points.
133
134 o coords - two dimensional NumPy array. E.g. if the points
135 have dimensionality D and there are N points, the coords
136 array should be NxD dimensional.
137 """
138 if coords.min()<=-1e6 or coords.max()>=1e6:
139 raise Exception("Points should lie between -1e6 and 1e6")
140 if len(coords.shape)!=2 or coords.shape[1]!=self.dim:
141 raise Exception("Expected a Nx%i NumPy array" % self.dim)
142 self.kdt.set_data(coords)
143 self.built=1
144
145
146
147 - def search(self, center, radius):
148 """Search all points within radius of center.
149
150 o center - one dimensional NumPy array. E.g. if the points have
151 dimensionality D, the center array should be D dimensional.
152 o radius - float>0
153 """
154 if not self.built:
155 raise Exception("No point set specified")
156 if center.shape!=(self.dim,):
157 raise Exception("Expected a %i-dimensional NumPy array" \
158 % self.dim)
159 self.kdt.search_center_radius(center, radius)
160
162 """Return radii.
163
164 Return the list of distances from center after
165 a neighbor search.
166 """
167 a=self.kdt.get_radii()
168 if a is None:
169 return []
170 return a
171
173 """Return the list of indices.
174
175 Return the list of indices after a neighbor search.
176 The indices refer to the original coords NumPy array. The
177 coordinates with these indices were within radius of center.
178
179 For an index pair, the first index<second index.
180 """
181 a=self.kdt.get_indices()
182 if a is None:
183 return []
184 return a
185
186
187
188
190 """All fixed neighbor search.
191
192 Search all point pairs that are within radius.
193
194 o radius - float (>0)
195 """
196 if not self.built:
197 raise Exception("No point set specified")
198 self.neighbors = self.kdt.neighbor_search(radius)
199
201 """Return All Fixed Neighbor Search results.
202
203 Return a Nx2 dim NumPy array containing
204 the indices of the point pairs, where N
205 is the number of neighbor pairs.
206 """
207 a = array([[neighbor.index1, neighbor.index2] for neighbor in self.neighbors])
208 return a
209
211 """Return All Fixed Neighbor Search results.
212
213 Return an N-dim array containing the distances
214 of all the point pairs, where N is the number
215 of neighbor pairs..
216 """
217 return [neighbor.radius for neighbor in self.neighbors]
218
219 if __name__=="__main__":
220
221 from numpy.random import random
222
223 nr_points=100000
224 dim=3
225 bucket_size=10
226 query_radius=10
227
228 coords=(200*random((nr_points, dim)))
229
230 kdtree=KDTree(dim, bucket_size)
231
232
233 kdtree.set_coords(coords)
234
235
236
237 kdtree.all_search(query_radius)
238
239
240
241
242
243
244 indices=kdtree.all_get_indices()
245 radii=kdtree.all_get_radii()
246
247 print "Found %i point pairs within radius %f." % (len(indices), query_radius)
248
249
250
251 for i in range(0, 10):
252
253 center=random(dim)
254
255
256 kdtree.search(center, query_radius)
257
258
259 indices=kdtree.get_indices()
260 radii=kdtree.get_radii()
261
262 x, y, z=center
263 print "Found %i points in radius %f around center (%.2f, %.2f, %.2f)." % (len(indices), query_radius, x, y, z)
264