Mercurial Hosting > traffic-intelligence
comparison python/ml.py @ 907:9fd7b18f75b4
re arranged motion pattern learning
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Fri, 23 Jun 2017 23:50:02 -0400 |
parents | 8e8ec4ece66e |
children | b297525b2cbf |
comparison
equal
deleted
inserted
replaced
906:a57e6fbcd8e3 | 907:9fd7b18f75b4 |
---|---|
112 return centroids | 112 return centroids |
113 | 113 |
114 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements | 114 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements |
115 | 115 |
116 def spectralClustering(similarityMatrix, k, iter=20): | 116 def spectralClustering(similarityMatrix, k, iter=20): |
117 '''Spectral Clustering algorithm''' | 117 '''Spectral Clustering algorithm''' |
118 n = len(similarityMatrix) | 118 n = len(similarityMatrix) |
119 # create Laplacian matrix | 119 # create Laplacian matrix |
120 rowsum = np.sum(similarityMatrix,axis=0) | 120 rowsum = np.sum(similarityMatrix,axis=0) |
121 D = np.diag(1 / np.sqrt(rowsum)) | 121 D = np.diag(1 / np.sqrt(rowsum)) |
122 I = np.identity(n) | 122 I = np.identity(n) |
123 L = I - np.dot(D,np.dot(similarityMatrix,D)) | 123 L = I - np.dot(D,np.dot(similarityMatrix,D)) |
124 # compute eigenvectors of L | 124 # compute eigenvectors of L |
125 U,sigma,V = np.linalg.svd(L) | 125 U,sigma,V = np.linalg.svd(L) |
126 # create feature vector from k first eigenvectors | 126 # create feature vector from k first eigenvectors |
127 # by stacking eigenvectors as columns | 127 # by stacking eigenvectors as columns |
128 features = np.array(V[:k]).T | 128 features = np.array(V[:k]).T |
129 # k-means | 129 # k-means |
130 features = whiten(features) | 130 features = whiten(features) |
131 centroids,distortion = kmeans(features,k, iter) | 131 centroids,distortion = kmeans(features,k, iter) |
132 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) | 132 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) |
133 return code,sigma | 133 return code,sigma |
134 | 134 |
135 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = None, randomInitialization = False): | 135 def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = None): |
136 '''Finds exemplar (prototype) instance that represent each cluster | 136 '''Assigns instances to prototypes |
137 Returns the prototype indices (in the instances list) and the cluster label of each instance | |
138 | |
139 the elements in the instances list must have a length (method __len__), or one can use the random initialization | |
140 the positions in the instances list corresponds to the similarities | |
141 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed) | |
142 similarities must still be allocated with the right size | |
143 | |
144 if an instance is different enough (<minSimilarity), | |
145 it will become a new prototype. | |
146 Non-prototype instances will be assigned to an existing prototype | |
147 if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters | 137 if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters |
148 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize | 138 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize''' |
149 | 139 indices = [i for i in range(len(instances)) if i not in prototypeIndices] |
150 TODO: at each step, optimize the prototype as the most similar in its current cluster (can be done easily if similarities are already computed)''' | 140 labels = [-1]*len(instances) |
151 | |
152 # sort instances based on length | |
153 if len(instances) == 0: | |
154 print('no instances to cluster (empty list)') | |
155 return None | |
156 | |
157 indices = range(len(instances)) | |
158 if randomInitialization: | |
159 indices = np.random.permutation(indices) | |
160 else: | |
161 def compare(i, j): | |
162 if len(instances[i]) > len(instances[j]): | |
163 return -1 | |
164 elif len(instances[i]) == len(instances[j]): | |
165 return 0 | |
166 else: | |
167 return 1 | |
168 indices.sort(compare) | |
169 # go through all instances | |
170 prototypeIndices = [indices[0]] | |
171 for i in indices[1:]: | |
172 if similarityFunc is not None: | |
173 for j in prototypeIndices: | |
174 if similarities[i][j] < 0: | |
175 similarities[i][j] = similarityFunc(instances[i], instances[j]) | |
176 similarities[j][i] = similarities[i][j] | |
177 if similarities[i][prototypeIndices].max() < minSimilarity: | |
178 prototypeIndices.append(i) | |
179 elif randomInitialization: # replace prototype by current instance i if longer | |
180 label = similarities[i][prototypeIndices].argmax() | |
181 if len(instances[prototypeIndices[label]]) < len(instances[i]): | |
182 prototypeIndices[label] = i | |
183 | |
184 # assignment | |
185 indices = [i for i in range(similarities.shape[0]) if i not in prototypeIndices] | |
186 assign = True | 141 assign = True |
187 while assign: | 142 while assign: |
188 labels = [-1]*similarities.shape[0] | |
189 for i in prototypeIndices: | 143 for i in prototypeIndices: |
190 labels[i] = i | 144 labels[i] = i |
191 for i in indices: | 145 for i in indices: |
192 if similarityFunc is not None: | 146 if similarityFunc is not None: |
193 for j in prototypeIndices: | 147 for j in prototypeIndices: |
202 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} | 156 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} |
203 smallestClusterIndex = min(clusterSizes, key = clusterSizes.get) | 157 smallestClusterIndex = min(clusterSizes, key = clusterSizes.get) |
204 assign = (clusterSizes[smallestClusterIndex] < minClusterSize) | 158 assign = (clusterSizes[smallestClusterIndex] < minClusterSize) |
205 if assign: | 159 if assign: |
206 prototypeIndices.remove(smallestClusterIndex) | 160 prototypeIndices.remove(smallestClusterIndex) |
207 indices.append(smallestClusterIndex) | 161 indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex] |
208 | |
209 return prototypeIndices, labels | 162 return prototypeIndices, labels |
163 | |
164 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0, randomInitialization = False, assign = True, initialPrototypeIndices = None): | |
165 '''Finds exemplar (prototype) instance that represent each cluster | |
166 Returns the prototype indices (in the instances list) and the cluster label of each instance | |
167 | |
168 the elements in the instances list must have a length (method __len__), or one can use the random initialization | |
169 the positions in the instances list corresponds to the similarities | |
170 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed) | |
171 similarities must still be allocated with the right size | |
172 | |
173 if an instance is different enough (<minSimilarity), | |
174 it will become a new prototype. | |
175 Non-prototype instances will be assigned to an existing prototype | |
176 | |
177 TODO: at each step, optimize the prototype as the most similar in its current cluster (can be done easily if similarities are already computed)''' | |
178 | |
179 # sort instances based on length | |
180 if len(instances) == 0: | |
181 print('no instances to cluster (empty list)') | |
182 return None | |
183 | |
184 indices = range(len(instances)) | |
185 if randomInitialization: | |
186 indices = np.random.permutation(indices) | |
187 else: | |
188 def compare(i, j): | |
189 if len(instances[i]) > len(instances[j]): | |
190 return -1 | |
191 elif len(instances[i]) == len(instances[j]): | |
192 return 0 | |
193 else: | |
194 return 1 | |
195 indices.sort(compare) | |
196 # go through all instances | |
197 if initialPrototypeIndices is None: | |
198 prototypeIndices = [indices[0]] | |
199 else: | |
200 prototypeIndices = initialPrototypeIndices | |
201 for i in indices[1:]: | |
202 if similarityFunc is not None: | |
203 for j in prototypeIndices: | |
204 if similarities[i][j] < 0: | |
205 similarities[i][j] = similarityFunc(instances[i], instances[j]) | |
206 similarities[j][i] = similarities[i][j] | |
207 if similarities[i][prototypeIndices].max() < minSimilarity: | |
208 prototypeIndices.append(i) | |
209 elif randomInitialization: # replace prototype by current instance i if longer | |
210 label = similarities[i][prototypeIndices].argmax() | |
211 if len(instances[prototypeIndices[label]]) < len(instances[i]): | |
212 prototypeIndices[label] = i | |
213 if assign: | |
214 return assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize) | |
215 else: | |
216 return prototypeIndices, None | |
210 | 217 |
211 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): | 218 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): |
212 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} | 219 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} |
213 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) | 220 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) |
214 return clusterSizes | 221 return clusterSizes |
230 | 237 |
231 # Plot an ellipse to show the Gaussian component | 238 # Plot an ellipse to show the Gaussian component |
232 v, w = np.linalg.eigh(covariance) | 239 v, w = np.linalg.eigh(covariance) |
233 angle = np.arctan2(w[0][1], w[0][0]) | 240 angle = np.arctan2(w[0][1], w[0][0]) |
234 angle = 180*angle/np.pi # convert to degrees | 241 angle = 180*angle/np.pi # convert to degrees |
235 v *= 4 | 242 v *= 4 |
236 ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i]) | 243 ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i]) |
237 ell.set_clip_box(fig.bbox) | 244 ell.set_clip_box(fig.bbox) |
238 ell.set_alpha(alpha) | 245 ell.set_alpha(alpha) |
239 fig.axes[0].add_artist(ell) | 246 fig.axes[0].add_artist(ell) |
240 return labels | 247 return labels |