comparison python/ml.py @ 907:9fd7b18f75b4

re arranged motion pattern learning
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Fri, 23 Jun 2017 23:50:02 -0400
parents 8e8ec4ece66e
children b297525b2cbf
comparison
equal deleted inserted replaced
906:a57e6fbcd8e3 907:9fd7b18f75b4
112 return centroids 112 return centroids
113 113
114 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements 114 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements
115 115
116 def spectralClustering(similarityMatrix, k, iter=20): 116 def spectralClustering(similarityMatrix, k, iter=20):
117 '''Spectral Clustering algorithm''' 117 '''Spectral Clustering algorithm'''
118 n = len(similarityMatrix) 118 n = len(similarityMatrix)
119 # create Laplacian matrix 119 # create Laplacian matrix
120 rowsum = np.sum(similarityMatrix,axis=0) 120 rowsum = np.sum(similarityMatrix,axis=0)
121 D = np.diag(1 / np.sqrt(rowsum)) 121 D = np.diag(1 / np.sqrt(rowsum))
122 I = np.identity(n) 122 I = np.identity(n)
123 L = I - np.dot(D,np.dot(similarityMatrix,D)) 123 L = I - np.dot(D,np.dot(similarityMatrix,D))
124 # compute eigenvectors of L 124 # compute eigenvectors of L
125 U,sigma,V = np.linalg.svd(L) 125 U,sigma,V = np.linalg.svd(L)
126 # create feature vector from k first eigenvectors 126 # create feature vector from k first eigenvectors
127 # by stacking eigenvectors as columns 127 # by stacking eigenvectors as columns
128 features = np.array(V[:k]).T 128 features = np.array(V[:k]).T
129 # k-means 129 # k-means
130 features = whiten(features) 130 features = whiten(features)
131 centroids,distortion = kmeans(features,k, iter) 131 centroids,distortion = kmeans(features,k, iter)
132 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) 132 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
133 return code,sigma 133 return code,sigma
134 134
135 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = None, randomInitialization = False): 135 def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = None):
136 '''Finds exemplar (prototype) instance that represent each cluster 136 '''Assigns instances to prototypes
137 Returns the prototype indices (in the instances list) and the cluster label of each instance
138
139 the elements in the instances list must have a length (method __len__), or one can use the random initialization
140 the positions in the instances list corresponds to the similarities
141 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed)
142 similarities must still be allocated with the right size
143
144 if an instance is different enough (<minSimilarity),
145 it will become a new prototype.
146 Non-prototype instances will be assigned to an existing prototype
147 if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters 137 if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
148 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize 138 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize'''
149 139 indices = [i for i in range(len(instances)) if i not in prototypeIndices]
150 TODO: at each step, optimize the prototype as the most similar in its current cluster (can be done easily if similarities are already computed)''' 140 labels = [-1]*len(instances)
151
152 # sort instances based on length
153 if len(instances) == 0:
154 print('no instances to cluster (empty list)')
155 return None
156
157 indices = range(len(instances))
158 if randomInitialization:
159 indices = np.random.permutation(indices)
160 else:
161 def compare(i, j):
162 if len(instances[i]) > len(instances[j]):
163 return -1
164 elif len(instances[i]) == len(instances[j]):
165 return 0
166 else:
167 return 1
168 indices.sort(compare)
169 # go through all instances
170 prototypeIndices = [indices[0]]
171 for i in indices[1:]:
172 if similarityFunc is not None:
173 for j in prototypeIndices:
174 if similarities[i][j] < 0:
175 similarities[i][j] = similarityFunc(instances[i], instances[j])
176 similarities[j][i] = similarities[i][j]
177 if similarities[i][prototypeIndices].max() < minSimilarity:
178 prototypeIndices.append(i)
179 elif randomInitialization: # replace prototype by current instance i if longer
180 label = similarities[i][prototypeIndices].argmax()
181 if len(instances[prototypeIndices[label]]) < len(instances[i]):
182 prototypeIndices[label] = i
183
184 # assignment
185 indices = [i for i in range(similarities.shape[0]) if i not in prototypeIndices]
186 assign = True 141 assign = True
187 while assign: 142 while assign:
188 labels = [-1]*similarities.shape[0]
189 for i in prototypeIndices: 143 for i in prototypeIndices:
190 labels[i] = i 144 labels[i] = i
191 for i in indices: 145 for i in indices:
192 if similarityFunc is not None: 146 if similarityFunc is not None:
193 for j in prototypeIndices: 147 for j in prototypeIndices:
202 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} 156 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
203 smallestClusterIndex = min(clusterSizes, key = clusterSizes.get) 157 smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
204 assign = (clusterSizes[smallestClusterIndex] < minClusterSize) 158 assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
205 if assign: 159 if assign:
206 prototypeIndices.remove(smallestClusterIndex) 160 prototypeIndices.remove(smallestClusterIndex)
207 indices.append(smallestClusterIndex) 161 indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex]
208
209 return prototypeIndices, labels 162 return prototypeIndices, labels
163
164 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0, randomInitialization = False, assign = True, initialPrototypeIndices = None):
165 '''Finds exemplar (prototype) instance that represent each cluster
166 Returns the prototype indices (in the instances list) and the cluster label of each instance
167
168 the elements in the instances list must have a length (method __len__), or one can use the random initialization
169 the positions in the instances list corresponds to the similarities
170 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed)
171 similarities must still be allocated with the right size
172
173 if an instance is different enough (<minSimilarity),
174 it will become a new prototype.
175 Non-prototype instances will be assigned to an existing prototype
176
177 TODO: at each step, optimize the prototype as the most similar in its current cluster (can be done easily if similarities are already computed)'''
178
179 # sort instances based on length
180 if len(instances) == 0:
181 print('no instances to cluster (empty list)')
182 return None
183
184 indices = range(len(instances))
185 if randomInitialization:
186 indices = np.random.permutation(indices)
187 else:
188 def compare(i, j):
189 if len(instances[i]) > len(instances[j]):
190 return -1
191 elif len(instances[i]) == len(instances[j]):
192 return 0
193 else:
194 return 1
195 indices.sort(compare)
196 # go through all instances
197 if initialPrototypeIndices is None:
198 prototypeIndices = [indices[0]]
199 else:
200 prototypeIndices = initialPrototypeIndices
201 for i in indices[1:]:
202 if similarityFunc is not None:
203 for j in prototypeIndices:
204 if similarities[i][j] < 0:
205 similarities[i][j] = similarityFunc(instances[i], instances[j])
206 similarities[j][i] = similarities[i][j]
207 if similarities[i][prototypeIndices].max() < minSimilarity:
208 prototypeIndices.append(i)
209 elif randomInitialization: # replace prototype by current instance i if longer
210 label = similarities[i][prototypeIndices].argmax()
211 if len(instances[prototypeIndices[label]]) < len(instances[i]):
212 prototypeIndices[label] = i
213 if assign:
214 return assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize)
215 else:
216 return prototypeIndices, None
210 217
211 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): 218 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
212 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} 219 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
213 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) 220 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex)
214 return clusterSizes 221 return clusterSizes
230 237
231 # Plot an ellipse to show the Gaussian component 238 # Plot an ellipse to show the Gaussian component
232 v, w = np.linalg.eigh(covariance) 239 v, w = np.linalg.eigh(covariance)
233 angle = np.arctan2(w[0][1], w[0][0]) 240 angle = np.arctan2(w[0][1], w[0][0])
234 angle = 180*angle/np.pi # convert to degrees 241 angle = 180*angle/np.pi # convert to degrees
235 v *= 4 242 v *= 4
236 ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i]) 243 ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i])
237 ell.set_clip_box(fig.bbox) 244 ell.set_clip_box(fig.bbox)
238 ell.set_alpha(alpha) 245 ell.set_alpha(alpha)
239 fig.axes[0].add_artist(ell) 246 fig.axes[0].add_artist(ell)
240 return labels 247 return labels