comparison trafficintelligence/ml.py @ 1044:75a6ad604cc5

work on motion patterns
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Thu, 05 Jul 2018 17:06:40 -0400
parents 8ffb3ae9f3d2
children ab4c72b9475c
comparison
equal deleted inserted replaced
1043:b735895c8815 1044:75a6ad604cc5
148 features = whiten(features) 148 features = whiten(features)
149 centroids,distortion = kmeans(features,k, iter) 149 centroids,distortion = kmeans(features,k, iter)
150 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) 150 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
151 return code,sigma 151 return code,sigma
152 152
153 def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0): 153 def assignToPrototypeClusters(instances, initialPrototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize = 0):
154 '''Assigns instances to prototypes 154 '''Assigns instances to prototypes
155 if minClusterSize is not 0, the clusters will be refined by removing iteratively the smallest clusters 155 if minClusterSize is not 0, the clusters will be refined by removing iteratively the smallest clusters
156 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize 156 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize
157 157
158 labels are indices in the prototypeIndices''' 158 labels are indices in the prototypeIndices'''
159 if similarityFunc is None: 159 prototypeIndices = copy(initialPrototypeIndices)
160 print('similarityFunc is None')
161 return None
162
163 indices = [i for i in range(len(instances)) if i not in prototypeIndices] 160 indices = [i for i in range(len(instances)) if i not in prototypeIndices]
164 labels = [-1]*len(instances) 161 labels = [-1]*len(instances)
165 assign = True 162 assign = True
166 while assign: 163 while assign:
167 for i in prototypeIndices: 164 for i in prototypeIndices:
182 if assign: 179 if assign:
183 prototypeIndices.remove(smallestClusterIndex) 180 prototypeIndices.remove(smallestClusterIndex)
184 indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex] 181 indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex]
185 return prototypeIndices, labels 182 return prototypeIndices, labels
186 183
187 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None): 184 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None):
188 '''Finds exemplar (prototype) instance that represent each cluster 185 '''Finds exemplar (prototype) instance that represent each cluster
189 Returns the prototype indices (in the instances list) 186 Returns the prototype indices (in the instances list)
190 187
191 the elements in the instances list must have a length (method __len__), or one can use the optimizeCentroid 188 the elements in the instances list must have a length (method __len__), or one can use the optimizeCentroid
192 the positions in the instances list corresponds to the similarities 189 the positions in the instances list corresponds to the similarities
202 initialPrototypeIndices are indices in instances 199 initialPrototypeIndices are indices in instances
203 200
204 TODO: check how similarity evolves in clusters''' 201 TODO: check how similarity evolves in clusters'''
205 if len(instances) == 0: 202 if len(instances) == 0:
206 print('no instances to cluster (empty list)') 203 print('no instances to cluster (empty list)')
207 return None
208 if similarityFunc is None:
209 print('similarityFunc is None')
210 return None 204 return None
211 205
212 # sort instances based on length 206 # sort instances based on length
213 indices = list(range(len(instances))) 207 indices = list(range(len(instances)))
214 if randomInitialization or optimizeCentroid: 208 if randomInitialization or optimizeCentroid:
215 indices = np.random.permutation(indices).tolist() 209 indices = np.random.permutation(indices).tolist()
216 else: 210 else:
217 def compare(i, j):
218 if len(instances[i]) > len(instances[j]):
219 return -1
220 elif len(instances[i]) == len(instances[j]):
221 return 0
222 else:
223 return 1
224 indices.sort(key=lambda i: len(instances[i])) 211 indices.sort(key=lambda i: len(instances[i]))
225 # initialize clusters 212 # initialize clusters
226 clusters = [] 213 clusters = []
227 if initialPrototypeIndices is None: 214 if initialPrototypeIndices is None:
228 prototypeIndices = [indices[0]] 215 prototypeIndices = [indices[0]]