Mercurial Hosting > traffic-intelligence
comparison trafficintelligence/ml.py @ 1054:d13f9bfbf3ff
Retry
author | Wendlasida |
---|---|
date | Fri, 06 Jul 2018 18:42:58 -0400 |
parents | 75a6ad604cc5 |
children | ab4c72b9475c |
comparison
equal
deleted
inserted
replaced
1053:60cc87e824c4 | 1054:d13f9bfbf3ff |
---|---|
148 features = whiten(features) | 148 features = whiten(features) |
149 centroids,distortion = kmeans(features,k, iter) | 149 centroids,distortion = kmeans(features,k, iter) |
150 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) | 150 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) |
151 return code,sigma | 151 return code,sigma |
152 | 152 |
153 def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0): | 153 def assignToPrototypeClusters(instances, initialPrototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize = 0): |
154 '''Assigns instances to prototypes | 154 '''Assigns instances to prototypes |
155 if minClusterSize is not 0, the clusters will be refined by removing iteratively the smallest clusters | 155 if minClusterSize is not 0, the clusters will be refined by removing iteratively the smallest clusters |
156 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize | 156 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize |
157 | 157 |
158 labels are indices in the prototypeIndices''' | 158 labels are indices in the prototypeIndices''' |
159 if similarityFunc is None: | 159 prototypeIndices = copy(initialPrototypeIndices) |
160 print('similarityFunc is None') | |
161 return None | |
162 | |
163 indices = [i for i in range(len(instances)) if i not in prototypeIndices] | 160 indices = [i for i in range(len(instances)) if i not in prototypeIndices] |
164 labels = [-1]*len(instances) | 161 labels = [-1]*len(instances) |
165 assign = True | 162 assign = True |
166 while assign: | 163 while assign: |
167 for i in prototypeIndices: | 164 for i in prototypeIndices: |
182 if assign: | 179 if assign: |
183 prototypeIndices.remove(smallestClusterIndex) | 180 prototypeIndices.remove(smallestClusterIndex) |
184 indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex] | 181 indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex] |
185 return prototypeIndices, labels | 182 return prototypeIndices, labels |
186 | 183 |
187 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None): | 184 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None): |
188 '''Finds exemplar (prototype) instance that represent each cluster | 185 '''Finds exemplar (prototype) instance that represent each cluster |
189 Returns the prototype indices (in the instances list) | 186 Returns the prototype indices (in the instances list) |
190 | 187 |
191 the elements in the instances list must have a length (method __len__), or one can use the optimizeCentroid | 188 the elements in the instances list must have a length (method __len__), or one can use the optimizeCentroid |
192 the positions in the instances list corresponds to the similarities | 189 the positions in the instances list corresponds to the similarities |
202 initialPrototypeIndices are indices in instances | 199 initialPrototypeIndices are indices in instances |
203 | 200 |
204 TODO: check how similarity evolves in clusters''' | 201 TODO: check how similarity evolves in clusters''' |
205 if len(instances) == 0: | 202 if len(instances) == 0: |
206 print('no instances to cluster (empty list)') | 203 print('no instances to cluster (empty list)') |
207 return None | |
208 if similarityFunc is None: | |
209 print('similarityFunc is None') | |
210 return None | 204 return None |
211 | 205 |
212 # sort instances based on length | 206 # sort instances based on length |
213 indices = list(range(len(instances))) | 207 indices = list(range(len(instances))) |
214 if randomInitialization or optimizeCentroid: | 208 if randomInitialization or optimizeCentroid: |
215 indices = np.random.permutation(indices).tolist() | 209 indices = np.random.permutation(indices).tolist() |
216 else: | 210 else: |
217 def compare(i, j): | |
218 if len(instances[i]) > len(instances[j]): | |
219 return -1 | |
220 elif len(instances[i]) == len(instances[j]): | |
221 return 0 | |
222 else: | |
223 return 1 | |
224 indices.sort(key=lambda i: len(instances[i])) | 211 indices.sort(key=lambda i: len(instances[i])) |
225 # initialize clusters | 212 # initialize clusters |
226 clusters = [] | 213 clusters = [] |
227 if initialPrototypeIndices is None: | 214 if initialPrototypeIndices is None: |
228 prototypeIndices = [indices[0]] | 215 prototypeIndices = [indices[0]] |