Mercurial Hosting > traffic-intelligence
comparison python/ml.py @ 731:b02431a8234c dev
made prototypecluster generic, in ml module, and added randominitialization
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 11 Aug 2015 11:38:05 -0400 |
parents | da1352b89d02 |
children | 1d4dcb5c8708 |
comparison
equal
deleted
inserted
replaced
730:a850a4f92735 | 731:b02431a8234c |
---|---|
109 features = whiten(features) | 109 features = whiten(features) |
110 centroids,distortion = kmeans(features,k, iter) | 110 centroids,distortion = kmeans(features,k, iter) |
111 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) | 111 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) |
112 return code,sigma | 112 return code,sigma |
113 | 113 |
114 def motionPatterLearning(objects, maxDistance): | 114 def prototypeCluster(instances, similarityMatrix, minSimilarity, minClusterSize = None, randomInitialization = False): |
115 '''Finds the exemplar (prototype) instances that represent each cluster | |
116 Returns the prototype indices (in the instances list) and the cluster label of each instance | |
117 | |
118 the elements in the instances list must have a length (method __len__), or one can use the random initialization | |
119 the positions in the instances list correspond to the row and column indices of the similarityMatrix | |
120 | |
121 if an instance is different enough (its similarity to every existing prototype is < minSimilarity), | |
122 it will become a new prototype. | |
123 Non-prototype instances will be assigned to an existing prototype | |
124 if minClusterSize is not None, the clusters will be refined by iteratively removing the smallest cluster | |
125 and reassigning its elements until no cluster is smaller than minClusterSize''' | |
126 | |
127 # sort instances based on length | |
128 indices = range(similarityMatrix.shape[0]) | |
129 if randomInitialization: | |
130 indices = np.random.permutation(indices) | |
131 else: | |
132 def compare(i, j): | |
133 if len(instances[i]) > len(instances[j]): | |
134 return -1 | |
135 elif len(instances[i]) == len(instances[j]): | |
136 return 0 | |
137 else: | |
138 return 1 | |
139 indices.sort(compare) | |
140 # go through all instances | |
141 prototypeIndices = [indices[0]] | |
142 for i in indices[1:]: | |
143 if similarityMatrix[i][prototypeIndices].max() < minSimilarity: | |
144 prototypeIndices.append(i) | |
145 | |
146 # assignment | |
147 indices = [i for i in range(similarityMatrix.shape[0]) if i not in prototypeIndices] | |
148 assign = True | |
149 while assign: | |
150 labels = [-1]*similarityMatrix.shape[0] | |
151 for i in prototypeIndices: | |
152 labels[i] = i | |
153 for i in indices: | |
154 prototypeIndex = similarityMatrix[i][prototypeIndices].argmax() | |
155 labels[i] = prototypeIndices[prototypeIndex] | |
156 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} | |
157 smallestClusterIndex = min(clusterSizes, key = clusterSizes.get) | |
158 assign = (clusterSizes[smallestClusterIndex] < minClusterSize) | |
159 if assign: | |
160 prototypeIndices.remove(smallestClusterIndex) | |
161 indices.append(smallestClusterIndex) | |
162 | |
163 return prototypeIndices, labels | |
164 | |
165 def motionPatternLearning(objects, maxDistance): | |
115 ''' | 166 ''' |
116 Option to use only the (n?) longest features per object instead of all for speed up | 167 Option to use only the (n?) longest features per object instead of all for speed up |
117 TODO''' | 168 TODO''' |
118 pass | 169 pass |
119 | |
120 def prototypeCluster(): | |
121 ''' | |
122 TODO''' | |
123 pass |