comparison python/ml.py @ 731:b02431a8234c dev

made prototypecluster generic, in ml module, and added randominitialization
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Tue, 11 Aug 2015 11:38:05 -0400
parents da1352b89d02
children 1d4dcb5c8708
comparison
equal deleted inserted replaced
730:a850a4f92735 731:b02431a8234c
109 features = whiten(features) 109 features = whiten(features)
110 centroids,distortion = kmeans(features,k, iter) 110 centroids,distortion = kmeans(features,k, iter)
111 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) 111 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
112 return code,sigma 112 return code,sigma
113 113
def prototypeCluster(instances, similarityMatrix, minSimilarity, minClusterSize = None, randomInitialization = False):
    '''Finds exemplar (prototype) instances that represent each cluster
    Returns the prototype indices (in the instances list) and the cluster label of each instance
    (the label of an instance is the index of the prototype it is assigned to,
    a prototype is labelled with its own index)

    the elements in the instances list must have a length (method __len__), or one can use the random initialization
    (in that case the instances themselves are never accessed)
    the positions in the instances list correspond to the rows/columns of similarityMatrix

    Instances are visited from the longest to the shortest (or in random order):
    if an instance is different enough (<minSimilarity) from all current prototypes,
    it will become a new prototype.
    Non-prototype instances will be assigned to the most similar existing prototype
    if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest cluster
    and reassigning all elements until no cluster is smaller than minClusterSize
    (the last remaining prototype is never removed, even if its cluster is too small)

    Returns two empty lists if similarityMatrix is empty'''
    nInstances = similarityMatrix.shape[0]
    if nInstances == 0:
        return [], []

    # order in which the instances are considered as candidate prototypes
    if randomInitialization:
        indices = np.random.permutation(nInstances).tolist()
    else:
        # longest instances first; sorted is stable, so instances of equal length keep their original order
        indices = sorted(range(nInstances), key = lambda i: -len(instances[i]))

    # go through all instances: any instance too dissimilar from the current prototypes becomes one
    prototypeIndices = [indices[0]]
    for i in indices[1:]:
        if similarityMatrix[i][prototypeIndices].max() < minSimilarity:
            prototypeIndices.append(i)

    # assignment
    prototypeSet = set(prototypeIndices) # constant-time membership test
    indices = [i for i in range(nInstances) if i not in prototypeSet]
    while True:
        labels = [-1]*nInstances
        for i in prototypeIndices:
            labels[i] = i
        for i in indices:
            prototypeIndex = similarityMatrix[i][prototypeIndices].argmax()
            labels[i] = prototypeIndices[prototypeIndex]

        # no refinement requested, or nothing left that could be removed
        if minClusterSize is None or len(prototypeIndices) <= 1:
            break

        # every instance is labelled with a prototype index at this point
        clusterSizes = dict.fromkeys(prototypeIndices, 0)
        for label in labels:
            clusterSizes[label] += 1
        # ties are broken in favour of the earliest selected prototype
        smallestClusterIndex = min(prototypeIndices, key = clusterSizes.get)
        if clusterSizes[smallestClusterIndex] >= minClusterSize:
            break
        # dissolve the smallest cluster: its prototype becomes a regular instance and everything is reassigned
        prototypeIndices.remove(smallestClusterIndex)
        indices.append(smallestClusterIndex)

    return prototypeIndices, labels
164
def motionPatternLearning(objects, maxDistance):
    '''Placeholder for motion pattern learning: not implemented yet, does nothing and returns None

    TODO: option to use only the (n?) longest features per object
    instead of all of them, for speed up'''
    return None
119
120 def prototypeCluster():
121 '''
122 TODO'''
123 pass