Mercurial Hosting > traffic-intelligence
diff python/ml.py @ 731:b02431a8234c dev
made prototypecluster generic, in ml module, and added randominitialization
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 11 Aug 2015 11:38:05 -0400 |
parents | da1352b89d02 |
children | 1d4dcb5c8708 |
line wrap: on
line diff
--- a/python/ml.py Tue Aug 11 10:52:04 2015 -0400 +++ b/python/ml.py Tue Aug 11 11:38:05 2015 -0400 @@ -111,13 +111,59 @@ code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) return code,sigma -def motionPatterLearning(objects, maxDistance): +def prototypeCluster(instances, similarityMatrix, minSimilarity, minClusterSize = None, randomInitialization = False): + '''Finds exemplar (prototype) instance that represent each cluster + Returns the prototype indices (in the instances list) and the cluster label of each instance + + the elements in the instances list must have a length (method __len__), or one can use the random initialization + the positions in the instances list corresponds to the similarityMatrix + + if an instance is different enough (<minSimilarity), + it will become a new prototype. + Non-prototype instances will be assigned to an existing prototype + if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters + and reassigning all elements in the cluster until no cluster is smaller than minClusterSize''' + + # sort instances based on length + indices = range(similarityMatrix.shape[0]) + if randomInitialization: + indices = np.random.permutation(indices) + else: + def compare(i, j): + if len(instances[i]) > len(instances[j]): + return -1 + elif len(instances[i]) == len(instances[j]): + return 0 + else: + return 1 + indices.sort(compare) + # go through all instances + prototypeIndices = [indices[0]] + for i in indices[1:]: + if similarityMatrix[i][prototypeIndices].max() < minSimilarity: + prototypeIndices.append(i) + + # assignment + indices = [i for i in range(similarityMatrix.shape[0]) if i not in prototypeIndices] + assign = True + while assign: + labels = [-1]*similarityMatrix.shape[0] + for i in prototypeIndices: + labels[i] = i + for i in indices: + prototypeIndex = similarityMatrix[i][prototypeIndices].argmax() + labels[i] = prototypeIndices[prototypeIndex] + clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} + smallestClusterIndex = min(clusterSizes, key = clusterSizes.get) + assign = (clusterSizes[smallestClusterIndex] < minClusterSize) + if assign: + prototypeIndices.remove(smallestClusterIndex) + indices.append(smallestClusterIndex) + + return prototypeIndices, labels + +def motionPatternLearning(objects, maxDistance): ''' Option to use only the (n?) longest features per object instead of all for speed up TODO''' pass - -def prototypeCluster(): - ''' - TODO''' - pass