Mercurial Hosting > traffic-intelligence
diff trafficintelligence/ml.py @ 1054:d13f9bfbf3ff
Retry
author | Wendlasida |
---|---|
date | Fri, 06 Jul 2018 18:42:58 -0400 |
parents | 75a6ad604cc5 |
children | ab4c72b9475c |
line wrap: on
line diff
--- a/trafficintelligence/ml.py Thu Jul 05 22:24:31 2018 -0400
+++ b/trafficintelligence/ml.py Fri Jul 06 18:42:58 2018 -0400
@@ -150,16 +150,13 @@
     code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
     return code,sigma
 
-def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0):
+def assignToPrototypeClusters(instances, initialPrototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize = 0):
     '''Assigns instances to prototypes
     if minClusterSize is not 0, the clusters will be refined by removing iteratively the smallest clusters
     and reassigning all elements in the cluster until no cluster is smaller than minClusterSize
 
     labels are indices in the prototypeIndices'''
-    if similarityFunc is None:
-        print('similarityFunc is None')
-        return None
-
+    prototypeIndices = copy(initialPrototypeIndices)
     indices = [i for i in range(len(instances)) if i not in prototypeIndices]
     labels = [-1]*len(instances)
     assign = True
@@ -184,7 +181,7 @@
             indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex]
     return prototypeIndices, labels
 
-def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None):
+def prototypeCluster(instances, similarities, minSimilarity, similarityFunc, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None):
     '''Finds exemplar (prototype) instance that represent each cluster
     Returns the prototype indices (in the instances list)
 
@@ -205,22 +202,12 @@
     if len(instances) == 0:
         print('no instances to cluster (empty list)')
         return None
-    if similarityFunc is None:
-        print('similarityFunc is None')
-        return None
 
     # sort instances based on length
     indices = list(range(len(instances)))
     if randomInitialization or optimizeCentroid:
         indices = np.random.permutation(indices).tolist()
     else:
-        def compare(i, j):
-            if len(instances[i]) > len(instances[j]):
-                return -1
-            elif len(instances[i]) == len(instances[j]):
-                return 0
-            else:
-                return 1
         indices.sort(key=lambda i: len(instances[i]))
     # initialize clusters
     clusters = []