diff trafficintelligence/ml.py @ 1054:d13f9bfbf3ff

Retry
author Wendlasida
date Fri, 06 Jul 2018 18:42:58 -0400
parents 75a6ad604cc5
children ab4c72b9475c
line wrap: on
line diff
--- a/trafficintelligence/ml.py	Thu Jul 05 22:24:31 2018 -0400
+++ b/trafficintelligence/ml.py	Fri Jul 06 18:42:58 2018 -0400
@@ -150,16 +150,13 @@
     code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
     return code,sigma
 
-def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0):
+def assignToPrototypeClusters(instances, initialPrototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize = 0):
     '''Assigns instances to prototypes 
     if minClusterSize is not 0, the clusters will be refined by removing iteratively the smallest clusters
     and reassigning all elements in the cluster until no cluster is smaller than minClusterSize
 
     labels are indices in the prototypeIndices'''
-    if similarityFunc is None:
-        print('similarityFunc is None')
-        return None
-
+    prototypeIndices = copy(initialPrototypeIndices)
     indices = [i for i in range(len(instances)) if i not in prototypeIndices]
     labels = [-1]*len(instances)
     assign = True
@@ -184,7 +181,7 @@
             indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex]
     return prototypeIndices, labels
 
-def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None):
+def prototypeCluster(instances, similarities, minSimilarity, similarityFunc, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None):
     '''Finds exemplar (prototype) instance that represent each cluster
     Returns the prototype indices (in the instances list)
 
@@ -205,22 +202,12 @@
     if len(instances) == 0:
         print('no instances to cluster (empty list)')
         return None
-    if similarityFunc is None:
-        print('similarityFunc is None')
-        return None
 
     # sort instances based on length
     indices = list(range(len(instances)))
     if randomInitialization or optimizeCentroid:
         indices = np.random.permutation(indices).tolist()
     else:
-        def compare(i, j):
-            if len(instances[i]) > len(instances[j]):
-                return -1
-            elif len(instances[i]) == len(instances[j]):
-                return 0
-            else:
-                return 1
         indices.sort(key=lambda i: len(instances[i]))
     # initialize clusters
     clusters = []