repo/traffic-intelligence: python/ml.py comparison

comparison python/ml.py @ 980:23f98ebb113f

first tests for clustering algo

author	Nicolas Saunier <nicolas.saunier@polymtl.ca>
date	Mon, 19 Feb 2018 16:32:59 -0500
parents	184f1dd307f9
children	e8eabef7857c

comparison

equal deleted inserted replaced

-:cc89267b5ff9
+:23f98ebb113f
 self.prototypeId = prototypeId
 self.memberIndices = memberIndices
 def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0):
 '''Assigns instances to prototypes
-if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
+if minClusterSize is not 0, the clusters will be refined by removing iteratively the smallest clusters
-and reassigning all elements in the cluster until no cluster is smaller than minClusterSize'''
+and reassigning all elements in the cluster until no cluster is smaller than minClusterSize
+labels are the instance indices of the assigned prototypes (elements of prototypeIndices), or -1 for outliers'''
+if similarityFunc is None:
+print('similarityFunc is None')
+return None
 indices = [i for i in range(len(instances)) if i not in prototypeIndices]
 labels = [-1]*len(instances)
 assign = True
 while assign:
 for i in prototypeIndices:
 labels[i] = i
 for i in indices:
-if similarityFunc is not None:
+for j in prototypeIndices:
-for j in prototypeIndices:
+if similarities[i][j] < 0:
-if similarities[i][j] < 0:
+similarities[i][j] = similarityFunc(instances[i], instances[j])
-similarities[i][j] = similarityFunc(instances[i], instances[j])
+similarities[j][i] = similarities[i][j]
-similarities[j][i] = similarities[i][j]
+label = similarities[i][prototypeIndices].argmax()
-prototypeIdx = similarities[i][prototypeIndices].argmax()
+if similarities[i][prototypeIndices[label]] >= minSimilarity:
-if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity:
+labels[i] = prototypeIndices[label]
-labels[i] = prototypeIndices[prototypeIdx]
 else:
 labels[i] = -1 # outlier
 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
 smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
 assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
 if assign:
 prototypeIndices.remove(smallestClusterIndex)
 indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex]
 return prototypeIndices, labels
-def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, optimizeCentroid = True, randomInitialization = False, initialPrototypeIndices = None):
+def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None):
 '''Finds the exemplar (prototype) instances that represent each cluster
 Returns the prototype indices (in the instances list)
-the elements in the instances list must have a length (method __len__), or one can use the random initialization
+the elements in the instances list must have a length (method __len__), unless randomInitialization or optimizeCentroid is used (the instances are then ordered randomly)
 the positions in the instances list correspond to the similarities
 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed)
 similarities must still be allocated with the right size
 if an instance is different enough (<minSimilarity),
 return None
 # sort instances based on length
 indices = range(len(instances))
 if randomInitialization or optimizeCentroid:
-indices = np.random.permutation(indices)
+indices = np.random.permutation(indices).tolist()
 else:
 def compare(i, j):
 if len(instances[i]) > len(instances[j]):
 return -1
 elif len(instances[i]) == len(instances[j]):
 return 0
 else:
 return 1
 indices.sort(compare)
-# go through all instances
+# initialize clusters
 clusters = []
 if initialPrototypeIndices is None:
 prototypeIndices = [indices[0]]
 else:
 prototypeIndices = initialPrototypeIndices # think of the format: if indices, have to be in instances
 for i in prototypeIndices:
 clusters.append([i])
 indices.remove(i)
+# go through all instances
 for i in indices:
 for j in prototypeIndices:
 if similarities[i][j] < 0:
 similarities[i][j] = similarityFunc(instances[i], instances[j])
 similarities[j][i] = similarities[i][j]
-label = similarities[i][prototypeIndices].argmax()
+label = similarities[i][prototypeIndices].argmax() # index in prototypeIndices
 if similarities[i][prototypeIndices[label]] < minSimilarity:
 prototypeIndices.append(i)
 clusters.append([])
 else:
 clusters[label].append(i)
 maxima = model.means_.max(0)
 xwidth = 0.5*(maxima[0]-minima[0])
 ywidth = 0.5*(maxima[1]-minima[1])
 plt.xlim(minima[0]-xwidth,maxima[0]+xwidth)
 plt.ylim(minima[1]-ywidth,maxima[1]+ywidth)
+if __name__ == "__main__":
+import doctest
+import unittest
+suite = doctest.DocFileSuite('tests/ml.txt')
+unittest.TextTestRunner().run(suite)
+#     #doctest.testmod()
+#     #doctest.testfile("example.txt")

Mercurial Hosting > traffic-intelligence

comparison python/ml.py @ 980:23f98ebb113f