comparison python/events.py @ 728:4e89341edd29 dev

added the capability to enforce a minimum cluster size and re-assign the indicators to other clusters
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 10 Aug 2015 17:51:49 -0400
parents e395bffe1412
children b02431a8234c
comparison
equal deleted inserted replaced
716:85af65b6d531 728:4e89341edd29
293 allPoints += points 293 allPoints += points
294 else: 294 else:
295 print('unknown type of point: '+pointType) 295 print('unknown type of point: '+pointType)
296 return allPoints 296 return allPoints
297 297
298 def prototypeCluster(interactions, similarityMatrix, alignmentMatrix, indicatorName, minSimilarity): 298 def prototypeCluster(interactions, similarityMatrix, alignmentMatrix, indicatorName, minSimilarity, minClusterSize = None):
299 '''Finds exemplar indicator time series for all interactions 299 '''Finds exemplar indicator time series for all interactions
300 Returns the prototype indices (in the interaction list) and the label of each indicator (interaction) 300 Returns the prototype indices (in the interaction list) and the label of each indicator (interaction)
301 301
302 if an indicator profile (time series) is different enough (<minSimilarity), 302 if an indicator profile (time series) is different enough (<minSimilarity),
303 it will become a new prototype. 303 it will become a new prototype.
304 Non-prototype interactions will be assigned to an existing prototype''' 304 Non-prototype interactions will be assigned to an existing prototype
305 if minClusterSize is not None, the clusters are refined by iteratively removing the smallest cluster
306 and reassigning all of its elements to the remaining clusters, until no cluster is smaller than minClusterSize'''
305 307
306 # sort indicators based on length 308 # sort indicators based on length
307 indices = range(similarityMatrix.shape[0]) 309 indices = range(similarityMatrix.shape[0])
308 def compare(i, j): 310 def compare(i, j):
309 if len(interactions[i].getIndicator(indicatorName)) > len(interactions[j].getIndicator(indicatorName)): 311 if len(interactions[i].getIndicator(indicatorName)) > len(interactions[j].getIndicator(indicatorName)):
318 for i in indices[1:]: 320 for i in indices[1:]:
319 if similarityMatrix[i][prototypeIndices].max() < minSimilarity: 321 if similarityMatrix[i][prototypeIndices].max() < minSimilarity:
320 prototypeIndices.append(i) 322 prototypeIndices.append(i)
321 323
322 # assignment 324 # assignment
323 labels = [-1]*similarityMatrix.shape[0]
324 indices = [i for i in range(similarityMatrix.shape[0]) if i not in prototypeIndices] 325 indices = [i for i in range(similarityMatrix.shape[0]) if i not in prototypeIndices]
325 for i in prototypeIndices: 326 assign = True
326 labels[i] = i 327 while assign:
327 for i in indices: 328 labels = [-1]*similarityMatrix.shape[0]
328 prototypeIndex = similarityMatrix[i][prototypeIndices].argmax() 329 for i in prototypeIndices:
329 labels[i] = prototypeIndices[prototypeIndex] 330 labels[i] = i
331 for i in indices:
332 prototypeIndex = similarityMatrix[i][prototypeIndices].argmax()
333 labels[i] = prototypeIndices[prototypeIndex]
334 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
335 smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
336 assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
337 print prototypeIndices, smallestClusterIndex, clusterSizes[smallestClusterIndex]
338 if assign:
339 prototypeIndices.remove(smallestClusterIndex)
340 indices.append(smallestClusterIndex)
330 341
331 return prototypeIndices, labels 342 return prototypeIndices, labels
332 343
333 def prototypeMultivariateCluster(interactions, similarityMatrics, indicatorNames, minSimilarities, minClusterSize): 344 def prototypeMultivariateCluster(interactions, similarityMatrics, indicatorNames, minSimilarities, minClusterSize):
334 '''Finds example indicator time series (several indicators) for all interactions 345 '''Finds example indicator time series (several indicators) for all interactions