diff python/ml.py @ 843:5dc7a507353e

updated to learn prototypes
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Wed, 13 Jul 2016 23:45:47 -0400
parents 52aa03260f03
children 8e8ec4ece66e
line wrap: on
line diff
--- a/python/ml.py	Mon Jul 11 17:52:06 2016 -0400
+++ b/python/ml.py	Wed Jul 13 23:45:47 2016 -0400
@@ -145,7 +145,9 @@
     it will become a new prototype. 
     Non-prototype instances will be assigned to an existing prototype
     if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
-    and reassigning all elements in the cluster until no cluster is smaller than minClusterSize'''
+    and reassigning all elements in the cluster until no cluster is smaller than minClusterSize
+
+    TODO: at each step, re-select each prototype as the instance most similar to the other members of its current cluster (easy if the similarities are already computed)'''
 
     # sort instances based on length
     indices = range(len(instances))
@@ -169,7 +171,11 @@
                     similarities[i][j] = similarityFunc(instances[i], instances[j])
                     similarities[j][i] = similarities[i][j]
         if similarities[i][prototypeIndices].max() < minSimilarity:
-             prototypeIndices.append(i)
+            prototypeIndices.append(i)
+        elif randomInitialization: # replace prototype by current instance i if longer
+            label = similarities[i][prototypeIndices].argmax()
+            if len(instances[prototypeIndices[label]]) < len(instances[i]):
+                prototypeIndices[label] = i
 
     # assignment
     indices = [i for i in range(similarities.shape[0]) if i not in prototypeIndices]