Mercurial Hosting > traffic-intelligence
changeset 689:9990ef119bce dev
added version of LCSS with cdist computations
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Mon, 29 Jun 2015 08:35:27 -0400 |
parents | f2b52355a286 |
children | 463150a8e129 |
files | python/tests/utils.txt python/utils.py |
diffstat | 2 files changed, 55 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
--- a/python/tests/utils.txt Fri Jun 26 23:49:44 2015 -0400 +++ b/python/tests/utils.txt Mon Jun 29 08:35:27 2015 -0400 @@ -50,7 +50,7 @@ >>> mostCommon([range(2), range(4), range(2)]) [0, 1] ->>> lcss = LCSS(lambda x,y: abs(x-y) <= 0.1) +>>> lcss = LCSS(similarityFunc = lambda x,y: abs(x-y) <= 0.1) >>> lcss.compute(range(5), range(5)) 5 >>> lcss.compute(range(1,5), range(5)) @@ -59,8 +59,6 @@ 0 >>> lcss.compute(range(5), range(10)) 5 ->>> lcss.compute(range(5), range(10), 2) -5 >>> lcss.similarityFunc = lambda x,y: x == y >>> lcss.compute(['a','b','c'], ['a','b','c', 'd']) 3 @@ -117,3 +115,14 @@ 8 >>> lcss.subSequenceIndices [(2, 0), (4, 1), (6, 2), (7, 3), (8, 4), (9, 5), (11, 6), (13, 7)] + +>>> lcss = LCSS(metric = 'cityblock', epsilon = 0.1) +>>> lcss.compute([[i] for i in range(5)], [[i] for i in range(5)]) +5 +>>> lcss.compute([[i] for i in range(1,5)], [[i] for i in range(5)]) +4 +>>> lcss.compute([[i] for i in range(5,10)], [[i] for i in range(5)]) +0 +>>> lcss.compute([[i] for i in range(5)], [[i] for i in range(10)]) +5 +
--- a/python/utils.py Fri Jun 26 23:49:44 2015 -0400 +++ b/python/utils.py Mon Jun 29 08:35:27 2015 -0400 @@ -6,6 +6,7 @@ from datetime import time, datetime from math import sqrt, ceil, floor from scipy.stats import kruskal, shapiro +from scipy.spatial import distance from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma @@ -631,23 +632,50 @@ the methods with names starting with _ are not to be shadowed in child classes, who will shadow the other methods, ie compute and computeXX methods''' - def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min): - self.similarityFunc = similarityFunc - self.aligned = aligned - self.delta = delta - self.lengthFunc = lengthFunc - self.subSequenceIndices = [(0,0)] + def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min): + '''One should provide either a similarity function + that indicates (return bool) whether elements in the compares lists are similar + + eg distance(p1, p2) < epsilon + + or a type of metric usable in scipy.spatial.distance.cdist with an epsilon''' + if similarityFunc is None and metric is None: + print("No way to compute LCSS, similarityFunc and metric are None. Exiting") + import sys + sys.exit() + elif metric is not None and epsilon is None: + print("Please provide a value for epsilon if using a cdist metric. Exiting") + import sys + sys.exit() + else: + self.similarityFunc = similarityFunc + self.metric = metric + self.epsilon = epsilon + self.aligned = aligned + self.delta = delta + self.lengthFunc = lengthFunc + self.subSequenceIndices = [(0,0)] def similarities(self, l1, l2, jshift=0): n1 = len(l1) n2 = len(l2) self.similarityTable = zeros((n1+1,n2+1), dtype = npint) - for i in xrange(1,n1+1): - for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): - if self.similarityFunc(l1[i-1], l2[j-1]): - self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 - else: - self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) + if self.similarityFunc is not None: + for i in xrange(1,n1+1): + for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): + if self.similarityFunc(l1[i-1], l2[j-1]): + self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 + else: + self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) + elif self.metric is not None: + similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon + for i in xrange(1,n1+1): + for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): + if similarElements[i-1, j-1]: + self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 + else: + self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) + def subSequence(self, i, j): '''Returns the subsequence of two sequences @@ -663,12 +691,11 @@ def _compute(self, _l1, _l2, computeSubSequence = False): '''returns the longest common subsequence similarity - based on the threshold on distance between two elements of lists l1, l2 - similarityFunc returns True or False whether the two points are considered similar + l1 and l2 should be the right format + eg list of tuple points for cdist + or elements that can be compare using similarityFunc if aligned, returns the best matching if using a finite delta by shifting the series alignments - - eg distance(p1, p2) < epsilon ''' if len(_l2) < len(_l1): # l1 is the shortest l1 = _l2