comparison python/utils.py @ 689:9990ef119bce dev

added version of LCSS with cdist computations
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 29 Jun 2015 08:35:27 -0400
parents de278c5e65f6
children 8d99a9e16644
comparison
equal deleted inserted replaced
688:f2b52355a286 689:9990ef119bce
4 4
5 import matplotlib.pyplot as plt 5 import matplotlib.pyplot as plt
6 from datetime import time, datetime 6 from datetime import time, datetime
7 from math import sqrt, ceil, floor 7 from math import sqrt, ceil, floor
8 from scipy.stats import kruskal, shapiro 8 from scipy.stats import kruskal, shapiro
9 from scipy.spatial import distance
9 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma 10 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma
10 11
11 12
12 datetimeFormat = "%Y-%m-%d %H:%M:%S" 13 datetimeFormat = "%Y-%m-%d %H:%M:%S"
13 14
629 and puts together the various computations 630 and puts together the various computations
630 631
631 the methods with names starting with _ are not to be shadowed 632 the methods with names starting with _ are not to be shadowed
632 in child classes, who will shadow the other methods, 633 in child classes, who will shadow the other methods,
633 ie compute and computeXX methods''' 634 ie compute and computeXX methods'''
634 def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min): 635 def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min):
635 self.similarityFunc = similarityFunc 636 '''One should provide either a similarity function
636 self.aligned = aligned 637 that indicates (return bool) whether elements in the compares lists are similar
637 self.delta = delta 638
638 self.lengthFunc = lengthFunc 639 eg distance(p1, p2) < epsilon
639 self.subSequenceIndices = [(0,0)] 640
641 or a type of metric usable in scipy.spatial.distance.cdist with an epsilon'''
642 if similarityFunc is None and metric is None:
643 print("No way to compute LCSS, similarityFunc and metric are None. Exiting")
644 import sys
645 sys.exit()
646 elif metric is not None and epsilon is None:
647 print("Please provide a value for epsilon if using a cdist metric. Exiting")
648 import sys
649 sys.exit()
650 else:
651 self.similarityFunc = similarityFunc
652 self.metric = metric
653 self.epsilon = epsilon
654 self.aligned = aligned
655 self.delta = delta
656 self.lengthFunc = lengthFunc
657 self.subSequenceIndices = [(0,0)]
640 658
641 def similarities(self, l1, l2, jshift=0): 659 def similarities(self, l1, l2, jshift=0):
642 n1 = len(l1) 660 n1 = len(l1)
643 n2 = len(l2) 661 n2 = len(l2)
644 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) 662 self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
645 for i in xrange(1,n1+1): 663 if self.similarityFunc is not None:
646 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): 664 for i in xrange(1,n1+1):
647 if self.similarityFunc(l1[i-1], l2[j-1]): 665 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
648 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 666 if self.similarityFunc(l1[i-1], l2[j-1]):
649 else: 667 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
650 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) 668 else:
669 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
670 elif self.metric is not None:
671 similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon
672 for i in xrange(1,n1+1):
673 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
674 if similarElements[i-1, j-1]:
675 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
676 else:
677 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
678
651 679
652 def subSequence(self, i, j): 680 def subSequence(self, i, j):
653 '''Returns the subsequence of two sequences 681 '''Returns the subsequence of two sequences
654 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' 682 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem'''
655 if i == 0 or j == 0: 683 if i == 0 or j == 0:
661 else: 689 else:
662 return self.subSequence(i-1, j-1) + [(i-1,j-1)] 690 return self.subSequence(i-1, j-1) + [(i-1,j-1)]
663 691
664 def _compute(self, _l1, _l2, computeSubSequence = False): 692 def _compute(self, _l1, _l2, computeSubSequence = False):
665 '''returns the longest common subsequence similarity 693 '''returns the longest common subsequence similarity
666 based on the threshold on distance between two elements of lists l1, l2 694 l1 and l2 should be the right format
667 similarityFunc returns True or False whether the two points are considered similar 695 eg list of tuple points for cdist
696 or elements that can be compare using similarityFunc
668 697
669 if aligned, returns the best matching if using a finite delta by shifting the series alignments 698 if aligned, returns the best matching if using a finite delta by shifting the series alignments
670
671 eg distance(p1, p2) < epsilon
672 ''' 699 '''
673 if len(_l2) < len(_l1): # l1 is the shortest 700 if len(_l2) < len(_l1): # l1 is the shortest
674 l1 = _l2 701 l1 = _l2
675 l2 = _l1 702 l2 = _l1
676 revertIndices = True 703 revertIndices = True