Mercurial Hosting > traffic-intelligence
comparison python/utils.py @ 689:9990ef119bce dev
added version of LCSS with cdist computations
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Mon, 29 Jun 2015 08:35:27 -0400 |
parents | de278c5e65f6 |
children | 8d99a9e16644 |
comparison
equal
deleted
inserted
replaced
688:f2b52355a286 | 689:9990ef119bce |
---|---|
4 | 4 |
5 import matplotlib.pyplot as plt | 5 import matplotlib.pyplot as plt |
6 from datetime import time, datetime | 6 from datetime import time, datetime |
7 from math import sqrt, ceil, floor | 7 from math import sqrt, ceil, floor |
8 from scipy.stats import kruskal, shapiro | 8 from scipy.stats import kruskal, shapiro |
9 from scipy.spatial import distance | |
9 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma | 10 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma |
10 | 11 |
11 | 12 |
12 datetimeFormat = "%Y-%m-%d %H:%M:%S" | 13 datetimeFormat = "%Y-%m-%d %H:%M:%S" |
13 | 14 |
629 and puts together the various computations | 630 and puts together the various computations |
630 | 631 |
631 the methods with names starting with _ are not to be shadowed | 632 the methods with names starting with _ are not to be shadowed |
632 in child classes, who will shadow the other methods, | 633 in child classes, who will shadow the other methods, |
633 ie compute and computeXX methods''' | 634 ie compute and computeXX methods''' |
634 def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min): | 635 def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min): |
635 self.similarityFunc = similarityFunc | 636 '''One should provide either a similarity function |
636 self.aligned = aligned | 637 that indicates (return bool) whether elements in the compares lists are similar |
637 self.delta = delta | 638 |
638 self.lengthFunc = lengthFunc | 639 eg distance(p1, p2) < epsilon |
639 self.subSequenceIndices = [(0,0)] | 640 |
641 or a type of metric usable in scipy.spatial.distance.cdist with an epsilon''' | |
642 if similarityFunc is None and metric is None: | |
643 print("No way to compute LCSS, similarityFunc and metric are None. Exiting") | |
644 import sys | |
645 sys.exit() | |
646 elif metric is not None and epsilon is None: | |
647 print("Please provide a value for epsilon if using a cdist metric. Exiting") | |
648 import sys | |
649 sys.exit() | |
650 else: | |
651 self.similarityFunc = similarityFunc | |
652 self.metric = metric | |
653 self.epsilon = epsilon | |
654 self.aligned = aligned | |
655 self.delta = delta | |
656 self.lengthFunc = lengthFunc | |
657 self.subSequenceIndices = [(0,0)] | |
640 | 658 |
641 def similarities(self, l1, l2, jshift=0): | 659 def similarities(self, l1, l2, jshift=0): |
642 n1 = len(l1) | 660 n1 = len(l1) |
643 n2 = len(l2) | 661 n2 = len(l2) |
644 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) | 662 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) |
645 for i in xrange(1,n1+1): | 663 if self.similarityFunc is not None: |
646 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): | 664 for i in xrange(1,n1+1): |
647 if self.similarityFunc(l1[i-1], l2[j-1]): | 665 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): |
648 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 | 666 if self.similarityFunc(l1[i-1], l2[j-1]): |
649 else: | 667 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 |
650 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) | 668 else: |
669 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) | |
670 elif self.metric is not None: | |
671 similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon | |
672 for i in xrange(1,n1+1): | |
673 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): | |
674 if similarElements[i-1, j-1]: | |
675 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 | |
676 else: | |
677 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) | |
678 | |
651 | 679 |
652 def subSequence(self, i, j): | 680 def subSequence(self, i, j): |
653 '''Returns the subsequence of two sequences | 681 '''Returns the subsequence of two sequences |
654 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' | 682 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' |
655 if i == 0 or j == 0: | 683 if i == 0 or j == 0: |
661 else: | 689 else: |
662 return self.subSequence(i-1, j-1) + [(i-1,j-1)] | 690 return self.subSequence(i-1, j-1) + [(i-1,j-1)] |
663 | 691 |
664 def _compute(self, _l1, _l2, computeSubSequence = False): | 692 def _compute(self, _l1, _l2, computeSubSequence = False): |
665 '''returns the longest common subsequence similarity | 693 '''returns the longest common subsequence similarity |
666 based on the threshold on distance between two elements of lists l1, l2 | 694 l1 and l2 should be the right format |
667 similarityFunc returns True or False whether the two points are considered similar | 695 eg list of tuple points for cdist |
696 or elements that can be compare using similarityFunc | |
668 | 697 |
669 if aligned, returns the best matching if using a finite delta by shifting the series alignments | 698 if aligned, returns the best matching if using a finite delta by shifting the series alignments |
670 | |
671 eg distance(p1, p2) < epsilon | |
672 ''' | 699 ''' |
673 if len(_l2) < len(_l1): # l1 is the shortest | 700 if len(_l2) < len(_l1): # l1 is the shortest |
674 l1 = _l2 | 701 l1 = _l2 |
675 l2 = _l1 | 702 l2 = _l1 |
676 revertIndices = True | 703 revertIndices = True |