Mercurial Hosting > traffic-intelligence
comparison python/utils.py @ 369:027e254f0b53
lcss subclass for indicators
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Mon, 15 Jul 2013 16:47:09 -0400 |
parents | 2db4e76599a1 |
children | 97e8fa0ee9a1 |
comparison
equal
deleted
inserted
replaced
368:2db4e76599a1 | 369:027e254f0b53 |
---|---|
139 | 139 |
140 ######################### | 140 ######################### |
141 # sequence section | 141 # sequence section |
142 ######################### | 142 ######################### |
143 | 143 |
144 def sequenceSimilarities(l1, l2, similarityFunc, delta = float('inf')): | 144 class LCSS: |
145 from numpy import zeros, int as npint | 145 '''Class that keeps the LCSS parameters |
146 n1 = len(l1) | 146 and puts together the various computations''' |
147 n2 = len(l2) | 147 def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min): |
148 similarities = zeros((n1+1,n2+1), dtype = npint) | 148 self.similarityFunc = similarityFunc |
149 for i in xrange(1,n1+1): | 149 self.aligned = aligned |
150 for j in xrange(max(1,i-delta),min(n2+1,i+delta+1)): | 150 self.delta = delta |
151 if similarityFunc(l1[i-1], l2[j-1]): | 151 self.lengthFunc = lengthFunc |
152 similarities[i,j] = similarities[i-1,j-1]+1 | 152 |
153 def similarities(self, l1, l2): | |
154 from numpy import zeros, int as npint | |
155 n1 = len(l1) | |
156 n2 = len(l2) | |
157 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) | |
158 for i in xrange(1,n1+1): | |
159 for j in xrange(max(1,i-self.delta),min(n2+1,i+self.delta+1)): | |
160 if self.similarityFunc(l1[i-1], l2[j-1]): | |
161 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 | |
162 else: | |
163 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) | |
164 | |
165 def subSequence(self, i, j): | |
166 '''Returns the subsequence of two sequences | |
167 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' | |
168 if i == 0 or j == 0: | |
169 return [] | |
170 elif self.similarityTable[i][j] == self.similarityTable[i][j-1]: | |
171 return self.subSequence(i, j-1) | |
172 elif self.similarityTable[i][j] == self.similarityTable[i-1][j]: | |
173 return self.subSequence(i-1, j) | |
174 else: | |
175 return self.subSequence(i-1, j-1) + [(i-1,j-1)] | |
176 | |
177 def computeLCSS(self, l1, l2, computeSubSequence = False): | |
178 '''returns the longest common subsequence similarity | |
179 based on the threshold on distance between two elements of lists l1, l2 | |
180 similarityFunc returns True or False whether the two points are considered similar | |
181 | |
182 eg distance(p1, p2) < epsilon | |
183 ''' | |
184 from numpy import argmax | |
185 self.similarities(l1, l2) | |
186 imax = argmax(self.similarityTable[:,-1]) | |
187 jmax = argmax(self.similarityTable[-1,:]) | |
188 if computeSubSequence: | |
189 if self.similarityTable[imax, -1] > self.similarityTable[-1, jmax]: | |
190 lcss = self.similarityTable[imax, -1] | |
191 self.subSequenceIndices = self.subSequence(imax, len(l2)) | |
153 else: | 192 else: |
154 similarities[i,j] = max(similarities[i-1,j], similarities[i,j-1]) | 193 lcss = self.similarityTable[imax, -1] |
155 return similarities | 194 self.subSequenceIndices = self.subSequence(len(l1), jmax) |
156 | 195 return lcss |
157 def subSequence(similarities, i, j): | 196 else: |
158 '''Returns the subsequence of two sequences | 197 return max(self.similarityTable[imax, -1], self.similarityTable[-1, jmax]) |
159 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' | 198 |
160 if i == 0 or j == 0: | 199 def compute(self, _l1, _l2, computeSubSequence = False): |
161 return [] | 200 '''returns the best matching if using a finite delta by shiftinig the series alignments''' |
162 elif similarities[i][j] == similarities[i][j-1]: | 201 if self.aligned: |
163 return subSequence(similarities, i, j-1) | 202 if len(_l2) < len(_l1): # l1 is the shortest |
164 elif similarities[i][j] == similarities[i-1][j]: | 203 l1 = _l2 |
165 return subSequence(similarities, i-1, j) | 204 l2 = _l1 |
166 else: | 205 else: |
167 return subSequence(similarities, i-1, j-1) + [(i-1,j-1)] | 206 l1 = _l1 |
168 | 207 l2 = _l2 |
169 def LCSS(l1, l2, similarityFunc, delta = float('inf'), returnSubSequence = False): | 208 n1 = len(l1) |
170 '''returns the longest common subsequence similarity | 209 n2 = len(l2) |
171 based on the threshold on distance between two elements of lists l1, l2 | 210 # for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1)): # i is the alignment of the end of l1 in l2 |
172 similarityFunc returns True or False whether the two points are considered similar | 211 # print l1[min(-i-1,n1):] # min(n1+n2-i,n1) |
173 | 212 # print l2[max(0,i-n1):] |
174 eg distance(p1, p2) < epsilon | 213 # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) |
175 ''' | 214 lcss = [self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):]) for i in xrange(min(self.delta,n1), max(n1+n2-self.delta, n2+1))] |
176 from numpy import argmax | 215 return max(lcss) |
177 similarities = sequenceSimilarities(l1, l2, similarityFunc, delta) | 216 else: |
178 imax = argmax(similarities[:,-1]) | 217 return self.computeLCSS(_l1, _l2, computeSubSequence) |
179 jmax = argmax(similarities[-1,:]) | 218 |
180 if returnSubSequence: | 219 def get(self, l1, l2, computeSubSequence = False): |
181 if similarities[imax, -1] > similarities[-1, jmax]: | 220 '''get methods are to be shadowed in child classes ''' |
182 lcss = similarities[imax, -1] | 221 return self.compute(l1, l2, computeSubSequence) |
183 subseq = subSequence(similarities, imax, len(l2)) | 222 |
184 else: | 223 def computeAlignement(self): |
185 lcss = similarities[-1, jmax] | 224 from numpy import mean |
186 subseq = subSequence(similarities, len(l1), jmax) | 225 return mean([j-i for i,j in self.subSequenceIndices]) |
187 return lcss, subseq | 226 |
188 else: | 227 def computeNormalized(self, l1, l2): |
189 return max(similarities[imax, -1], similarities[-1, jmax]) | 228 ''' compute the normalized LCSS |
190 | 229 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc) |
191 def normalizedLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min): | 230 lengthFunc = lambda x,y:float(x,y)/2''' |
192 ''' compute the normalized LCSS | 231 return float(self.compute(l1, l2))/self.lengthFunc(len(l1), len(l2)) |
193 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc) | 232 |
194 lengthFunc = lambda x,y:float(x,y)/2''' | 233 def getNormalized(self, l1, l2): |
195 return float(LCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2)) | 234 return self.computeNormalized(l1, l2) |
196 | 235 |
197 def DLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min): | 236 def computeDistance(self, l1, l2): |
198 ''' compute the LCSS distance''' | 237 ''' compute the LCSS distance''' |
199 return 1-normalizedLCSS(l1, l2, similarityFunc, delta, lengthFunc) | 238 return 1-self.computeNormalized(l1, l2) |
200 | 239 |
201 def alignedLCSS(_l1, _l2, similarityFunc, delta): | 240 def getDistance(self, l1, l2): |
202 '''returns the best matching if using a finite delta by shiftinig the series alignments''' | 241 return self.computeDistance(l1, l2) |
203 if len(_l2) < len(_l1): # l1 is the shortest | 242 |
204 l1 = _l2 | |
205 l2 = _l1 | |
206 else: | |
207 l1 = _l1 | |
208 l2 = _l2 | |
209 n1 = len(l1) | |
210 n2 = len(l2) | |
211 # for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1)): # i is the alignment of the end of l1 in l2 | |
212 # print l1[min(-i-1,n1):] # min(n1+n2-i,n1) | |
213 # print l2[max(0,i-n1):] | |
214 # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) | |
215 lcss = [LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1))] | |
216 return max(lcss) | |
217 | |
218 def normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc = min): | |
219 return float(alignedLCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2)) | |
220 | |
221 def alignedDLCSS(l1, l2, similarityFunc, delta, lengthFunc = min): | |
222 return 1-normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc) | |
223 | |
224 ######################### | 243 ######################### |
225 # maths section | 244 # maths section |
226 ######################### | 245 ######################### |
227 | 246 |
228 def framesToTime(nFrames, frameRate, initialTime = (0.,0.,0.)): | 247 def framesToTime(nFrames, frameRate, initialTime = (0.,0.,0.)): |