Mercurial Hosting > traffic-intelligence
comparison python/utils.py @ 373:d0b86ed50f32
work in progress on LCSS
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 16 Jul 2013 17:45:07 -0400 |
parents | 349eb1e09f45 |
children | a7af3519687e |
comparison
equal
deleted
inserted
replaced
372:349eb1e09f45 | 373:d0b86ed50f32 |
---|---|
216 self.aligned = aligned | 216 self.aligned = aligned |
217 self.delta = delta | 217 self.delta = delta |
218 self.lengthFunc = lengthFunc | 218 self.lengthFunc = lengthFunc |
219 self.alignmentShift = 0 | 219 self.alignmentShift = 0 |
220 | 220 |
221 def similarities(self, l1, l2, shift=0): | 221 def similarities(self, l1, l2, jshift=0): |
222 from numpy import zeros, int as npint | 222 from numpy import zeros, int as npint |
223 n1 = len(l1) | 223 n1 = len(l1) |
224 n2 = len(l2) | 224 n2 = len(l2) |
225 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) | 225 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) |
226 for i in xrange(1,n1+1): | 226 for i in xrange(1,n1+1): |
227 for j in xrange(max(1,i-shift-self.delta),min(n2+1,i-shift+self.delta+1)): | 227 for j in xrange(max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)): |
228 #print max(1,i-shift-self.delta),min(n2+1,i-shift+self.delta+1) | 228 print max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1) |
229 if self.similarityFunc(l1[i-1], l2[j-1]): | 229 if self.similarityFunc(l1[i-1], l2[j-1]): |
230 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 | 230 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 |
231 else: | 231 else: |
232 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) | 232 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) |
233 | 233 |
241 elif self.similarityTable[i][j] == self.similarityTable[i-1][j]: | 241 elif self.similarityTable[i][j] == self.similarityTable[i-1][j]: |
242 return self.subSequence(i-1, j) | 242 return self.subSequence(i-1, j) |
243 else: | 243 else: |
244 return self.subSequence(i-1, j-1) + [(i-1,j-1)] | 244 return self.subSequence(i-1, j-1) + [(i-1,j-1)] |
245 | 245 |
246 def computeLCSS(self, l1, l2, computeSubSequence = False): | 246 # def computeLCSS(self, l1, l2, computeSubSequence = False): |
247 # '''returns the longest common subsequence similarity | |
248 # based on the threshold on distance between two elements of lists l1, l2 | |
249 # similarityFunc returns True or False whether the two points are considered similar | |
250 | |
251 # eg distance(p1, p2) < epsilon | |
252 # ''' | |
253 # #from numpy import argmax | |
254 # self.similarities(l1, l2) | |
255 # #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1] | |
256 # if computeSubSequence: | |
257 # self.subSequenceIndices = self.subSequence(len(l1), len(l2)) | |
258 # return self.similarityTable.max() | |
259 | |
260 def _compute(self, _l1, _l2, computeSubSequence = False): | |
247 '''returns the longest common subsequence similarity | 261 '''returns the longest common subsequence similarity |
248 based on the threshold on distance between two elements of lists l1, l2 | 262 based on the threshold on distance between two elements of lists l1, l2 |
249 similarityFunc returns True or False whether the two points are considered similar | 263 similarityFunc returns True or False whether the two points are considered similar |
250 | 264 |
265 if aligned, returns the best matching if using a finite delta by shiftinig the series alignments | |
266 | |
251 eg distance(p1, p2) < epsilon | 267 eg distance(p1, p2) < epsilon |
252 ''' | 268 ''' |
253 #from numpy import argmax | |
254 self.similarities(l1, l2) | |
255 #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1] | |
256 if computeSubSequence: | |
257 self.subSequenceIndices = self.subSequence(len(l1), len(l2)) | |
258 return self.similarityTable.max() | |
259 | |
260 def _compute(self, _l1, _l2, computeSubSequence = False): | |
261 '''returns the best matching if using a finite delta by shiftinig the series alignments''' | |
262 if len(_l2) < len(_l1): # l1 is the shortest | 269 if len(_l2) < len(_l1): # l1 is the shortest |
263 l1 = _l2 | 270 l1 = _l2 |
264 l2 = _l1 | 271 l2 = _l1 |
265 revertIndices = True | 272 revertIndices = True |
266 else: | 273 else: |
269 revertIndices = False | 276 revertIndices = False |
270 n1 = len(l1) | 277 n1 = len(l1) |
271 n2 = len(l2) | 278 n2 = len(l2) |
272 | 279 |
273 if self.aligned: | 280 if self.aligned: |
274 # for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1)): # i is the alignment of the end of l1 in l2 | |
275 # print l1[min(-i-1,n1):] # min(n1+n2-i,n1) | |
276 # print l2[max(0,i-n1):] | |
277 # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) | |
278 lcssValues = {} | 281 lcssValues = {} |
279 similarityTables = {} | 282 similarityTables = {} |
280 #for i in xrange(min(self.delta,n1), max(n1+n2-self.delta, n2+1)): | 283 for i in xrange(-n1-self.delta-1, +max(n1,n2)+self.delta): # larger interval |
281 for i in xrange(-max(n1,n2)-self.delta, +max(n1,n2)+self.delta): | |
282 #print l1[min(-i-1,n1):] # min(n1+n2-i,n1) | 284 #print l1[min(-i-1,n1):] # min(n1+n2-i,n1) |
283 #print l2[max(0,i-n1):] | 285 #print l2[max(0,i-n1):] |
284 #lcssValues[i] = self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):]) | 286 #lcssValues[i] = self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):]) |
285 #print i, lcssValues[i] | 287 #print i, lcssValues[i] |
286 lcssValues[i] = self.similarities(l1, l2, i) | 288 self.similarities(l1, l2, i) |
289 lcssValues[i] = self.similarityTable.max() | |
287 similarityTables[i] = self.similarityTable | 290 similarityTables[i] = self.similarityTable |
288 #print i | |
289 print self.similarityTable | 291 print self.similarityTable |
290 self.alignmentShift = argMaxDict(lcssValues) | 292 self.alignmentShift = argMaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance |
291 self.similarityTable = similarityTables[self.alignmentShift] | 293 self.similarityTable = similarityTables[self.alignmentShift] |
292 # do the subsequence computation here, once similarityTable is set | |
293 #self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1) | |
294 #lcss = lcssValues[imax] | |
295 else: | 294 else: |
296 self.alignmentShift = 0 | 295 self.alignmentShift = 0 |
297 self.similarities(l1, l2) | 296 self.similarities(l1, l2) |
298 self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1] | 297 |
298 self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta-self.alignmentShift)+1] | |
299 | |
299 if computeSubSequence: | 300 if computeSubSequence: |
300 self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1) | 301 self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1) |
301 if revertIndices: | 302 if revertIndices: |
302 self.subSequenceIndices = [(j+self.alignmentShift,i) for i,j in self.subSequenceIndices] | 303 self.subSequenceIndices = [(j+self.alignmentShift,i) for i,j in self.subSequenceIndices] |
303 #self.alignmentShift = imax-n1 | |
304 else: | 304 else: |
305 self.subSequenceIndices = [(i+self.alignmentShift,j) for i,j in self.subSequenceIndices] | 305 self.subSequenceIndices = [(i+self.alignmentShift,j) for i,j in self.subSequenceIndices] |
306 #self.alignmentShift = n1-imax | |
307 return self.similarityTable[-1,-1] | 306 return self.similarityTable[-1,-1] |
308 | 307 |
309 def compute(self, l1, l2, computeSubSequence = False): | 308 def compute(self, l1, l2, computeSubSequence = False): |
310 '''get methods are to be shadowed in child classes ''' | 309 '''get methods are to be shadowed in child classes ''' |
311 return self._compute(l1, l2, computeSubSequence) | 310 return self._compute(l1, l2, computeSubSequence) |