annotate python/utils.py @ 854:33d296984dd8

rework and more info on speed probabilities for classification
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Thu, 22 Sep 2016 17:50:35 -0400
parents 36c5bee9a887
children 2277ab1a8141
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
1 #! /usr/bin/env python
677
ae07c7b4cf87 update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 676
diff changeset
2 # -*- coding: utf-8 -*-
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
3 ''' Generic utilities.'''
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
4
668
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
5 import matplotlib.pyplot as plt
397
b36b00dd27c3 added function to read scene metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 395
diff changeset
6 from datetime import time, datetime
670
f72ed51c6b65 corrected other missing imports
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 669
diff changeset
7 from math import sqrt, ceil, floor
668
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
8 from scipy.stats import kruskal, shapiro
689
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
9 from scipy.spatial import distance
840
15a82ebc62c4 utils for sparse matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 837
diff changeset
10 from scipy.sparse import dok_matrix
15a82ebc62c4 utils for sparse matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 837
diff changeset
11 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma, isinf, savez, load as npload
677
ae07c7b4cf87 update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 676
diff changeset
12
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
13
421
4fce27946c60 first example of video metadata using sqlalchemy
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 405
diff changeset
14 datetimeFormat = "%Y-%m-%d %H:%M:%S"
4fce27946c60 first example of video metadata using sqlalchemy
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 405
diff changeset
15
185
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
16 #########################
742
fe71639f1ee7 merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 741
diff changeset
17 # Strings
fe71639f1ee7 merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 741
diff changeset
18 #########################
fe71639f1ee7 merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 741
diff changeset
19
fe71639f1ee7 merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 741
diff changeset
def upperCaseFirstLetter(s):
    '''Returns s with the first letter of each space-separated word in upper case
    and the other letters of the word in lower case

    Words are delimited by single spaces: empty words (empty string, leading,
    trailing or consecutive spaces) are kept as is so that the spacing is preserved'''
    words = s.split(' ')
    # slicing with w[:1] instead of indexing with w[0] avoids an IndexError on empty words
    capitalizedWords = [w[:1].upper()+w[1:].lower() for w in words]
    return ' '.join(capitalizedWords)
fe71639f1ee7 merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 741
diff changeset
24
fe71639f1ee7 merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 741
diff changeset
25 #########################
185
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
26 # Enumerations
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
27 #########################
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
28
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
def inverseEnumeration(l):
    '''Builds the reverse lookup of a list: a dictionary mapping each element to its index in the list

    If an element appears several times, its last index is kept'''
    return {element: index for index, element in enumerate(l)}
155
f03fe3d6d0c8 added functions to parse options
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 152
diff changeset
35
f03fe3d6d0c8 added functions to parse options
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 152
diff changeset
36 #########################
637
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
37 # Simple statistics
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
38 #########################
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
39
680
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 677
diff changeset
def logNormalMeanVar(loc, scale):
    '''Computes the mean and variance of a log-normal distribution

    loc and scale are respectively the mean and standard deviation
    of the normal distribution of the logarithm of the variable
    https://en.wikipedia.org/wiki/Log-normal_distribution

    same as lognorm.stats(scale, 0, exp(loc))'''
    normalVar = scale**2
    return exp(loc+normalVar/2), (exp(normalVar)-1)*exp(2*loc+normalVar)
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 677
diff changeset
48
423
f738fa1b69f0 added sample size and Student distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 421
diff changeset
def sampleSize(stdev, tolerance, percentConfidence, printLatex = False):
    '''Returns the sample size (k*stdev/tolerance)^2
    where k is the quantile of the standard normal distribution
    for the two-sided confidence level percentConfidence (in percent), rounded to 2 decimals

    If printLatex is True, the formula is also printed in LaTeX format'''
    from scipy.stats.distributions import norm
    # upper quantile of the two-sided interval, ie 1.-(100-percentConfidence)/200.
    quantile = norm.ppf(0.5+percentConfidence/200., 0, 1)
    k = round(quantile*100)/100.
    if printLatex:
        print('${0}^2\\frac{{{1}^2}}{{{2}^2}}$'.format(k, stdev, tolerance))
    ratio = k*stdev/tolerance
    return ratio**2
f738fa1b69f0 added sample size and Student distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 421
diff changeset
55
f738fa1b69f0 added sample size and Student distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 421
diff changeset
def confidenceInterval(mean, stdev, nSamples, percentConfidence, trueStd = True, printLatex = False):
    '''Returns the bounds (lower, upper) of the confidence interval around mean
    at the percentConfidence level (in percent), ie mean +/- k*stdev/sqrt(nSamples)

    if trueStd, use normal distribution, otherwise, Student (with nSamples-1 degrees of freedom)
    The quantile k is rounded to 2 decimals
    If printLatex is True, the formula is also printed in LaTeX format

    Use otherwise t.interval or norm.interval
    ex: norm.interval(0.95, loc = 0., scale = 2.3/sqrt(11))
    t.interval(0.95, 10, loc=1.2, scale = 2.3/sqrt(nSamples))
    loc is mean, scale is sigma/sqrt(n) (for Student, 10 is df)'''
    from scipy.stats.distributions import norm, t
    upperProbability = 0.5+percentConfidence/200. # 1.-(100-percentConfidence)/200.
    if trueStd:
        k = round(norm.ppf(upperProbability, 0, 1)*100)/100.
    else: # use Student
        k = round(t.ppf(upperProbability, nSamples-1)*100)/100.
    e = k*stdev/sqrt(nSamples)
    if printLatex:
        # backslashes are all escaped: \p and \s are invalid escape sequences in a regular string literal
        print('${0} \\pm {1}\\frac{{{2}}}{{\\sqrt{{{3}}}}}$'.format(mean, k, stdev, nSamples))
    return mean-e, mean+e
27f06d28036d added simple helper for confidence intervals
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 297
diff changeset
72
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
def computeChi2(expected, observed):
    '''Computes the Chi2 statistic, ie the sum of (expected-observed)^2/expected
    over the expected and observed values paired by position'''
    chi2 = 0.
    for expectedValue, observedValue in zip(expected, observed):
        deviation = expectedValue-observedValue
        chi2 += (deviation*deviation)/expectedValue
    return chi2
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
79
749
10dbab1e871d modifications in samples and distributions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 742
diff changeset
class DistributionSample(object):
    '''Base class for the samples of a distribution stored as counts of observations

    nSamples reads the counts attribute, which is not set by this class'''
    def nSamples(self):
        '''Returns the total number of observations, ie the sum of the counts'''
        total = sum(self.counts)
        return total
7f1e54234f96 added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 77
diff changeset
83
588
c5406edbcf12 added loading ground truth annotations (ground truth) from polytrack format
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 574
diff changeset
def cumulativeDensityFunction(sample, normalized = False):
    '''Returns the empirical cumulative distribution function of the sample of a random variable

    Returns the sorted values of the sample and the array of their ranks (1 to the sample size),
    divided by the sample size if normalized is True'''
    xaxis = sorted(sample)
    nObservations = len(sample)
    counts = arange(1, nObservations+1)
    if normalized:
        # not divided in place (/=): the array of integers cannot store the fractions
        # and numpy refuses to cast the float result back to integers
        counts = counts/float(nObservations)
    return xaxis, counts
85
7f1e54234f96 added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 77
diff changeset
91
749
10dbab1e871d modifications in samples and distributions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 742
diff changeset
class DiscreteDistributionSample(DistributionSample):
    '''Class to represent a sample of a distribution for a discrete random variable

    categories are the values of the variable
    counts are the numbers of observations of the categories, paired by position'''
    def __init__(self, categories, counts):
        self.categories = categories
        self.counts = counts

    def mean(self):
        '''Returns the mean of the sample, ie the average of the categories weighted by their counts'''
        weightedValues = [float(x*y) for x,y in zip(self.categories, self.counts)]
        return npsum(weightedValues)/self.nSamples()

    def var(self, mean = None):
        '''Returns the variance of the sample (divided by the number of observations minus 1)

        mean can be provided if already known, otherwise it is computed from the sample'''
        # test against None: a provided mean equal to 0 must not be ignored
        if mean is None:
            m = self.mean()
        else:
            m = mean
        squares = [float((x-m)*(x-m)*y) for x,y in zip(self.categories, self.counts)]
        return npsum(squares)/(self.nSamples()-1)

    def referenceCounts(self, probability):
        '''probability is a function that returns the probability of the random variable for the category values

        Returns the reference counts (probabilities multiplied by the number of observations)
        and the reference probabilities of the categories
        The probability of the last category is replaced by the complement
        to 1 of the sum of the probabilities of the other categories'''
        refProba = [probability(c) for c in self.categories]
        refProba[-1] = 1-npsum(refProba[:-1])
        n = self.nSamples() # computed once for all categories
        refCounts = [r*n for r in refProba]
        return refCounts, refProba
85
7f1e54234f96 added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 77
diff changeset
117
749
10dbab1e871d modifications in samples and distributions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 742
diff changeset
class ContinuousDistributionSample(DistributionSample):
    '''Class to represent a sample of a distribution for a continuous random variable
    with the number of observations for each interval
    intervals (categories variable) are defined by their left limits, the last one being the right limit
    categories contain therefore one more element than the counts'''
    def __init__(self, categories, counts):
        # todo add samples for initialization and everything to None? (or setSamples?)
        self.categories = categories
        self.counts = counts

    @staticmethod
    def generate(sample, categories):
        '''Builds the distribution sample by counting the elements of sample in each interval

        categories are the limits of the intervals (they are sorted by this function)
        An interval contains its left limit and not its right limit
        The elements outside of the intervals are reported and not counted'''
        from bisect import bisect_right
        if len(sample) > 0: # min and max are not defined for an empty sample
            if min(sample) < min(categories):
                print('Sample has lower min than proposed categories ({}, {})'.format(min(sample), min(categories)))
            if max(sample) > max(categories):
                print('Sample has higher max than proposed categories ({}, {})'.format(max(sample), max(categories)))
        dist = ContinuousDistributionSample(sorted(categories), [0]*(len(categories)-1))
        for s in sample:
            # i is the number of limits lower than or equal to s: s is in the interval starting at limit i-1
            i = bisect_right(dist.categories, s)
            # i == 0 means that s is lower than the first limit
            # (index i-1 = -1 would count it in the last interval)
            if 1 <= i <= len(dist.counts):
                dist.counts[i-1] += 1
                #print('{} in {} {}'.format(s, dist.categories[i-1], dist.categories[i]))
            else:
                print('Element {} is not in the categories'.format(s))
        return dist

    def mean(self):
        '''Returns the mean of the sample estimated from the midpoints of the intervals weighted by their counts'''
        result = 0.
        # all the intervals are used, including the last one (there is one more category than counts)
        for i, count in enumerate(self.counts):
            result += count*(self.categories[i]+self.categories[i+1])/2.
        return result/self.nSamples()

    def var(self, mean = None):
        '''Returns the variance of the sample estimated from the midpoints of the intervals
        (divided by the number of observations minus 1)

        mean can be provided if already known, otherwise it is computed from the sample'''
        # test against None: a provided mean equal to 0 must not be ignored
        if mean is None:
            m = self.mean()
        else:
            m = mean
        result = 0.
        for i, count in enumerate(self.counts):
            mid = (self.categories[i]+self.categories[i+1])/2.
            result += count*(mid - m)*(mid - m)
        return result/(self.nSamples()-1)

    def referenceCounts(self, cdf):
        '''cdf is a cumulative distribution function
        returning the probability of the variable being less than x

        Returns the reference counts (probabilities multiplied by the number of observations)
        and the reference probabilities of the intervals
        cdf is evaluated only at the inner limits: the probability of the first interval
        is the cdf at its right limit and the probability of the last interval
        is the complement to 1 of the cdf at its left limit'''
        # cumulative probabilities at all the limits, taken as 0 and 1 at the two outer limits
        # (this also works for a single interval, which has no inner limit)
        cumulativeProba = [0.]+[cdf(x) for x in self.categories[1:-1]]+[1.]
        refProba = [upper-lower for lower, upper in zip(cumulativeProba[:-1], cumulativeProba[1:])]
        n = self.nSamples()
        refCounts = [p*n for p in refProba]

        return refCounts, refProba

    def printReferenceCounts(self, refCounts=None, cdf=None):
        '''Prints the intervals with their reference probabilities and counts as the rows of a LaTeX table

        refCounts is the result of referenceCounts (counts, probabilities)
        if refCounts is not provided, it is computed from the cumulative distribution function cdf
        Raises ValueError if neither refCounts nor cdf is provided'''
        if refCounts is not None:
            ref = refCounts
        elif cdf is not None:
            ref = self.referenceCounts(cdf)
        else:
            # referenceCounts cannot be computed without a cumulative distribution function
            raise ValueError('either refCounts or cdf must be provided')
        for i in range(len(ref[0])):
            print('{0}-{1} & {2:0.3} & {3:0.3} \\\\'.format(self.categories[i],self.categories[i+1],ref[1][i], ref[0][i]))
5e6cd36a991c added pretty print in empiricalDistribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 76
diff changeset
186
5e6cd36a991c added pretty print in empiricalDistribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 76
diff changeset
187
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
188 #########################
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
189 # maths section
27
44689029a86f updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents: 24
diff changeset
190 #########################
24
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
191
433
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
192 # def kernelSmoothing(sampleX, X, Y, weightFunc, halfwidth):
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
193 # '''Returns a smoothed weighted version of Y at the predefined values of sampleX
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
194 # Sum_x weight(sample_x,x) * y(x)'''
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
195 # from numpy import zeros, array
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
196 # smoothed = zeros(len(sampleX))
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
197 # for i,x in enumerate(sampleX):
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
198 # weights = array([weightFunc(x,xx, halfwidth) for xx in X])
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
199 # if sum(weights)>0:
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
200 # smoothed[i] = sum(weights*Y)/sum(weights)
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
201 # else:
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
202 # smoothed[i] = 0
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
203 # return smoothed
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
204
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
def kernelSmoothing(x, X, Y, weightFunc, halfwidth):
    '''Returns the kernel-weighted average of the observations Y at x

    Each observation (X[i], Y[i]) is weighted by weightFunc(x, X[i], halfwidth)
    and the result is Sum_i w_i*Y[i] / Sum_i w_i

    Returns 0 if the sum of the weights is not strictly positive
    (eg no observation within halfwidth of x)'''
    kernelWeights = array([weightFunc(x, xi, halfwidth) for xi in X])
    totalWeight = sum(kernelWeights)
    if not totalWeight > 0:
        return 0
    return sum(kernelWeights*Y)/totalWeight
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
213
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
def uniform(center, x, halfwidth):
    '''Uniform (box) kernel weight:
    1 if x is strictly closer to center than halfwidth, 0 otherwise'''
    return 1. if abs(center-x) < halfwidth else 0.
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
219
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
def gaussian(center, x, halfwidth):
    '''Gaussian kernel weight (not normalized):
    exp(-z^2/2) where z is the distance to center in units of halfwidth'''
    z = (center-x)/halfwidth
    return exp(-(z**2)/2)
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
222
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
def epanechnikov(center, x, halfwidth):
    '''Epanechnikov (parabolic) kernel weight (not normalized):
    1-(d/halfwidth)^2 if the distance d between x and center
    is strictly less than halfwidth, 0 otherwise'''
    distance = abs(center-x)
    if not distance < halfwidth:
        return 0.
    return 1.-(distance/halfwidth)**2
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
229
434
9a714f32fc9f small updates
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 433
diff changeset
def triangular(center, x, halfwidth):
    '''Triangular kernel weight:
    decreases linearly from 1 at center to 0 at a distance of halfwidth,
    0 beyond that distance'''
    distance = abs(center-x)
    if not distance < halfwidth:
        return 0.
    return 1.-abs(distance/halfwidth)
433
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
236
518
0c86c73f3c09 median smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 511
diff changeset
def medianSmoothing(x, X, Y, halfwidth):
    '''Returns the median of the Y's whose corresponding X's
    are strictly closer to x than halfwidth,
    ie in the open interval ]x-halfwidth, x+halfwidth['''
    neighbourY = []
    for xi, yi in zip(X, Y):
        if abs(x-xi) < halfwidth:
            neighbourY.append(yi)
    return median(neighbourY)
0c86c73f3c09 median smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 511
diff changeset
240
521
3707eeb20f25 changed argMaxDict name to argmaxDict
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 518
diff changeset
def argmaxDict(d):
    '''Returns the key of the dictionary d that has the largest value
    (the first such key in iteration order if there are ties)'''
    bestKey, _ = max(d.items(), key = lambda item: item[1])
    return bestKey
279
3af4c267a7bf generic simple LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 276
diff changeset
243
837
e01cabca4c55 minor modifications to merge-features
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 749
diff changeset
def deltaFrames(t1, t2, frameRate):
    '''Returns the number of frames between t1 and t2
    positive if t1<=t2, negative otherwise

    t1 and t2 must be comparable and their difference must be a datetime.timedelta
    (eg two datetime.datetime instants)
    The duration is counted in whole seconds (microseconds are ignored)'''
    if t1 > t2:
        sign = -1
        delta = t1-t2
    else:
        sign = 1
        delta = t2-t1
    # timedelta.seconds alone only holds the part of the duration below one day:
    # the days have to be added for instants more than 24 h apart
    nSeconds = delta.days*86400+delta.seconds
    return sign*nSeconds*frameRate
e01cabca4c55 minor modifications to merge-features
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 749
diff changeset
251
395
6fba1ab040f1 minor modification to framestotime
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 391
diff changeset
def framesToTime(nFrames, frameRate, initialTime = time()):
    '''Converts a number of frames into a time of day (datetime.time)

    The whole number of seconds elapsed after nFrames at frameRate
    is added to initialTime (a datetime.time, midnight by default)
    and split into hours, minutes and seconds'''
    startSeconds = initialTime.hour*3600+initialTime.minute*60+initialTime.second
    totalSeconds = int(floor(float(nFrames)/float(frameRate))+startSeconds)
    hours, remainder = divmod(totalSeconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return time(hours, minutes, secs)
248
571ba5ed22e2 added utils for bus processing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 241
diff changeset
261
381
387cc0142211 script to replay event annotations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 376
diff changeset
def timeToFrames(t, frameRate):
    '''Returns the number of frames elapsed since midnight at time t
    (only the hour, minute and second attributes of t are used)'''
    secondsSinceMidnight = t.hour*3600+t.minute*60+t.second
    return frameRate*secondsSinceMidnight
387cc0142211 script to replay event annotations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 376
diff changeset
264
241
ee1caff48b03 added function to sort to list of paired data X,Y
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 235
diff changeset
def sortXY(X,Y):
    '''Returns the x values sorted in increasing order
    and the list of the corresponding y values

    The x values must be hashable; if a value appears several times in X,
    it is returned only once, with the last y associated to it'''
    pairs = dict(zip(X, Y))
    orderedX = sorted(pairs)
    return orderedX, [pairs[x] for x in orderedX]
ee1caff48b03 added function to sort to list of paired data X,Y
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 235
diff changeset
272
733
c35e4a4b199d sortbylength
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 698
diff changeset
def compareLengthForSort(i, j):
    '''Comparison function on the lengths of i and j:
    returns -1 if i is shorter than j, 0 if they have the same length, 1 otherwise'''
    lengthI, lengthJ = len(i), len(j)
    return (lengthI > lengthJ)-(lengthI < lengthJ)
c35e4a4b199d sortbylength
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 698
diff changeset
280
c35e4a4b199d sortbylength
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 698
diff changeset
def sortByLength(instances, reverse = False):
    '''Returns a new list with the instances sorted by length (method __len__)
    reverse is passed to sorted

    The sort is stable: instances of the same length keep their relative order'''
    # key = len instead of a cmp function: the cmp argument of sorted does not exist in Python 3
    return sorted(instances, key = len, reverse = reverse)
c35e4a4b199d sortbylength
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 698
diff changeset
285
32
48e56179c39e added ceil function
Nicolas Saunier <nico@confins.net>
parents: 31
diff changeset
def ceilDecimals(v, nDecimals):
    '''Rounds the number up at the nth decimal
    eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3'''
    scale = 10**nDecimals
    scaledUp = ceil(v*scale)
    return scaledUp/scale
48e56179c39e added ceil function
Nicolas Saunier <nico@confins.net>
parents: 31
diff changeset
291
152
74b1fc68d4df re-organized code to avoid cyclic python module dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 116
diff changeset
def inBetween(bound1, bound2, x):
    '''Indicates whether x lies between the two bounds (included)
    whatever the order in which the bounds are given'''
    if bound1 <= bound2:
        return bound1 <= x <= bound2
    return bound2 <= x <= bound1
0057c04f94d5 work in progress on intersections (for PET)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 561
diff changeset
295
0057c04f94d5 work in progress on intersections (for PET)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 561
diff changeset
def pointDistanceL2(x1,y1,x2,y2):
    '''Returns the Euclidean distance (L2 norm)
    between the points (x1, y1) and (x2, y2)'''
    deltaX = x2-x1
    deltaY = y2-y1
    return sqrt(deltaX**2+deltaY**2)
24
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
299
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
def crossProduct(l1, l2):
    '''Returns the 2D cross product l1[0]*l2[1]-l1[1]*l2[0]
    of l1 and l2 (only their first two elements are used)'''
    x1, y1 = l1[0], l1[1]
    x2, y2 = l2[0], l2[1]
    return x1*y2-y1*x2
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
302
574
e24eeb244698 first implementation of projection to curvilinear coordinates
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 569
diff changeset
def cat_mvgavg(cat_list, halfWidth):
    ''' Return a list of categories/values smoothed according to a window.
    halfWidth is the search radius on either side

    Each element is replaced by the most frequent value among the elements
    at most halfWidth positions away from it (the window is truncated at both ends of the list)
    The values must be hashable; if several values are equally frequent in a window,
    which one is kept depends on the iteration order of the set of window values

    cat_list is not modified'''
    nValues = len(cat_list)
    smoothed = []
    for point in range(nValues):
        lowerBound = max(0, point-halfWidth)
        # the end of a slice is excluded: it must be capped at nValues (and not nValues-1),
        # otherwise the last element is never part of any window
        # and the window of the last point can be empty
        upperBound = min(nValues, point+halfWidth+1)
        windowValues = cat_list[lowerBound:upperBound]
        smoothed.append(max(set(windowValues), key=windowValues.count))
    return smoothed
e24eeb244698 first implementation of projection to curvilinear coordinates
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 569
diff changeset
314
547
97c5fef5b2d6 corrected bugs
MohamedGomaa
parents: 521
diff changeset
def filterMovingWindow(inputSignal, halfWidth):
    '''Returns an array obtained after the smoothing of the input by a moving average
    The first and last points are copied from the original.

    The averaging window contains 2*halfWidth+1 points centered on each point;
    the first and last halfWidth points, for which the window is not complete, are left unchanged
    If the signal is shorter than the window, a copy of the signal (as floats) is returned'''
    signal = array(inputSignal)
    # the window size must be an integer: numpy does not accept a float as array size
    width = 2*int(halfWidth)+1
    if len(signal) < width:
        # every point is among the first or last halfWidth points: nothing to smooth
        return signal.astype('d')
    win = ones(width,'d')/width
    result = convolve(win, signal, 'same')
    if halfWidth > 0: # a slice starting at -0 would cover the whole array
        result[:halfWidth] = signal[:halfWidth]
        result[-halfWidth:] = signal[-halfWidth:]
    return result
ca8e716cc231 added moving average filter
Nicolas Saunier <nico@confins.net>
parents: 27
diff changeset
324
199
ca9d9104afba added utility to calibrate polynoms and plot
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 197
diff changeset
def linearRegression(x, y, deg = 1, plotData = False):
    '''returns the least square estimation of the polynomial regression of y on x
    (linear regression y = ax+b for the default degree deg = 1)

    The coefficients are returned as computed by numpy.polyfit, highest degree first
    If plotData is True, the data points and the fitted curve are also plotted
    in the current matplotlib figure'''
    # public numpy names: the numpy.lib.polynomial and numpy.core.multiarray modules are private
    from numpy import polyfit, polyval, linspace
    coef = polyfit(x, y, deg)
    if plotData:
        plt.plot(x, y, 'x')
        # linspace always yields 1000 points, even for integer x
        # (an integer step computed as (max-min)/1000 can be 0)
        xx = linspace(min(x), max(x), 1000)
        plt.plot(xx, polyval(coef, xx))
    return coef
ca9d9104afba added utility to calibrate polynoms and plot
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 197
diff changeset
341
668
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
def correlation(data, correlationMethod = 'pearson', plotFigure = False, displayNames = None, figureFilename = None):
    '''Computes (and displays) the correlation matrix for a pandas DataFrame

    The variables that cannot be correlated are ignored:
    the ones with object type and the ones with a unique value (nan excluded)

    correlationMethod is passed to DataFrame.corr
    If plotFigure is True, the matrix is displayed as an image,
    with the variable names replaced by their value in the dictionary displayNames, if provided,
    and saved in figureFilename, if provided

    Returns the correlation matrix'''
    def isUsable(var):
        nUnique = len(data[var].unique())
        # the type is tested before isnan is called, since isnan cannot deal with objects
        if nUnique == 1 or data.dtypes[var] == dtype('O'):
            return False
        # two unique values, one of which is nan: only one actual value
        if nUnique == 2 and len(data.loc[~isnan(data[var]), var].unique()) == 1:
            return False
        return True

    columns = [var for var in data.columns.tolist() if isUsable(var)]
    c = data[columns].corr(correlationMethod)
    if not plotFigure:
        return c

    nVariables = c.shape[0]
    fig = plt.figure(figsize=(4+0.4*nVariables, 0.4*nVariables))
    fig.add_subplot(1,1,1)
    plt.imshow(c, vmin=-1., vmax = 1., interpolation='none', cmap = 'RdYlBu_r') # coolwarm
    if displayNames is None:
        colnames = columns
    else:
        colnames = [displayNames.get(s.strip(), s.strip()) for s in columns]
    tickPositions = range(len(colnames))
    plt.xticks(tickPositions, colnames, rotation=90)
    plt.yticks(tickPositions, colnames)
    plt.tick_params('both', length=0)
    plt.subplots_adjust(bottom = 0.29)
    plt.colorbar()
    plt.title('Correlation ({})'.format(correlationMethod))
    plt.tight_layout()
    if len(colnames) > 50:
        plt.subplots_adjust(left=.06)
    if figureFilename is not None:
        plt.savefig(figureFilename, dpi = 150, transparent = True)
    return c
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
372
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
def addDummies(data, variables, allVariables = True):
    '''Add binary dummy variables for each value of a nominal variable
    in a pandas DataFrame

    Only the variables that are columns of data, have the object type
    and more than 2 unique values (nan included) are processed
    For each value val of such a variable var (except nan), a boolean column
    named var_val (without the characters '.', ' ' and '-') is added to data, which is modified in place
    If allVariables is False, no dummy variable is created for the last unique value
    (to avoid perfectly collinear dummy variables)

    Returns the list of the names of the added variables'''
    newVariables = []
    for var in variables:
        if var in data.columns and data.dtypes[var] == dtype('O') and len(data[var].unique()) > 2:
            values = data[var].unique()
            if not allVariables:
                values = values[:-1]
            for val in values:
                # nan is the only float that is different from itself:
                # this recognizes any nan, whereas an identity test only recognizes the numpy.NaN object
                if isinstance(val, float) and val != val:
                    continue
                newVariable = (var+'_{}'.format(val)).replace('.','').replace(' ','').replace('-','')
                data[newVariable] = (data[var] == val)
                newVariables.append(newVariable)
    return newVariables
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
388
677
ae07c7b4cf87 update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 676
diff changeset
def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, renameVariables = lambda s: s, kwCaption = u''):
    '''Studies the influence of a (nominal) independent variable over the dependent variable

    Rows where the independent variable is null are ignored
    For each value of the independent variable, the number of observations is printed,
    and the result of the Shapiro-Wilk normality test if there are at least 3 observations
    (if the conditional distributions are normal, ANOVA could be used)
    The non-parametric Kruskal-Wallis test is then run over all the conditional samples

    If plotFigure is True, a boxplot is drawn (and saved if filenamePrefix is not None)
    The table of descriptive statistics per value, sorted by decreasing median,
    is written in a LaTeX file if saveLatex is True (filenamePrefix is then required), printed otherwise
    kwCaption is formatted with the two variable names followed by the test results
    renameVariables is not used in this function

    Returns the result of scipy.stats.kruskal,
    or None if the independent variable takes less than 2 values'''
    tmp = data[data[independentVariable].notnull()]
    independentVariableValues = sorted(tmp[independentVariable].unique().tolist())
    if len(independentVariableValues) < 2:
        return None
    # the conditional samples are extracted once and reused for the tests and the plot
    samples = [tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues]
    for x, sample in zip(independentVariableValues, samples):
        print('Shapiro-Wilk normality test for {} when {}={}: {} obs'.format(dependentVariable,independentVariable, x, len(sample)))
        if len(sample) >= 3:
            print(shapiro(sample))
    if plotFigure:
        plt.figure()
        plt.boxplot(samples)
        #q25, q75 = tmp[dependentVariable].quantile([.25, .75])
        #plt.ylim(ymax = q75+1.5*(q75-q25))
        plt.xticks(range(1,len(independentVariableValues)+1), independentVariableValues)
        plt.title('{} vs {}'.format(dependentVariable, independentVariable))
        if filenamePrefix is not None:
            plt.savefig(filenamePrefix+'-{}-{}.{}'.format(dependentVariable, independentVariable, figureFileType))
    table = tmp.groupby([independentVariable])[dependentVariable].describe()
    if table.ndim == 1:
        # some pandas versions return a Series indexed by (value, statistic)
        table = table.unstack()
    table = table.sort_values(['50%'], ascending = False)
    table['count'] = table['count'].astype(int)
    testResult = kruskal(*samples)
    if saveLatex:
        from storage import openCheck
        out = openCheck(filenamePrefix+'-{}-{}.tex'.format(dependentVariable, independentVariable), 'w')
        try:
            out.write('\\begin{minipage}{\\linewidth}\n'
                      +'\\centering\n'
                      +'\\captionof{table}{'+(kwCaption.format(dependentVariable, independentVariable, *testResult))+'}\n'
                      +table.to_latex(float_format = lambda x: '{:.3f}'.format(x))+'\n'
                      +'\\end{minipage}\n'
                      +'\\ \\vspace{0.5cm}\n')
        finally:
            # the file is closed even if writing fails
            out.close()
    else:
        print(table)
    return testResult
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
429
677
ae07c7b4cf87 update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 676
diff changeset
def prepareRegression(data, dependentVariable, independentVariables, maxCorrelationThreshold, correlations, maxCorrelationP, correlationFunc, stdoutText = ['Removing {} (constant: {})', 'Removing {} (correlation {} with {})', 'Removing {} (no correlation: {}, p={})'], saveFiles = False, filenamePrefix = None, latexHeader = '', latexTable = None, latexFooter=''):
    '''Removes variables from the candidate independent variables, in 3 successive steps
    - if a variable is constant (a single distinct value, NaN being ignored for non-object variables)
    - if two independent variables are correlated (absolute value in correlations > maxCorrelationThreshold), the second one is removed
    - if a (non-object) independent variable is not correlated with the dependent variable (p>maxCorrelationP)
    Returns the list of remaining variables (independentVariables is not modified)

    correlations is a DataFrame indexed by variable names (looked up with correlations.loc[v1, v2]),
    variables missing from its index are skipped in the second step
    correlationFunc returns a (correlation, p-value) pair, eg spearmanr or pearsonr from scipy.stats
    stdoutText contains the 3 templates printed when a variable is removed in each step (see default)
    If saveFiles is True, latexTable must contain the 3 corresponding templates for the LaTeX file:
    filenamePrefix-removed-variables.tex is written with latexHeader, the formatted lines and latexFooter,
    and filenamePrefix-correlations.html with the correlations and p-values of the remaining (non-object) variables

    TODO: pass the dummies for nominal variables and remove if all dummies are correlated, or none is correlated with the dependentvariable'''
    from copy import copy
    from pandas import DataFrame
    result = copy(independentVariables)
    table1 = ''
    table2 = {'Correlations': [], 'Valeurs p': []}
    # constant variables
    for var in independentVariables:
        uniqueValues = data[var].unique()
        if (len(uniqueValues) == 1) or (len(uniqueValues) == 2 and uniqueValues.dtype != dtype('O') and len(data.loc[~isnan(data[var]), var].unique()) == 1):
            print(stdoutText[0].format(var, uniqueValues))
            if saveFiles:
                table1 += latexTable[0].format(var, *uniqueValues)
            result.remove(var)
    # correlated variables
    for v1 in copy(result):
        # v1 may have been removed as the correlated variable of a previous one
        if v1 not in result or v1 not in correlations.index:
            continue
        for v2 in copy(result):
            if v2 == v1 or v2 not in result or v2 not in correlations.index:
                continue
            correlation = correlations.loc[v1, v2]
            if abs(correlation) > maxCorrelationThreshold:
                if saveFiles:
                    table1 += latexTable[1].format(v2, v1, correlation)
                print(stdoutText[1].format(v2, v1, correlation))
                result.remove(v2)
    # not correlated with dependent variable
    for var in copy(result):
        if data.dtypes[var] != dtype('O'):
            cor, p = correlationFunc(data[dependentVariable], data[var])
            if p > maxCorrelationP:
                if saveFiles:
                    table1 += latexTable[2].format(var, cor, p)
                print(stdoutText[2].format(var, cor, p))
                result.remove(var)
            else:
                table2['Correlations'].append(cor)
                table2['Valeurs p'].append(p)

    if saveFiles:
        from storage import openCheck
        out = openCheck(filenamePrefix+'-removed-variables.tex', 'w')
        try:
            out.write(latexHeader)
            out.write(table1)
            out.write(latexFooter)
        finally:
            # the file is closed even if writing fails
            out.close()
        table2['Variables'] = [var for var in result if data.dtypes[var] != dtype('O')]
        html = DataFrame(table2)[['Variables', 'Correlations', 'Valeurs p']].to_html(formatters = {'Correlations': lambda x: '{:.2f}'.format(x), 'Valeurs p': lambda x: '{:.3f}'.format(x)}, index = False)
        out = openCheck(filenamePrefix+'-correlations.html', 'w')
        try:
            out.write(html)
        finally:
            out.close()
    return result
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
491
841
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 840
diff changeset
def saveDokMatrix(filename, m, lowerTriangle = False):
    '''Saves a dok_matrix using savez

    The shape, the keys (index pairs) and the corresponding values
    are stored in the arrays named shape, keys and values
    If lowerTriangle is True, only the elements strictly below the diagonal
    (row index > column index) are saved'''
    if lowerTriangle:
        keys = [k for k in m.keys() if k[0] > k[1]]
        savez(filename, shape = m.shape, keys = keys, values = [m[k[0],k[1]] for k in keys])
    else:
        # lists are required: the views returned by keys() and values()
        # would be saved as 0-d object arrays
        savez(filename, shape = m.shape, keys = list(m.keys()), values = list(m.values()))
840
15a82ebc62c4 utils for sparse matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 837
diff changeset
499
15a82ebc62c4 utils for sparse matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 837
diff changeset
def loadDokMatrix(filename):
    '''Loads a dok_matrix saved using the above saveDokMatrix

    The file must contain the arrays named shape, keys and values
    Returns the dok_matrix of the stored shape with m[key] = value
    for each stored (key, value) pair'''
    data = npload(filename)
    try:
        m = dok_matrix(tuple(data['shape']))
        for k, v in zip(data['keys'], data['values']):
            m[tuple(k)] = v
    finally:
        # releases the file handle kept open by the loaded archive
        data.close()
    return m
667
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
507
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
508 #########################
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
509 # regression analysis using statsmodels (and pandas)
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
510 #########################
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
511
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
512 # TODO make class for experiments?
668
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
513 # TODO add tests with public dataset downloaded from Internet (IRIS et al)
667
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def modelString(experiment, dependentVariable, independentVariables):
    '''Returns the formula string of the model 'dependentVariable ~ var1 + var2...'
    made of the independent variables for which experiment[variable] is true

    If no independent variable is selected, the intercept-only model
    'dependentVariable ~ 1' is returned (an empty right-hand side is not a valid formula)'''
    selectedVariables = [independentVariable for independentVariable in independentVariables if experiment[independentVariable]]
    if len(selectedVariables) == 0:
        return dependentVariable+' ~ 1'
    return dependentVariable+' ~ '+' + '.join(selectedVariables)
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
516
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def runModel(experiment, data, dependentVariable, independentVariables, regressionType = 'ols'):
    '''Fits the model made of the independent variables selected in experiment
    (see modelString) on data and returns the fitted results

    regressionType is 'ols', 'gls' or 'rlm' (functions of statsmodels.formula.api)
    Any other value prints a message and exits with a non-zero status'''
    # checked first so that an invalid type fails before any other work is done
    if regressionType not in ('ols', 'gls', 'rlm'):
        print('Unknown regression type {}. Exiting'.format(regressionType))
        import sys
        sys.exit(1)
    import statsmodels.formula.api as smf
    modelStr = modelString(experiment, dependentVariable, independentVariables)
    model = getattr(smf, regressionType)(modelStr, data = data)
    return model.fit()
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
531
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def runModels(experiments, data, dependentVariable, independentVariables, regressionType = 'ols'):
    '''Runs several models and stores 4 statistics
    adjusted R2 (column r2adj), condition number (condNum, should be small, eg < 1000),
    p-value for Shapiro-Wilk test of residual normality (shapiroP)
    and number of observations (nobs)

    Each row of experiments indicates which independent variables are included in the model,
    rows without any selected variable are skipped
    The statistics are stored in experiments (modified in place), which is returned'''
    for i,experiment in experiments.iterrows():
        if experiment[independentVariables].any():
            # the requested regression type is passed on (it used to be ignored in favor of 'ols')
            results = runModel(experiment, data, dependentVariable, independentVariables, regressionType = regressionType)
            experiments.loc[i,'r2adj'] = results.rsquared_adj
            experiments.loc[i,'condNum'] = results.condition_number
            experiments.loc[i, 'shapiroP'] = shapiro(results.resid)[1]
            experiments.loc[i,'nobs'] = int(results.nobs)
    return experiments
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
544
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def generateExperiments(independentVariables):
    '''Builds the table of all the models that include or not each independent variable

    Returns a DataFrame with one row per model (2**n rows for n variables)
    and one boolean column per variable: the variable at position i
    is included in row r if bit i of r is set.
    The result columns are initialized to placeholder values
    (r2adj to 0., condNum to NaN, shapiroP and nobs to -1)

    Prints a message and exits if a variable appears more than once'''
    from pandas import DataFrame
    nVariables = len(independentVariables)
    if len(set(independentVariables)) != nVariables:
        print("Duplicate variables. Exiting")
        import sys
        sys.exit()
    # variable at position k alternates 2**k False then 2**k True over the rows
    inclusion = {name: ([False]*(2**k)+[True]*(2**k))*(2**(nVariables-k-1))
                 for k, name in enumerate(independentVariables)}
    table = DataFrame(inclusion)
    table['r2adj'] = 0.
    table['condNum'] = NaN
    table['shapiroP'] = -1
    table['nobs'] = -1
    return table
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
564
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def findBestModel(data, dependentVariable, independentVariables, regressionType = 'ols', nProcesses = 1):
    '''Generates all possible models with the independentVariables
    and runs them, saving the results in experiments
    with multiprocess option

    The experiments are generated by generateExperiments and run by runModels
    (to which data, dependentVariable, independentVariables and regressionType are passed).
    If nProcesses is 1, all models are run in the current process,
    otherwise the experiments are split in nProcesses consecutive chunks
    that are run in a multiprocessing Pool and concatenated

    Returns the experiments with the results'''
    from pandas import concat
    from multiprocessing import Pool
    experiments = generateExperiments(independentVariables)
    nModels = len(experiments)
    print("Running {} models with {} processes".format(nModels, nProcesses))
    print("IndependentVariables: {}".format(independentVariables))
    if nProcesses == 1:
        return runModels(experiments, data, dependentVariable, independentVariables, regressionType)
    else:
        pool = Pool(processes = nProcesses)
        try:
            # force a float division: with integer division (Python 2), the ratio is floored
            # before ceil and the last models are dropped if nModels is not a multiple of nProcesses
            chunkSize = int(ceil(float(nModels)/nProcesses))
            jobs = [pool.apply_async(runModels, args = (experiments[i*chunkSize:(i+1)*chunkSize], data, dependentVariable, independentVariables, regressionType)) for i in range(nProcesses)]
            return concat([job.get() for job in jobs])
        finally:
            # release the worker processes whether or not the jobs succeeded
            pool.close()
            pool.join()
667
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
582
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def findBestModelFwd(data, dependentVariable, independentVariables, modelFunc, experiments = None):
    '''Forward search for best model (based on adjusted R2)
    Randomly starting with one variable and adding randomly variables
    if they improve the model

    The variables are tried once each in a random order: a variable is kept
    if the adjusted R2 of the model with it is strictly larger than the best so far
    (which starts at 0.)

    modelFunc is called as modelFunc(modelStr, data = data)
    and must return an object with a fit method

    The results are added to experiments if provided as argument
    (it is modified in place, and generated with generateExperiments if None);
    a model is run only if its shapiroP is still negative, ie it was not run before
    Storing in experiment relies on the index being the number equal
    to the binary code derived from the independent variables

    Returns experiments'''
    from numpy.random import permutation as nppermutation
    if experiments is None:
        experiments = generateExperiments(independentVariables)
    nIndependentVariables = len(independentVariables)
    # permutation[i] is the position in the search order of independentVariables[i]
    permutation = nppermutation(range(nIndependentVariables)).tolist()
    variableMapping = {j: independentVariables[i] for i,j in enumerate(permutation)}
    print('Tested variables '+', '.join([variableMapping[i] for i in range(nIndependentVariables)]))
    bestModel = [False]*nIndependentVariables
    currentR2Adj = 0.
    for currentVarNum in range(nIndependentVariables):
        currentModel = list(bestModel)
        currentModel[currentVarNum] = True
        # row of the model in experiments: bit i is set if independentVariables[i] is included
        rowIdx = sum(2**i for i in range(nIndependentVariables) if currentModel[permutation[i]])
        if experiments.loc[rowIdx, 'shapiroP'] < 0:
            modelStr = modelString(experiments.loc[rowIdx], dependentVariable, independentVariables)
            model = modelFunc(modelStr, data = data)
            results = model.fit()
            experiments.loc[rowIdx, 'r2adj'] = results.rsquared_adj
            experiments.loc[rowIdx, 'condNum'] = results.condition_number
            experiments.loc[rowIdx, 'shapiroP'] = shapiro(results.resid)[1]
            experiments.loc[rowIdx, 'nobs'] = int(results.nobs)
        if currentR2Adj < experiments.loc[rowIdx, 'r2adj']:
            currentR2Adj = experiments.loc[rowIdx, 'r2adj']
            bestModel[currentVarNum] = True
    return experiments
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
618
677
ae07c7b4cf87 update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 676
diff changeset
def displayModelResults(results, model = None, plotFigures = True, filenamePrefix = None, figureFileType = 'pdf', text = {'title-shapiro': 'Shapiro-Wilk normality test for residuals: {:.2f} (p={:.3f})', 'true-predicted.xlabel': 'Predicted values', 'true-predicted.ylabel': 'True values', 'residuals-predicted.xlabel': 'Predicted values', 'residuals-predicted.ylabel': 'Residuals'}):
    '''Displays some model results

    Prints the summary of results and the Shapiro-Wilk test of the residuals

    If plotFigures is True, plots in one figure
    - the true values (model.endog) against the predicted values, only if model is provided
    - the residuals against the predicted values
    - the QQ plot of the residuals
    and, if filenamePrefix is not None, saves the summary
    in filenamePrefix-coefficients.html and the figure
    in filenamePrefix-model-results.figureFileType

    text is the dictionary of the title and axis labels (None for no title nor label);
    the keys 'qqplot.xlabel' and 'qqplot.ylabel' are optional.
    The default dictionary is shared between calls, it is only read here'''
    import statsmodels.api as sm
    print(results.summary())
    shapiroResult = shapiro(results.resid)
    print(shapiroResult)
    if plotFigures:
        # the figure is taller if there is a third plot (true against predicted values)
        fig = plt.figure(figsize=(7,6.3*(2+int(model is not None))))
        if model is not None:
            ax = fig.add_subplot(3,1,1)
            plt.plot(results.predict(), model.endog, 'x')
            x=plt.xlim()
            y=plt.ylim()
            # diagonal over the range common to both axes
            plt.plot([max(x[0], y[0]), min(x[1], y[1])], [max(x[0], y[0]), min(x[1], y[1])], 'r')
            if text is not None:
                plt.title(text['title-shapiro'].format(*shapiroResult))
                plt.xlabel(text['true-predicted.xlabel'])
                plt.ylabel(text['true-predicted.ylabel'])
            fig.add_subplot(3,1,2, sharex = ax)
            plt.plot(results.predict(), results.resid, 'x')
            nextSubplotNum = 3
        else:
            fig.add_subplot(2,1,1)
            plt.plot(results.predict(), results.resid, 'x')
            nextSubplotNum = 2
        if text is not None:
            if model is None: # otherwise the title is already on the first plot
                plt.title(text['title-shapiro'].format(*shapiroResult))
            plt.xlabel(text['residuals-predicted.xlabel'])
            plt.ylabel(text['residuals-predicted.ylabel'])
        qqAx = fig.add_subplot(nextSubplotNum,1,nextSubplotNum)
        sm.qqplot(results.resid, fit = True, line = '45', ax = qqAx)
        plt.axis('equal')
        if text is not None:
            # each optional label is checked separately to avoid a KeyError if only one is provided
            if 'qqplot.xlabel' in text:
                plt.xlabel(text['qqplot.xlabel'])
            if 'qqplot.ylabel' in text:
                plt.ylabel(text['qqplot.ylabel'])
        plt.tight_layout()
        if filenamePrefix is not None:
            from storage import openCheck
            out = openCheck(filenamePrefix+'-coefficients.html', 'w')
            try:
                out.write(results.summary().as_html())
            finally:
                out.close()
            plt.savefig(filenamePrefix+'-model-results.'+figureFileType)
667
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
665
27
44689029a86f updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents: 24
diff changeset
666 #########################
455
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
667 # iterable section
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
668 #########################
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
669
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
def mostCommon(L):
    '''Returns the element that occurs most often in L
    (if tied, the one that appears first in L)

    L must support len and its elements must be sortable

    taken from http://stackoverflow.com/questions/1518522/python-most-common-element-in-a-list'''
    from itertools import groupby
    from operator import itemgetter

    def _countAndEarliest(group):
        'ranks a group by its number of occurrences, then by its earliest position in L'
        _, occurrences = group
        positions = [position for _, position in occurrences]
        return len(positions), -min([len(L)]+positions)

    # pair each element with its position and sort so that equal elements are adjacent
    indexedElements = [(element, position) for position, element in enumerate(L)]
    indexedElements.sort()
    return max(groupby(indexedElements, key=itemgetter(0)), key=_countAndEarliest)[0]
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
692
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
693 #########################
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
694 # sequence section
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
695 #########################
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
696
665
15e244d2a1b5 corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 659
diff changeset
697 class LCSS(object):
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
698 '''Class that keeps the LCSS parameters
686
cdee6a3a47b4 allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
699 and puts together the various computations
cdee6a3a47b4 allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
700
cdee6a3a47b4 allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
701 the methods with names starting with _ are not to be shadowed
cdee6a3a47b4 allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
702 in child classes, who will shadow the other methods,
cdee6a3a47b4 allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
703 ie compute and computeXX methods'''
689
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
704 def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min):
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
705 '''One should provide either a similarity function
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
706 that indicates (return bool) whether elements in the compares lists are similar
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
707
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
708 eg distance(p1, p2) < epsilon
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
709
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
710 or a type of metric usable in scipy.spatial.distance.cdist with an epsilon'''
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
711 if similarityFunc is None and metric is None:
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
712 print("No way to compute LCSS, similarityFunc and metric are None. Exiting")
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
713 import sys
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
714 sys.exit()
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
715 elif metric is not None and epsilon is None:
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
716 print("Please provide a value for epsilon if using a cdist metric. Exiting")
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
717 import sys
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
718 sys.exit()
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
719 else:
741
5b91b8d97cf3 corrected bug
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 739
diff changeset
720 if similarityFunc is None and metric is not None and not isinf(delta):
737
fb60b54e1041 added warning for finite delta
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 733
diff changeset
721 print('Warning: you are using a cdist metric and a finite delta, which will make probably computation slower than using the equivalent similarityFunc (since all pairwise distances will be computed by cdist).')
689
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
722 self.similarityFunc = similarityFunc
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
723 self.metric = metric
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
724 self.epsilon = epsilon
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
725 self.aligned = aligned
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
726 self.delta = delta
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
727 self.lengthFunc = lengthFunc
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
728 self.subSequenceIndices = [(0,0)]
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
729
373
d0b86ed50f32 work in progress on LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 372
diff changeset
def similarities(self, l1, l2, jshift=0):
    '''Computes self.similarityTable, the (len(l1)+1) x (len(l2)+1) integer table of LCSS values

    Elements are compared with self.similarityFunc if it is not None,
    otherwise by thresholding with self.epsilon the distances computed by cdist with self.metric
    Only the cells (i,j) such that j is within self.delta of i-jshift are filled, the others are left at 0'''
    n1, n2 = len(l1), len(l2)
    table = zeros((n1+1,n2+1), dtype = npint)
    self.similarityTable = table
    if self.similarityFunc is not None:
        isSimilar = lambda i, j: self.similarityFunc(l1[i-1], l2[j-1])
    elif self.metric is not None:
        # all pairwise distances are computed at once, even outside of the band
        similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon
        isSimilar = lambda i, j: similarElements[i-1, j-1]
    else: # nothing to compare the elements with: the table stays at 0
        return
    for i in xrange(1,n1+1):
        firstJ = max(1,i-jshift-self.delta)
        lastJ = min(n2,i-jshift+self.delta)
        for j in xrange(firstJ, lastJ+1):
            if isSimilar(i, j):
                table[i,j] = table[i-1,j-1]+1
            else:
                table[i,j] = max(table[i-1,j], table[i,j-1])
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
749
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
750
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
def subSequence(self, i, j):
    '''Returns the list of matched index pairs of the longest common subsequence
    read from self.similarityTable, starting the backtrace at cell (i,j)
    http://en.wikipedia.org/wiki/Longest_common_subsequence_problem

    The pairs are 0-based indices in the two sequences, in increasing order
    The backtrace is iterative so that long sequences do not hit the recursion limit'''
    table = self.similarityTable
    indices = []
    while i > 0 and j > 0:
        if table[i][j] == table[i][j-1]: # same value on the left: element j-1 is not needed
            j -= 1
        elif table[i][j] == table[i-1][j]: # same value above: element i-1 is not needed
            i -= 1
        else: # the value comes from the diagonal: elements i-1 and j-1 are matched
            i -= 1
            j -= 1
            indices.append((i, j))
    indices.reverse() # pairs were collected from the end of the sequences
    return indices
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
762
373
d0b86ed50f32 work in progress on LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 372
diff changeset
def _compute(self, _l1, _l2, computeSubSequence = False):
    '''Returns the longest common subsequence similarity of _l1 and _l2

    The sequences should be in the format expected by the comparison in use,
    eg lists of tuple points for cdist
    or elements that can be compared using similarityFunc

    If self.aligned, the similarity table is computed for a range of shifts of the series alignment
    (meant for a finite delta) and the table of the shift selected by argmaxDict on the table maxima is kept

    If computeSubSequence, the matched index pairs are stored in self.subSequenceIndices
    as (index in _l1, index in _l2)'''
    # the shortest sequence is used as l1
    revertIndices = len(_l2) < len(_l1)
    if revertIndices:
        l1, l2 = _l2, _l1
    else:
        l1, l2 = _l1, _l2
    n1 = len(l1)
    n2 = len(l2)

    alignmentShift = 0
    if self.aligned:
        lcssValues = {}
        similarityTables = {}
        # interval such that [i-shift-delta, i-shift+delta] is never empty, which happens when i-shift+delta < 1 or when i-shift-delta > n2
        for shift in xrange(-n2-self.delta+1, n1+self.delta):
            self.similarities(l1, l2, shift)
            similarityTables[shift] = self.similarityTable
            lcssValues[shift] = self.similarityTable.max()
        alignmentShift = argmaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance
        self.similarityTable = similarityTables[alignmentShift]
    else:
        self.similarities(l1, l2)

    # threshold values for the useful part of the similarity table are n2-n1-delta and n1-n2-delta
    lastRow = min(n1, n2+alignmentShift+self.delta)
    lastColumn = min(n2, n1-alignmentShift+self.delta)
    self.similarityTable = self.similarityTable[:lastRow+1, :lastColumn+1]

    if computeSubSequence:
        nRows, nColumns = self.similarityTable.shape[0], self.similarityTable.shape[1]
        indices = self.subSequence(nRows-1, nColumns-1)
        if revertIndices: # back to the order of the arguments
            indices = [(j,i) for i,j in indices]
        self.subSequenceIndices = indices
    return self.similarityTable[-1,-1]
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
804
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
def compute(self, l1, l2, computeSubSequence = False):
    '''Returns the LCSS similarity of l1 and l2 as computed by _compute

    This public method is the one meant to be shadowed in child classes'''
    similarity = self._compute(l1, l2, computeSubSequence)
    return similarity
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
808
375
2ea8584aa80a making indicator LCSS work
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 374
diff changeset
def computeAlignment(self):
    '''Returns the mean of the index differences j-i
    over the pairs (i,j) stored in self.subSequenceIndices'''
    shifts = []
    for i, j in self.subSequenceIndices:
        shifts.append(j-i)
    return mean(shifts)
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
811
376
2e6b8610bcaa work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 375
diff changeset
def _computeNormalized(self, l1, l2, computeSubSequence = False):
    '''Returns the normalized LCSS
    ie the LCSS divided by self.lengthFunc applied to the lengths of l1 and l2
    eg lengthFunc = min or, for the mean length, lengthFunc = lambda x,y:float(x+y)/2'''
    lcss = float(self._compute(l1, l2, computeSubSequence))
    return lcss/self.lengthFunc(len(l1), len(l2))
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
817
376
2e6b8610bcaa work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 375
diff changeset
def computeNormalized(self, l1, l2, computeSubSequence = False):
    '''Returns the normalized LCSS of l1 and l2 as computed by _computeNormalized'''
    normalizedSimilarity = self._computeNormalized(l1, l2, computeSubSequence)
    return normalizedSimilarity
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
820
376
2e6b8610bcaa work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 375
diff changeset
def _computeDistance(self, l1, l2, computeSubSequence = False):
    '''Returns the LCSS distance, ie 1 minus the normalized LCSS of l1 and l2'''
    normalizedSimilarity = self._computeNormalized(l1, l2, computeSubSequence)
    return 1-normalizedSimilarity
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
824
376
2e6b8610bcaa work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 375
diff changeset
def computeDistance(self, l1, l2, computeSubSequence = False):
    '''Returns the LCSS distance of l1 and l2 as computed by _computeDistance'''
    lcssDistance = self._computeDistance(l1, l2, computeSubSequence)
    return lcssDistance
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
827
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
828 #########################
45
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
829 # plotting section
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
830 #########################
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
831
332
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
def plotPolygon(poly, options = ''):
    '''Plots the exterior ring of the shapely polygon poly

    options is passed as is to matplotlib plot after the x and y coordinates'''
    from numpy import array
    from matplotlib.pyplot import plot

    # the coordinates are read explicitly from the ring
    # instead of relying on the conversion of the geometry itself to an array
    exteriorCoordinates = array(poly.exterior.coords)
    plot(exteriorCoordinates[:,0], exteriorCoordinates[:,1], options)
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
840
324
99ca91a46007 minor change
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 323
diff changeset
def stepPlot(X, firstX, lastX, initialCount = 0, increment = 1):
    '''Returns the two lists (abscissas, counts) to plot a step function
    that starts at initialCount at firstX and goes up by increment at each value in X, until lastX

    Each value of X appears twice in the abscissas, with the count before and after the step
    firstX and lastX should be respectively smaller and larger than all elements in X'''
    abscissas = [firstX]
    counts = [initialCount]
    currentCount = initialCount
    for x in sorted(X):
        abscissas.extend((x, x))
        counts.append(currentCount) # bottom of the step
        currentCount = currentCount+increment
        counts.append(currentCount) # top of the step
    abscissas.append(lastX)
    counts.append(currentCount)
    return abscissas, counts
f6f423e25c7f adding function to generate step plots (for cumulative number of vehicles)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 286
diff changeset
855
665
15e244d2a1b5 corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 659
diff changeset
class PlottingPropertyValues(object):
    '''Sequence of values to pick from by index
    the index wraps around the number of values, so that any integer index is valid'''
    def __init__(self, values):
        self.values = values

    def __getitem__(self, i):
        nValues = len(self.values)
        return self.values[i%nValues]
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
862
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
# property values to pick from by index, eg markers[i] for the ith plotted element
# (the index wraps around, see PlottingPropertyValues)
markers = PlottingPropertyValues([
    '+', '*', ',', '.',
    'x', 'D', 's', 'o'])
scatterMarkers = PlottingPropertyValues([
    's', 'o', '^', '>', 'v', '<',
    'd', 'p', 'h', '8', '+', 'x'])

linestyles = PlottingPropertyValues([
    '-', '--', '-.', ':'])

colors = PlottingPropertyValues('brgmyck') # 'w' is left out
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
869
115
550556378466 added functionalities to indicator maps
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 86
diff changeset
def plotIndicatorMap(indicatorMap, squareSize, masked = True, defaultValue=-1):
    '''Plots with pcolor the values of indicatorMap on a grid of squares of side squareSize

    indicatorMap is a dictionary: the keys are indexed as (x index, y index) of the grid cells,
    the values are the values to plot
    Cells without value are set to defaultValue, and are masked if masked is True'''
    from matplotlib.pyplot import pcolor
    coords = array(list(indicatorMap.keys()))
    minX = min(coords[:,0])
    minY = min(coords[:,1])
    # X and Y are the cell edges: one more element than the number of cells
    X = arange(minX, max(coords[:,0])+1.1)*squareSize
    Y = arange(minY, max(coords[:,1])+1.1)*squareSize
    # one value per cell, ie one row and one column less than the edges
    # (C used to have the shape of the edges, with an extra unused row and column)
    C = defaultValue*ones((len(Y)-1, len(X)-1))
    for k,v in indicatorMap.items():
        C[k[1]-minY,k[0]-minX] = v
    if masked:
        pcolor(X, Y, ma.masked_where(C==defaultValue,C))
    else:
        pcolor(X, Y, C)
65
75cf537b8d88 moved and generalized map making functions to the library
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 48
diff changeset
884
45
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
885 #########################
637
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
886 # Data download
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
887 #########################
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
888
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
def downloadECWeather(stationID, years, months = [], outputDirectoryname = '.', english = True):
    '''Downloads weather data from Environment Canada and saves it in csv files in outputDirectoryname
    If months are provided (numbers 1 to 12), it means hourly data for each whole month (one file per year and month)
    Otherwise, means the data for each day, for the whole year (one file per year)

    english selects the English version of the page (French otherwise)

    Example: MONTREAL MCTAVISH 10761
    MONTREALPIERRE ELLIOTT TRUDEAU INTL A 5415

    To get daily data for 2010 and 2011, downloadECWeather(10761, [2010,2011], [], '/tmp')
    To get hourly data for 2009 and 2012, January, March and October, downloadECWeather(10761, [2009,2012], [1,3,10], '/tmp')'''
    import urllib2
    from contextlib import closing

    language = 'e' if english else 'f'
    if len(months) == 0:
        timeFrame = 2
        months = [1] # rebinds the local name only, the default argument is not modified
    else:
        timeFrame = 1

    for year in years:
        for month in months:
            # closing makes sure the connection is released, even if reading fails
            with closing(urllib2.urlopen('http://climat.meteo.gc.ca/climateData/bulkdata_{}.html?format=csv&stationID={}&Year={}&Month={}&Day=1&timeframe={}&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es'.format(language, stationID, year, month, timeFrame))) as url:
                data = url.read()
            outFilename = '{}/{}-{}'.format(outputDirectoryname, stationID, year)
            if timeFrame == 1:
                outFilename += '-{}-hourly'.format(month)
            else:
                outFilename += '-daily'
            outFilename += '.csv'
            # the file is closed even if writing fails
            with open(outFilename, 'w') as out:
                out.write(data)
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
923
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
924 #########################
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
925 # File I/O
27
44689029a86f updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents: 24
diff changeset
926 #########################
24
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
927
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
def removeExtension(filename, delimiter = '.'):
    '''Returns the filename minus the extension
    (all characters from the last occurrence of delimiter on)

    The filename is returned unchanged if
    - the delimiter is not found,
    - the delimiter is the first character (eg hidden files like .bashrc),
    - the last delimiter belongs to a directory component, ie a path separator
      follows it (eg '../data/video' has no extension to remove)'''
    import os
    i = filename.rfind(delimiter)
    if i <= 0:
        return filename
    # a path separator after the delimiter means that the delimiter was found
    # in a directory name and not in the name of the file itself
    separators = [sep for sep in (os.sep, os.altsep) if sep]
    if any(sep in filename[i:] for sep in separators):
        return filename
    return filename[:i]
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
935
46
b5d007612e16 added filename util
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 45
diff changeset
def cleanFilename(s):
    '''Cleans the filenames obtained when concatenating figure characteristics

    Spaces and slashes are replaced by dashes, dots and commas are removed'''
    replacements = ((' ', '-'), ('.', ''), ('/', '-'), (',', ''))
    cleaned = s
    for old, new in replacements:
        cleaned = cleaned.replace(old, new)
    return cleaned
46
b5d007612e16 added filename util
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 45
diff changeset
939
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
def listfiles(dirname, extension, remove = False):
    '''Returns the sorted list of the files in the directory dirname
    whose name ends with extension

    If remove is True, the extension is stripped from each filename
    (through removeExtension, with extension used as delimiter)'''
    from os import listdir
    matching = sorted(name for name in listdir(dirname) if name.endswith(extension))
    if not remove:
        return matching
    return [removeExtension(name, extension) for name in matching]
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
950
266
aba9711b3149 small modificatons and reorganization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 262
diff changeset
def mkdir(dirname):
    '''Creates the directory dirname

    If the path already exists, nothing is created and a message is printed'''
    import os
    if os.path.exists(dirname):
        print(dirname+' already exists')
        return
    os.mkdir(dirname)
aba9711b3149 small modificatons and reorganization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 262
diff changeset
958
14
e7bbe8465591 homography and other utils
Nicolas Saunier <nico@confins.net>
parents: 7
diff changeset
def removeFile(filename):
    '''Deletes the file filename

    No error is raised if the file does not exist: a message is printed instead'''
    import os
    if not os.path.exists(filename):
        print(filename+' does not exist')
        return
    os.remove(filename)
14
e7bbe8465591 homography and other utils
Nicolas Saunier <nico@confins.net>
parents: 7
diff changeset
967
42
1a2ac2d4f53a added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 35
diff changeset
def line2Floats(l, separator=' '):
    '''Splits the string l on separator and returns the elements converted to floats, as a list'''
    elements = l.split(separator)
    return list(map(float, elements))
1a2ac2d4f53a added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 35
diff changeset
971
1a2ac2d4f53a added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 35
diff changeset
def line2Ints(l, separator=' '):
    '''Splits the string l on separator and returns the elements converted to ints, as a list'''
    elements = l.split(separator)
    return list(map(int, elements))
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
975
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
976 #########################
332
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
977 # CLI utils
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
978 #########################
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
979
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
def parseCLIOptions(helpMessage, options, cliArgs, optionalOptions=[]):
    ''' Simple function to handle similar argument parsing
    Returns the dictionary of options and their values
    (keys are the long option names with their leading dashes, eg '--frame')

    * helpMessage is printed, followed by the lists of compulsory
    and non-compulsory options, if -h or --help is passed (the program then quits)

    * cliArgs are most likely directly sys.argv
    (only the elements after the first one are considered)

    * options should be a list of strings for getopt options,
    eg ['frame=','correspondences=','video=']
    A value must be provided for each option, or the program quits
    (with exit status 1)

    * optionalOptions is a list in the same format as options,
    for the options that can be omitted

    getopt.GetoptError is raised by getopt for unknown options'''
    import sys
    import getopt
    # optionalOptions is only read (a new list is built by the concatenation),
    # so the default list shared between calls is never modified
    optionValues, args = getopt.getopt(cliArgs[1:], 'h', ['help']+options+optionalOptions)
    optionValues = dict(optionValues)

    if '--help' in optionValues or '-h' in optionValues:
        print(helpMessage+
              '\n - Compulsory options: '+' '.join([opt.replace('=','') for opt in options])+
              '\n - Non-compulsory options: '+' '.join([opt.replace('=','') for opt in optionalOptions]))
        sys.exit()

    # the builtin all is sufficient: no need for the numpy function
    # (numpy.core is a private module of numpy)
    providedOptions = [('--'+opt.replace('=','') in optionValues) for opt in options]
    if not all(providedOptions):
        print('Missing argument')
        print(optionValues)
        # non-zero status so that calling scripts can detect the failure
        sys.exit(1)

    return optionValues
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
1008
397
b36b00dd27c3 added function to read scene metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 395
diff changeset
1009
b36b00dd27c3 added function to read scene metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 395
diff changeset
1010 #########################
553
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
1011 # Profiling
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
1012 #########################
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
1013
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
def analyzeProfile(profileFilename, stripDirs = True):
    '''Loads a file produced by cProfile, prints the most time-consuming entries
    and returns the pstats.Stats object for further analysis

    The file is loaded from os.path.join(os.pardir, profileFilename),
    ie relative to the parent of the working directory
    (unless profileFilename is an absolute path)

    If stripDirs is True, the leading path information is removed from the file names
    The entries are sorted by internal time and the top 20 % are printed

    The profile file is obtained by for example:
    - call in script (for main() function in script)
    import cProfile, os
    cProfile.run('main()', os.path.join(os.getcwd(),'main.profile'))

    - or on the command line:
    python -m cProfile [-o profile.bin] [-s sort] scriptfile [arg]'''
    import os
    import pstats
    stats = pstats.Stats(os.path.join(os.pardir, profileFilename))
    if stripDirs:
        stats.strip_dirs()
    # the pstats.Stats methods return the object itself and can be chained
    stats.sort_stats('time').print_stats(.2)
    # callees of a given script can be listed with eg
    # stats.print_callees(.1, 'int_prediction.py:')
    return stats
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
1033
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
1034 #########################
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
1035 # running tests
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
1036 #########################
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
1037
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
if __name__ == "__main__":
    # runs the doctests stored in tests/utils.txt with the unittest text runner
    import doctest
    import unittest
    testSuite = doctest.DocFileSuite('tests/utils.txt')
    # alternative: doctests of the module docstrings
    #testSuite = doctest.DocTestSuite()
    runner = unittest.TextTestRunner()
    runner.run(testSuite)
    #doctest.testmod()
    #doctest.testfile("example.txt")