Mercurial Hosting > traffic-intelligence
annotate trafficintelligence/utils.py @ 1028:cc5cb04b04b0
major update using the trafficintelligence package name and install through pip
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Fri, 15 Jun 2018 11:19:10 -0400 |
parents | python/utils.py@a13f47c8931d |
children | c6cf75a2ed08 |
rev | line source |
---|---|
0
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
1 #! /usr/bin/env python |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
2 # -*- coding: utf-8 -*- |
0
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
3 ''' Generic utilities.''' |
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
4 |
668
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
5 import matplotlib.pyplot as plt |
397
b36b00dd27c3
added function to read scene metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
395
diff
changeset
|
6 from datetime import time, datetime |
971
9897a13772fb
added utils to load video sequence in metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
969
diff
changeset
|
7 from argparse import ArgumentTypeError |
1021
16932cefabc1
work on paths in line with new configurations from tracker
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
997
diff
changeset
|
8 from pathlib import Path |
670
f72ed51c6b65
corrected other missing imports
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
669
diff
changeset
|
9 from math import sqrt, ceil, floor |
1028
cc5cb04b04b0
major update using the trafficintelligence package name and install through pip
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1023
diff
changeset
|
10 from scipy.stats import rv_continuous, kruskal, shapiro, lognorm |
689
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
11 from scipy.spatial import distance |
840
15a82ebc62c4
utils for sparse matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
837
diff
changeset
|
12 from scipy.sparse import dok_matrix |
1023
a13f47c8931d
work on processing large datasets (generate speed data)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1022
diff
changeset
|
13 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
14 |
0
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
15 |
421
4fce27946c60
first example of video metadata using sqlalchemy
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
405
diff
changeset
|
# default format of the TimeConverter class, e.g. 2017-06-26 14:37:20
datetimeFormat = "%Y-%m-%d %H:%M:%S"

# compact timestamp format (presumably the one of SJCAM camera file names, to be confirmed)
sjcamDatetimeFormat = "%Y_%m%d_%H%M%S"#2017_0626_143720
5d788d2e8ffc
work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
940
diff
changeset
|
19 |
185
c06379f25ab8
utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
181
diff
changeset
|
20 ######################### |
742
fe71639f1ee7
merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
741
diff
changeset
|
21 # Strings |
fe71639f1ee7
merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
741
diff
changeset
|
22 ######################### |
fe71639f1ee7
merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
741
diff
changeset
|
23 |
fe71639f1ee7
merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
741
diff
changeset
|
def upperCaseFirstLetter(s):
    '''Returns s where each word (separated by single spaces) has its first letter in upper case
    and its other letters in lower case

    Empty words (empty string, consecutive, leading or trailing spaces) are kept as is'''
    # slicing (w[:1]) instead of indexing (w[0]) avoids an IndexError on empty words
    return ' '.join(w[:1].upper()+w[1:].lower() for w in s.split(' '))
fe71639f1ee7
merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
741
diff
changeset
|
28 |
971
9897a13772fb
added utils to load video sequence in metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
969
diff
changeset
|
class TimeConverter:
    '''Converts strings to datetime objects according to a format

    Failures are reported as ArgumentTypeError (from argparse),
    presumably so that convert can be used as the type of a command line argument'''
    def __init__(self, datetimeFormat = datetimeFormat):
        self.datetimeFormat = datetimeFormat

    def convert(self, s):
        '''Returns the datetime read from the string s with self.datetimeFormat

        raises ArgumentTypeError if s does not match the format'''
        try:
            converted = datetime.strptime(s, self.datetimeFormat)
        except ValueError:
            raise ArgumentTypeError("Not a valid date: '{0}'.".format(s))
        return converted
9897a13772fb
added utils to load video sequence in metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
969
diff
changeset
|
39 |
742
fe71639f1ee7
merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
741
diff
changeset
|
40 ######################### |
185
c06379f25ab8
utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
181
diff
changeset
|
41 # Enumerations |
c06379f25ab8
utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
181
diff
changeset
|
42 ######################### |
c06379f25ab8
utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
181
diff
changeset
|
43 |
c06379f25ab8
utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
181
diff
changeset
|
def inverseEnumeration(l):
    '''Returns the dictionary mapping each element of the input list to its index in the list

    if an element is repeated, its last index is kept'''
    return {element: index for index, element in enumerate(l)}
155
f03fe3d6d0c8
added functions to parse options
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
152
diff
changeset
|
50 |
f03fe3d6d0c8
added functions to parse options
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
152
diff
changeset
|
51 ######################### |
637
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
52 # Simple statistics |
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
53 ######################### |
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
54 |
680
da1352b89d02
classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
677
diff
changeset
|
def logNormalMeanVar(loc, scale):
    '''Returns the mean and variance of the log-normal distribution
    where loc and scale are respectively the mean and standard deviation of the normal in the log-normal distribution
    https://en.wikipedia.org/wiki/Log-normal_distribution

    same as lognorm.stats(scale, 0, exp(loc))'''
    normalVariance = scale**2 # variance of the underlying normal distribution
    return exp(loc+normalVariance/2), (exp(normalVariance)-1)*exp(2*loc+normalVariance)
da1352b89d02
classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
677
diff
changeset
|
63 |
855
2277ab1a8141
added utility for lognorm estimation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
854
diff
changeset
|
def fitLogNormal(x):
    '''Returns the fitted location and scale of the lognormal (general definition) for the sample x

    The fit is done by scipy lognorm, with its own location parameter fixed to 0:
    the returned location is the log of scipy's scale and the returned scale is scipy's shape'''
    scipyShape, _, scipyScale = lognorm.fit(x, floc=0.)
    return log(scipyScale), scipyShape
2277ab1a8141
added utility for lognorm estimation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
854
diff
changeset
|
68 |
859
a8de3c93f6b7
minor modifications to helper stat functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
856
diff
changeset
|
def sampleSize(stdev, tolerance, percentConfidence, nRoundingDigits = None, printLatex = False):
    '''Returns the sample size (k*stdev/tolerance)^2
    where k is the quantile of the standard normal distribution
    for the two-sided confidence level percentConfidence (in percent)

    k is rounded to 2 digits by default
    if nRoundingDigits is provided, k, stdev and tolerance are rounded to that number of digits

    if printLatex, the formula with the values is printed in LaTeX'''
    from scipy.stats.distributions import norm
    quantileLevel = 0.5+percentConfidence/200. # 1.-(100-percentConfidence)/200.
    z = norm.ppf(quantileLevel, 0, 1)
    if nRoundingDigits is not None:
        k = round(z, nRoundingDigits)
        stdev = round(stdev, nRoundingDigits)
        tolerance = round(tolerance, nRoundingDigits)
    else:
        k = round(z, 2)
    if printLatex:
        print('$z_{{{}}}^2\\frac{{s^2}}{{e^2}}={}^2\\frac{{{}^2}}{{{}^2}}$'.format(quantileLevel, k, stdev, tolerance))
    return (k*stdev/tolerance)**2
f738fa1b69f0
added sample size and Student distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
421
diff
changeset
|
80 |
f738fa1b69f0
added sample size and Student distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
421
diff
changeset
|
def confidenceInterval(mean, stdev, nSamples, percentConfidence, trueStd = True, printLatex = False):
    '''Returns the lower and upper bounds mean-e, mean+e of the confidence interval
    where e = k*stdev/sqrt(nSamples) and k is the quantile (rounded to 2 digits)
    for the two-sided confidence level percentConfidence (in percent)

    if trueStd, use normal distribution, otherwise, Student (with nSamples-1 degrees of freedom)

    if printLatex, the formula with the values is printed in LaTeX

    Use otherwise t.interval or norm.interval for the boundaries
    ex: norm.interval(0.95)
    t.interval(0.95, nSamples-1)'''
    from scipy.stats.distributions import norm, t
    if trueStd:
        k = round(norm.ppf(0.5+percentConfidence/200., 0, 1), 2)
    else: # use Student
        k = round(t.ppf(0.5+percentConfidence/200., nSamples-1), 2)
    e = k*stdev/sqrt(nSamples)
    if printLatex:
        # all backslashes are doubled: \p and \s are invalid escape sequences in a regular string literal
        # (the printed text is unchanged)
        print('${0} \\pm {1}\\frac{{{2}}}{{\\sqrt{{{3}}}}}$'.format(mean, k, stdev, nSamples))
    return mean-e, mean+e
27f06d28036d
added simple helper for confidence intervals
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
297
diff
changeset
|
96 |
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
def computeChi2(expected, observed):
    '''Returns the Chi2 statistics:
    sum over the paired elements of expected and observed
    of the squared difference divided by the expected value'''
    chi2 = 0
    for e, o in zip(expected, observed):
        difference = e-o
        chi2 += (difference*difference)/float(e)
    return chi2
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
100 |
1028
cc5cb04b04b0
major update using the trafficintelligence package name and install through pip
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1023
diff
changeset
|
class generateDistribution(rv_continuous):
    '''Continuous distribution defined by points of its cumulative distribution function,
    which is linearly interpolated between these points'''
    def __init__(self, values, probabilities, **kwargs):
        '''The values (and corresponding probabilities) are supposed to be sorted by value
        for v, p in zip(values, probabilities): P(X<=v)=p

        kwargs are passed to the constructor of rv_continuous'''
        assert probabilities[0]==0
        # rv_continuous has to be initialized for its public methods (eg cdf) to work
        super().__init__(**kwargs)
        self.values = values
        self.probabilities = probabilities

    def _cdf(self, x):
        '''Returns P(X<=x), for a number or an array x (rv_continuous passes arrays)

        The result is probabilities[0] below values[0], probabilities[-1] above values[-1],
        and the linear interpolation of the probabilities between two successive values otherwise'''
        from numpy import interp
        # interp clamps to the first and last probabilities outside of the range of values
        return interp(x, self.values, self.probabilities)
cc5cb04b04b0
major update using the trafficintelligence package name and install through pip
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1023
diff
changeset
|
120 |
749
10dbab1e871d
modifications in samples and distributions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
742
diff
changeset
|
class DistributionSample(object):
    '''Base class of the samples of a distribution

    nSamples relies on a counts attribute, which this class does not define itself'''
    def nSamples(self):
        'Returns the total number of observations, ie the sum of the counts'
        total = 0
        for count in self.counts:
            total += count
        return total
7f1e54234f96
added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
77
diff
changeset
|
124 |
588
c5406edbcf12
added loading ground truth annotations (ground truth) from polytrack format
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
574
diff
changeset
|
def cumulativeDensityFunction(sample, normalized = False):
    '''Returns the cumulative density function of the sample of a random variable

    as the sorted values of the sample and the corresponding cumulative counts (1 to len(sample)),
    which are divided by the size of the sample if normalized'''
    xaxis = sorted(sample)
    counts = arange(1,len(sample)+1)
    if normalized:
        # not done in place (/=): the float result cannot be stored in the integer array made by arange
        counts = counts/float(len(sample))
    return xaxis, counts
85
7f1e54234f96
added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
77
diff
changeset
|
132 |
749
10dbab1e871d
modifications in samples and distributions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
742
diff
changeset
|
class DiscreteDistributionSample(DistributionSample):
    '''Class to represent a sample of a distribution for a discrete random variable'''
    def __init__(self, categories, counts):
        '''categories are the values of the variable
        and counts the corresponding numbers of observations (same order)'''
        self.categories = categories
        self.counts = counts

    def mean(self):
        'Returns the mean of the sample, ie the mean of the categories weighted by the counts'
        result = [float(x*y) for x,y in zip(self.categories, self.counts)]
        return npsum(result)/self.nSamples()

    def var(self, mean = None):
        '''Returns the variance of the sample (sum of squares divided by nSamples-1)

        The squared differences are computed with respect to mean,
        which is computed from the sample if None'''
        if mean is None: # and not "if not mean": a provided mean equal to 0 must be used
            m = self.mean()
        else:
            m = mean
        squares = [float((x-m)*(x-m)*y) for x,y in zip(self.categories, self.counts)]
        return npsum(squares)/(self.nSamples()-1)

    def referenceCounts(self, probability):
        '''probability is a function that returns the probability of the random variable for the category values

        Returns the reference counts (probabilities multiplied by the sample size) and the probabilities
        for all categories'''
        refProba = [probability(c) for c in self.categories]
        # the last category is given the remaining probability so that the probabilities sum to 1
        refProba[-1] = 1-npsum(refProba[:-1])
        refCounts = [r*self.nSamples() for r in refProba]
        return refCounts, refProba
85
7f1e54234f96
added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
77
diff
changeset
|
158 |
749
10dbab1e871d
modifications in samples and distributions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
742
diff
changeset
|
159 class ContinuousDistributionSample(DistributionSample): |
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
160 '''Class to represent a sample of a distribution for a continuous random variable |
76
64fde2b1f96d
simplified intervales in empiricalDistribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
75
diff
changeset
|
161 with the number of observations for each interval |
64fde2b1f96d
simplified intervales in empiricalDistribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
75
diff
changeset
|
162 intervals (categories variable) are defined by their left limits, the last one being the right limit |
64fde2b1f96d
simplified intervales in empiricalDistribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
75
diff
changeset
|
163 categories contain therefore one more element than the counts''' |
35
8cafee54466f
forgotten update of histogram class
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
32
diff
changeset
|
164 def __init__(self, categories, counts): |
276 | 165 # todo add samples for initialization and everything to None? (or setSamples?) |
35
8cafee54466f
forgotten update of histogram class
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
32
diff
changeset
|
166 self.categories = categories |
8cafee54466f
forgotten update of histogram class
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
32
diff
changeset
|
167 self.counts = counts |
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
168 |
749
10dbab1e871d
modifications in samples and distributions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
742
diff
changeset
|
@staticmethod
def generate(sample, categories):
    '''Builds a ContinuousDistributionSample by counting the elements of sample
    falling in each interval delimited by categories

    categories holds the interval limits (they are sorted before use):
    interval k is [limit k, limit k+1[, so there is one count less than limits
    an element below the first limit, or equal to or above the last limit,
    is reported as not being in the categories and is not counted
    a warning is printed if the range of the sample exceeds the range of the categories'''
    from bisect import bisect_right

    if len(sample) > 0: # min and max raise ValueError on an empty sample
        sampleMin, sampleMax = min(sample), max(sample)
        if sampleMin < min(categories):
            print('Sample has lower min than proposed categories ({}, {})'.format(sampleMin, min(categories)))
        if sampleMax > max(categories):
            print('Sample has higher max than proposed categories ({}, {})'.format(sampleMax, max(categories)))
    dist = ContinuousDistributionSample(sorted(categories), [0]*(len(categories)-1))
    nIntervals = len(dist.counts)
    for s in sample:
        # number of limits <= s, ie the 1-based index of the interval containing s
        i = bisect_right(dist.categories, s)
        # i == 0 means s is below the first limit: it must be rejected,
        # otherwise counts[i-1] would silently increment the last interval
        if 0 < i <= nIntervals:
            dist.counts[i-1] += 1
        else:
            print('Element {} is not in the categories'.format(s))
    return dist
10dbab1e871d
modifications in samples and distributions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
742
diff
changeset
|
186 |
35
8cafee54466f
forgotten update of histogram class
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
32
diff
changeset
|
def mean(self):
    '''Returns the mean estimated from the counts per interval,
    each observation being represented by the midpoint of its interval

    the sum is divided by self.nSamples() (defined outside this block,
    presumably the total number of observations)'''
    total = 0.
    # categories holds the interval limits, ie one more element than counts:
    # zip stops after the last count so that every interval is included
    for count, lower, upper in zip(self.counts, self.categories, self.categories[1:]):
        total += count*(lower+upper)/2
    return total/self.nSamples()
35
8cafee54466f
forgotten update of histogram class
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
32
diff
changeset
|
192 |
8cafee54466f
forgotten update of histogram class
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
32
diff
changeset
|
def var(self, mean = None):
    '''Returns the variance estimated from the counts per interval,
    each observation being represented by the midpoint of its interval

    mean is the value around which deviations are measured,
    self.mean() is used if it is None (a mean of 0 is a valid argument)
    the sum of squares is divided by self.nSamples()-1:
    this raises ZeroDivisionError if nSamples() returns 1'''
    m = self.mean() if mean is None else mean
    total = 0.
    # categories holds the interval limits, ie one more element than counts:
    # zip stops after the last count so that every interval is included
    for count, lower, upper in zip(self.counts, self.categories, self.categories[1:]):
        mid = (lower+upper)/2
        total += count*(mid-m)*(mid-m)
    return total/(self.nSamples()-1)
8cafee54466f
forgotten update of histogram class
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
32
diff
changeset
|
203 |
8cafee54466f
forgotten update of histogram class
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
32
diff
changeset
|
def referenceCounts(self, cdf):
    '''Returns the expected counts and probabilities of the intervals under a reference distribution

    cdf is a cumulative distribution function
    returning the probability of the variable being less than x
    returns the tuple (refCounts, refProba), each with one element per interval
    the first interval receives all the probability below its upper limit and
    the last one all the probability above its lower limit
    the counts are the probabilities multiplied by self.nSamples()'''
    # the cdf is only evaluated at the inner limits:
    # it is taken as 0 below the first limit and 1 above the last one
    cumulative = [0.]+[cdf(x) for x in self.categories[1:-1]]+[1.]
    refProba = [upper-lower for lower, upper in zip(cumulative, cumulative[1:])]
    n = self.nSamples()
    refCounts = [p*n for p in refProba]
    return refCounts, refProba
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
219 |
77
5e6cd36a991c
added pretty print in empiricalDistribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
76
diff
changeset
|
def printReferenceCounts(self, refCounts=None, cdf=None):
    '''Prints one line per interval with its limits, reference probability and reference count,
    the fields being separated by & and the line ended by \\\\ (rows of a LaTeX table)

    refCounts is a pair (counts, probabilities) as returned by referenceCounts
    if it is None, it is computed by self.referenceCounts(cdf)
    raises ValueError if neither refCounts nor cdf is provided'''
    if refCounts is not None:
        ref = refCounts
    elif cdf is not None:
        ref = self.referenceCounts(cdf)
    else:
        # referenceCounts cannot be called without a cdf
        raise ValueError('printReferenceCounts requires either refCounts or cdf')
    for i, (count, proba) in enumerate(zip(ref[0], ref[1])):
        print('{0}-{1} & {2:0.3} & {3:0.3} \\\\'.format(self.categories[i],self.categories[i+1],proba, count))
5e6cd36a991c
added pretty print in empiricalDistribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
76
diff
changeset
|
227 |
5e6cd36a991c
added pretty print in empiricalDistribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
76
diff
changeset
|
228 |
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
229 ######################### |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
230 # maths section |
27
44689029a86f
updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents:
24
diff
changeset
|
231 ######################### |
24
6fb59cfb201e
first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents:
19
diff
changeset
|
232 |
433
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
233 # def kernelSmoothing(sampleX, X, Y, weightFunc, halfwidth): |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
234 # '''Returns a smoothed weighted version of Y at the predefined values of sampleX |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
235 # Sum_x weight(sample_x,x) * y(x)''' |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
236 # from numpy import zeros, array |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
237 # smoothed = zeros(len(sampleX)) |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
238 # for i,x in enumerate(sampleX): |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
239 # weights = array([weightFunc(x,xx, halfwidth) for xx in X]) |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
240 # if sum(weights)>0: |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
241 # smoothed[i] = sum(weights*Y)/sum(weights) |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
242 # else: |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
243 # smoothed[i] = 0 |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
244 # return smoothed |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
245 |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
def kernelSmoothing(x, X, Y, weightFunc, halfwidth):
    '''Returns the kernel smoothed estimate of the observations (X,Y) at x

    ie the average of the Y's weighted by weightFunc(x, observedx, halfwidth)
    returns 0 if the sum of the weights is not strictly positive'''
    kernelWeights = array([weightFunc(x, xi, halfwidth) for xi in X])
    totalWeight = sum(kernelWeights)
    if not totalWeight > 0:
        return 0
    return sum(kernelWeights*Y)/totalWeight
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
254 |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
def uniform(center, x, halfwidth):
    '''Uniform kernel weight: 1 if x is strictly less than halfwidth away from center, 0 otherwise'''
    return 1. if abs(center-x) < halfwidth else 0.
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
260 |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
def gaussian(center, x, halfwidth):
    '''Gaussian kernel weight (not normalized): exp(-z^2/2)
    where z is the distance between center and x in units of halfwidth'''
    z = (center-x)/halfwidth
    return exp(-z**2/2)
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
263 |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
def epanechnikov(center, x, halfwidth):
    '''Epanechnikov kernel weight (not normalized): 1-(d/halfwidth)^2
    if the distance d between center and x is strictly less than halfwidth, 0 otherwise'''
    distance = abs(center-x)
    return 1.-(distance/halfwidth)**2 if distance < halfwidth else 0.
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
270 |
def triangular(center, x, halfwidth):
    '''Triangular kernel weight: decreases linearly from 1 at center
    to 0 at a distance of halfwidth, 0 beyond'''
    distance = abs(center-x)
    return 1.-abs(distance/halfwidth) if distance < halfwidth else 0.
433
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
277 |
518
0c86c73f3c09
median smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
511
diff
changeset
|
def medianSmoothing(x, X, Y, halfwidth):
    '''Returns the median of the Y's whose corresponding X's are
    strictly less than halfwidth away from x, ie in the interval ]x-halfwidth, x+halfwidth['''
    window = []
    for observedx, y in zip(X, Y):
        if abs(x-observedx) < halfwidth:
            window.append(y)
    return median(window)
0c86c73f3c09
median smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
511
diff
changeset
|
281 |
521
3707eeb20f25
changed argMaxDict name to argmaxDict
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
518
diff
changeset
|
def argmaxDict(d):
    '''Returns the key of d associated with the largest value
    (the first one met when iterating over d if several keys share it)'''
    return max(iter(d), key=lambda k: d.get(k))
279
3af4c267a7bf
generic simple LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
276
diff
changeset
|
284 |
837
e01cabca4c55
minor modifications to merge-features
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
749
diff
changeset
|
def deltaFrames(t1, t2, frameRate):
    '''Returns the number of frames between t1 and t2
    positive if t1<=t2, negative otherwise

    t1 and t2 must be comparable and their difference must be
    a datetime.timedelta (eg two datetime.datetime)
    only whole seconds are counted (microseconds are dropped), days included'''
    def wholeSeconds(delta):
        # timedelta.seconds only holds the part of the duration below one day
        return delta.days*86400+delta.seconds

    if t1 > t2:
        # subtract in the order giving a positive timedelta,
        # so that dropping the microseconds truncates towards zero
        return -wholeSeconds(t1-t2)*frameRate
    else:
        return wholeSeconds(t2-t1)*frameRate
e01cabca4c55
minor modifications to merge-features
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
749
diff
changeset
|
292 |
395
6fba1ab040f1
minor modification to framestotime
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
391
diff
changeset
|
def framesToTime(nFrames, frameRate, initialTime = time()):
    '''Converts a number of frames at frameRate into a datetime.time (hour, minutes and seconds)

    initialTime is a datetime.time added to the elapsed time
    the elapsed time is rounded down to a whole number of seconds'''
    elapsed = floor(float(nFrames)/float(frameRate))
    totalSeconds = int(elapsed+initialTime.hour*3600+initialTime.minute*60+initialTime.second)
    hours, remainder = divmod(totalSeconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return time(hours, minutes, secs)
248
571ba5ed22e2
added utils for bus processing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
241
diff
changeset
|
302 |
381
387cc0142211
script to replay event annotations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
376
diff
changeset
|
def timeToFrames(t, frameRate):
    '''Returns the number of frames at frameRate corresponding to
    the hours, minutes and seconds of t (any object with hour, minute and second attributes)'''
    nSeconds = t.hour*3600+t.minute*60+t.second
    return frameRate*nSeconds
387cc0142211
script to replay event annotations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
376
diff
changeset
|
305 |
241
ee1caff48b03
added function to sort to list of paired data X,Y
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
235
diff
changeset
|
def sortXY(X,Y):
    '''Returns the x's sorted in increasing order and the list of the corresponding y's

    the pairs are stored in a dictionary keyed on x: if a value appears
    several times in X, it is returned once, with the last corresponding y'''
    yOfX = dict(zip(X, Y))
    orderedX = sorted(yOfX)
    return orderedX, [yOfX[x] for x in orderedX]
ee1caff48b03
added function to sort to list of paired data X,Y
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
235
diff
changeset
|
313 |
def compareLengthForSort(i, j):
    '''Comparison function on lengths: returns -1, 0 or 1
    if i is respectively shorter than, as long as or longer than j'''
    lengthI, lengthJ = len(i), len(j)
    # difference of booleans: 1 if longer, -1 if shorter, 0 otherwise
    return (lengthI > lengthJ)-(lengthI < lengthJ)
320 return 1 | |
321 | |
def sortByLength(instances, reverse = False):
    '''Returns a new list of the instances ordered by their length (len),
    from longest to shortest if reverse is True
    instances of the same length keep their relative order'''
    ordered = list(instances)
    ordered.sort(key = len, reverse = reverse)
    return ordered
733 | 326 |
def ceilDecimals(v, nDecimals):
    '''Rounds the number up at the nth decimal
    eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3'''
    scale = 10**nDecimals
    scaledUp = ceil(v*scale)
    return scaledUp/scale
332 | |
152
74b1fc68d4df
re-organized code to avoid cyclic python module dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
116
diff
changeset
|
def inBetween(bound1, bound2, x):
    '''Returns whether x lies between the two bounds (included), whatever their order
    useful if one does not know the order of bound1/bound2'''
    if bound1 <= bound2:
        lower, upper = bound1, bound2
    else:
        lower, upper = bound2, bound1
    return lower <= x <= upper
0057c04f94d5
work in progress on intersections (for PET)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
561
diff
changeset
|
336 |
0057c04f94d5
work in progress on intersections (for PET)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
561
diff
changeset
|
def pointDistanceL2(x1,y1,x2,y2):
    '''Returns the Euclidean distance (L2 norm) between the points (x1,y1) and (x2,y2)'''
    dx = x2-x1
    dy = y2-y1
    return sqrt(dx**2+dy**2)
24
6fb59cfb201e
first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents:
19
diff
changeset
|
340 |
6fb59cfb201e
first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents:
19
diff
changeset
|
def crossProduct(l1, l2):
    '''Returns the cross product of the 2D vectors l1 and l2 (scalar),
    computed from their first two coordinates'''
    x1, y1 = l1[0], l1[1]
    x2, y2 = l2[0], l2[1]
    return x1*y2-y1*x2
6fb59cfb201e
first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents:
19
diff
changeset
|
343 |
574
e24eeb244698
first implementation of projection to curvilinear coordinates
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
569
diff
changeset
|
def cat_mvgavg(cat_list, halfWidth):
    ''' Return a list of categories/values smoothed according to a window.
    halfWidth is the search radius on either side

    each element is replaced by the most frequent value among the elements
    at most halfWidth positions away (the window is truncated at both ends of the list)
    cat_list must be a list of hashable values and is not modified
    if several values are equally frequent in a window, which one is chosen
    depends on the iteration order of a set'''
    nValues = len(cat_list)
    smoothed = []
    for point in range(nValues):
        lower_bound_check = max(0,point-halfWidth)
        # the end of a slice is exclusive: it is bounded by the length (not the last index)
        # so that the last element can belong to a window and no window is empty
        upper_bound_check = min(nValues,point+halfWidth+1)
        window_values = cat_list[lower_bound_check:upper_bound_check]
        smoothed.append(max(set(window_values), key=window_values.count))
    return smoothed
e24eeb244698
first implementation of projection to curvilinear coordinates
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
569
diff
changeset
|
355 |
def filterMovingWindow(inputSignal, halfWidth):
    '''Returns an array obtained after the smoothing of the input by a moving average
    over a window of 2*halfWidth+1 points
    The first and last halfWidth points are copied from the original.

    if the input has fewer points than the window, all its points are
    within halfWidth of an end: a copy (as floats) is returned'''
    signal = array(inputSignal)
    width = 2*halfWidth+1 # kept as an integer: it is used as an array size
    if len(signal) < width:
        return signal.astype('d')
    win = ones(width,'d')
    result = convolve(win/width,signal,'same')
    if halfWidth > 0: # result[-0:] would designate the whole array
        result[:halfWidth] = signal[:halfWidth]
        result[-halfWidth:] = signal[-halfWidth:]
    return result
ca8e716cc231
added moving average filter
Nicolas Saunier <nico@confins.net>
parents:
27
diff
changeset
|
365 |
199
ca9d9104afba
added utility to calibrate polynoms and plot
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
197
diff
changeset
|
def linearRegression(x, y, deg = 1, plotData = False):
    '''Returns the coefficients of the least square polynomial of degree deg
    fitted to (x,y), highest degree first (y = ax+b for the default degree 1)

    if plotData is True, the data points and the fitted polynomial
    are also plotted with matplotlib on the current figure'''
    # public numpy names: the numpy.lib.polynomial and numpy.core.multiarray modules are private
    from numpy import polyfit, polyval, linspace
    coef = polyfit(x, y, deg)
    if plotData:
        plt.plot(x, y, 'x')
        # 1000 evenly spaced abscissas from min(x) to max(x) excluded;
        # linspace also works if all x are equal, a step of 0 being invalid for arange
        xx = linspace(min(x), max(x), 1000, endpoint = False)
        plt.plot(xx, polyval(coef, xx))
    return coef
ca9d9104afba
added utility to calibrate polynoms and plot
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
197
diff
changeset
|
382 |
668
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
def correlation(data, correlationMethod = 'pearson', plotFigure = False, displayNames = None, figureFilename = None):
    '''Returns the correlation matrix of the usable columns of a pandas DataFrame
    (and optionally displays it as a colored matrix)

    A column is left out of the computation if
    - it has only one unique value,
    - it is of object dtype, or
    - it has two unique values of which only one is not nan

    correlationMethod is passed as is to DataFrame.corr
    displayNames is an optional dictionary used to replace the (stripped) column names in the tick labels
    figureFilename is the file in which the figure is saved if it is not None (only used if plotFigure is True)'''
    def isExcluded(name):
        distinctValues = data[name].unique()
        if len(distinctValues) == 1 or data.dtypes[name] == dtype('O'):
            return True
        # two unique values of which only one is not nan
        return len(distinctValues) == 2 and len(data.loc[~isnan(data[name]), name].unique()) == 1

    columns = [name for name in data.columns.tolist() if not isExcluded(name)]
    c = data[columns].corr(correlationMethod)
    if not plotFigure:
        return c

    nVariables = c.shape[0]
    fig = plt.figure(figsize=(4+0.4*nVariables, 0.4*nVariables))
    fig.add_subplot(1,1,1)
    plt.imshow(c, vmin=-1., vmax = 1., interpolation='none', cmap = 'RdYlBu_r') # coolwarm
    if displayNames is None:
        colnames = columns
    else:
        colnames = [displayNames.get(s.strip(), s.strip()) for s in columns]
    tickPositions = range(len(colnames))
    plt.xticks(tickPositions, colnames, rotation=90)
    plt.yticks(tickPositions, colnames)
    plt.tick_params('both', length=0)
    plt.subplots_adjust(bottom = 0.29)
    plt.colorbar()
    plt.title('Correlation ({})'.format(correlationMethod))
    plt.tight_layout()
    if len(colnames) > 50:
        plt.subplots_adjust(left=.06)
    if figureFilename is not None:
        plt.savefig(figureFilename, dpi = 150, transparent = True)
    return c
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
413 |
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
def addDummies(data, variables, allVariables = True):
    '''Adds binary dummy variables for each value of a nominal variable
    in a pandas DataFrame (the columns are added to data in place)

    Only the variables that are columns of data, of object dtype
    and with more than 2 unique values (missing values counting as one value) are processed
    If allVariables is False, the last unique value of each variable does not get a dummy variable
    Missing values (None, nan) never get a dummy variable

    The name of each new column is built from the variable name and the value,
    without the characters '.', ' ' and '-'

    Returns the list of the names of the added columns'''
    from pandas import isnull
    newVariables = []
    for var in variables:
        if var not in data.columns or data.dtypes[var] != dtype('O'):
            continue
        values = data[var].unique()
        if len(values) <= 2:
            continue
        if not allVariables:
            values = values[:-1]
        for val in values:
            # missing values are tested by value and not by identity with numpy's nan:
            # another nan object (eg float('nan')) or None would otherwise get a useless dummy
            if isnull(val):
                continue
            newVariable = (var+'_{}'.format(val)).replace('.','').replace(' ','').replace('-','')
            data[newVariable] = (data[var] == val)
            newVariables.append(newVariable)
    return newVariables
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
429 |
997
4f3387a242a1
updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
990
diff
changeset
|
def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, renameVariables = lambda s: s, kwCaption = ''):
    '''Studies the influence of (nominal) independent variable over the dependent variable

    The rows where the independent variable is null are ignored
    Tests and prints if the conditional distributions are normal
    using the Shapiro-Wilk test (in which case ANOVA could be used),
    for each value of the independent variable with at least 3 observations
    Then uses the non-parametric Kruskal Wallis test

    If plotFigure is True, a boxplot of the dependent variable per value of the independent variable is drawn
    (and saved in filenamePrefix-dependentVariable-independentVariable.figureFileType if filenamePrefix is not None)
    The table of descriptive statistics per value of the independent variable, sorted by decreasing median,
    is written in filenamePrefix-dependentVariable-independentVariable.tex if saveLatex is True
    (filenamePrefix must then be provided), and printed otherwise
    kwCaption is the template of the LaTeX caption, formatted with the two variable names and the elements of the test result
    renameVariables is not used in this function

    Returns the result of the Kruskal Wallis test,
    or None if the independent variable has less than 2 values'''
    tmp = data[data[independentVariable].notnull()]
    independentVariableValues = sorted(tmp[independentVariable].unique().tolist())
    if len(independentVariableValues) < 2:
        return None

    # the conditional samples are extracted once and reused for the tests and the plot
    samples = [tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues]
    for x, sample in zip(independentVariableValues, samples):
        print('Shapiro-Wilk normality test for {} when {}={}: {} obs'.format(dependentVariable,independentVariable, x, len(sample)))
        if len(sample) >= 3:
            print(shapiro(sample))
    if plotFigure:
        plt.figure()
        plt.boxplot(samples)
        plt.xticks(range(1,len(independentVariableValues)+1), independentVariableValues)
        plt.title('{} vs {}'.format(dependentVariable, independentVariable))
        if filenamePrefix is not None:
            plt.savefig(filenamePrefix+'-{}-{}.{}'.format(dependentVariable, independentVariable, figureFileType))
    table = tmp.groupby(independentVariable)[dependentVariable].describe()
    if table.ndim == 1:
        # pandas versions returning the statistics stacked in a Series
        table = table.unstack()
    # DataFrame.sort does not exist anymore in pandas, sort_values replaces it
    table = table.sort_values('50%', ascending = False)
    table['count'] = table['count'].astype(int)
    testResult = kruskal(*samples)
    if saveLatex:
        try:
            from trafficintelligence.storage import openCheck
        except ImportError: # module used outside of the trafficintelligence package
            from storage import openCheck
        out = openCheck(filenamePrefix+'-{}-{}.tex'.format(dependentVariable, independentVariable), 'w')
        try:
            # to_latex returns a str, which must not be encoded to be concatenated with str in Python 3
            out.write('\\begin{minipage}{\\linewidth}\n'
                      +'\\centering\n'
                      +'\\captionof{table}{'+(kwCaption.format(dependentVariable, independentVariable, *testResult))+'}\n'
                      +table.to_latex(float_format = lambda x: '{:.3f}'.format(x))+'\n'
                      +'\\end{minipage}\n'
                      +'\\ \\vspace{0.5cm}\n')
        finally:
            out.close()
    else:
        print(table)
    return testResult
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
468 |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
def prepareRegression(data, dependentVariable, independentVariables, maxCorrelationThreshold, correlations, maxCorrelationP, correlationFunc, stdoutText = ['Removing {} (constant: {})', 'Removing {} (correlation {} with {})', 'Removing {} (no correlation: {}, p={})'], saveFiles = False, filenamePrefix = None, latexHeader = '', latexTable = None, latexFooter=''):
    '''Removes variables from candidate independent variables, in this order:
    - if a variable is constant (one unique value, or one unique value besides nan for non-object variables)
    - if two independent variables are correlated (> maxCorrelationThreshold in absolute value),
      the second one of the pair is removed
    - if an independent variable (not of object dtype) is not correlated with the dependent variable (p>maxCorrelationP)
    Returns the remaining non-correlated variables, correlated with the dependent variable
    (independentVariables is not modified)

    correlations is the correlation matrix between variables, indexed by the variable names
    (the variables that are not in its index are not tested for correlation between variables)
    correlationFunc is spearmanr or pearsonr from scipy.stats
    stdoutText is the list of the 3 templates to display for the three types of removal (see default)
    If saveFiles is True, latexTable is the list of the 3 templates (in the same order as stdoutText)
    of the lines written between latexHeader and latexFooter in filenamePrefix-removed-variables.tex,
    and the correlations and p-values of the remaining variables with the dependent variable
    are written in filenamePrefix-correlations.html

    TODO: pass the dummies for nominal variables and remove if all dummies are correlated, or none is correlated with the dependentvariable'''
    from copy import copy
    from pandas import DataFrame
    result = copy(independentVariables)
    table1 = ''
    table2 = {}
    # constant variables
    for var in independentVariables:
        uniqueValues = data[var].unique()
        if (len(uniqueValues) == 1) or (len(uniqueValues) == 2 and uniqueValues.dtype != dtype('O') and len(data.loc[~isnan(data[var]), var].unique()) == 1):
            print(stdoutText[0].format(var, uniqueValues))
            if saveFiles:
                table1 += latexTable[0].format(var, *uniqueValues)
            result.remove(var)
    # correlated variables
    for v1 in copy(result):
        if v1 in correlations.index:
            for v2 in copy(result):
                if v2 != v1 and v2 in correlations.index:
                    if abs(correlations.loc[v1, v2]) > maxCorrelationThreshold:
                        # both variables must still be candidates since the loops iterate over copies of result
                        if v1 in result and v2 in result:
                            if saveFiles:
                                table1 += latexTable[1].format(v2, v1, correlations.loc[v1, v2])
                            print(stdoutText[1].format(v2, v1, correlations.loc[v1, v2]))
                            result.remove(v2)
    # not correlated with dependent variable
    table2['Correlations'] = []
    table2['Valeurs p'] = []
    for var in copy(result):
        if data.dtypes[var] != dtype('O'):
            cor, p = correlationFunc(data[dependentVariable], data[var])
            if p > maxCorrelationP:
                if saveFiles:
                    table1 += latexTable[2].format(var, cor, p)
                print(stdoutText[2].format(var, cor, p))
                result.remove(var)
            else:
                table2['Correlations'].append(cor)
                table2['Valeurs p'].append(p)

    if saveFiles:
        try:
            from trafficintelligence.storage import openCheck
        except ImportError: # module used outside of the trafficintelligence package
            from storage import openCheck
        out = openCheck(filenamePrefix+'-removed-variables.tex', 'w')
        try:
            out.write(latexHeader)
            out.write(table1)
            out.write(latexFooter)
        finally:
            out.close()
        # the html is generated before opening the file so that a failure does not leave an open file
        table2['Variables'] = [var for var in result if data.dtypes[var] != dtype('O')]
        html = DataFrame(table2)[['Variables', 'Correlations', 'Valeurs p']].to_html(formatters = {'Correlations': lambda x: '{:.2f}'.format(x), 'Valeurs p': lambda x: '{:.3f}'.format(x)}, index = False)
        out = openCheck(filenamePrefix+'-correlations.html', 'w')
        try:
            out.write(html)
        finally:
            out.close()
    return result
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
530 |
def saveDokMatrix(filename, m, lowerTriangle = False):
    '''Saves a dok_matrix using savez

    The shape, the (row, column) keys and the values of the stored elements are saved
    under the names shape, keys and values
    If lowerTriangle is True, only the elements strictly below the diagonal (row > column) are saved'''
    if lowerTriangle:
        # the keys are obtained through keys() as in the other branch:
        # iterating directly over a scipy sparse matrix may yield its rows instead of its keys
        keys = [k for k in m.keys() if k[0] > k[1]]
        values = [m[k[0],k[1]] for k in keys]
    else:
        keys = list(m.keys())
        values = list(m.values())
    savez(filename, shape = m.shape, keys = keys, values = values)
840
15a82ebc62c4
utils for sparse matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
837
diff
changeset
|
538 |
15a82ebc62c4
utils for sparse matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
837
diff
changeset
|
def loadDokMatrix(filename):
    '''Loads a dok_matrix saved using saveDokMatrix

    The file must contain the arrays named shape, keys and values
    Returns the dok_matrix of the saved shape, with the saved values at the saved keys'''
    # the archive returned by load is closed once the arrays are read
    with npload(filename) as data:
        shape = tuple(int(n) for n in data['shape'])
        keys = data['keys']
        values = data['values']
    m = dok_matrix(shape)
    for k, v in zip(keys, values):
        m[tuple(k)] = v
    return m
667
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
546 |
1023
a13f47c8931d
work on processing large datasets (generate speed data)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1022
diff
changeset
|
def aggregationFunction(funcStr, centile = 50):
    '''Returns the numpy function corresponding to funcStr
    among 'median', 'mean', 'centile' and '85centile'
    (prints a message and returns None for any other funcStr)

    centile is only used if funcStr is 'centile':
    it can be a list of centiles to compute at once, eg [25, 50, 75] for the 3 quartiles'''
    aggregators = {'median': median,
                   'mean': mean,
                   'centile': lambda x: percentile(x, centile),
                   '85centile': lambda x: percentile(x, 85)}
    if funcStr in aggregators:
        return aggregators[funcStr]
    print('Unknown aggregation method: {}'.format(funcStr))
    return None
a13f47c8931d
work on processing large datasets (generate speed data)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1022
diff
changeset
|
561 |
667
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
562 ######################### |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
563 # regression analysis using statsmodels (and pandas) |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
564 ######################### |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
565 |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
566 # TODO make class for experiments? |
668
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
567 # TODO add tests with public dataset downloaded from Internet (IRIS et al) |
667
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
def modelString(experiment, dependentVariable, independentVariables):
    '''Builds the formula string 'dependentVariable ~ var1 + var2 + ...'

    Only the independent variables for which experiment[variable] is true are included,
    in the order of independentVariables'''
    includedVariables = [variable for variable in independentVariables if experiment[variable]]
    rightHandSide = ' + '.join(includedVariables)
    return dependentVariable+' ~ '+rightHandSide
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
570 |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
def runModel(experiment, data, dependentVariable, independentVariables, regressionType = 'ols'):
    '''Fits the model described by experiment on data and returns the result of the fit

    The formula is built by modelString from the variables selected in experiment
    regressionType is 'ols', 'gls' or 'rlm', ie the name of the function
    of statsmodels.formula.api used to build the model
    For any other value, a message is printed and the program exits'''
    import statsmodels.formula.api as smf
    formula = modelString(experiment, dependentVariable, independentVariables)
    if regressionType not in ('ols', 'gls', 'rlm'):
        print('Unknown regression type {}. Exiting'.format(regressionType))
        import sys
        sys.exit()
    # the accepted regression types are the names of the statsmodels functions
    buildModel = getattr(smf, regressionType)
    return buildModel(formula, data = data).fit()
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
585 |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
def runModels(experiments, data, dependentVariable, independentVariables, regressionType = 'ols'):
    '''Runs several models and stores 4 values for each
    adjusted R2, condition number (should be small, eg < 1000),
    p-value for Shapiro-Wilk test of residual normality
    and number of observations

    experiments has one row per model, with one column per independent variable
    indicating whether it is included in the model
    Rows in which no independent variable is selected are skipped
    experiments is modified in place and returned
    regressionType is passed to runModel'''
    for i,experiment in experiments.iterrows():
        if experiment[independentVariables].any():
            # pass the requested type: it used to be hard-coded to 'ols', ignoring the argument
            results = runModel(experiment, data, dependentVariable, independentVariables, regressionType = regressionType)
            experiments.loc[i,'r2adj'] = results.rsquared_adj
            experiments.loc[i,'condNum'] = results.condition_number
            experiments.loc[i, 'shapiroP'] = shapiro(results.resid)[1]
            experiments.loc[i,'nobs'] = int(results.nobs)
    return experiments
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
598 |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
def generateExperiments(independentVariables):
    '''Generates all possible models for including or not each independent variable

    Returns a DataFrame with 2**len(independentVariables) rows, one per model:
    - one boolean column per independent variable (True if included in the model)
    - the result columns r2adj (0.), condNum (NaN), shapiroP (-1.) and nobs (-1)
    The row number is the binary code of the model:
    bit number i is set if independentVariables[i] is included

    Prints a message and exits if independentVariables contains duplicates'''
    from pandas import DataFrame
    nIndependentVariables = len(independentVariables)
    if nIndependentVariables != len(set(independentVariables)):
        print("Duplicate variables. Exiting")
        import sys
        sys.exit()
    experiments = {}
    for i,var in enumerate(independentVariables):
        # variable i alternates between excluded and included every 2**i rows
        pattern = [False]*(2**i)+[True]*(2**i)
        experiments[var] = pattern*(2**(nIndependentVariables-i-1))
    experiments = DataFrame(experiments)
    experiments['r2adj'] = 0.
    experiments['condNum'] = float('nan')
    # float column from the start: p-values assigned later would otherwise force an upcast of an integer column
    experiments['shapiroP'] = -1.
    experiments['nobs'] = -1
    return experiments
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
618 |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
def findBestModel(data, dependentVariable, independentVariables, regressionType = 'ols', nProcesses = 1):
    '''Generates all possible model with the independentVariables
    and runs them, saving the results in experiments
    with multiprocess option

    With nProcesses equal to 1, the models are run in the current process
    Otherwise, the experiments are split in nProcesses consecutive chunks,
    each run by runModels in a pool of nProcesses processes, and the results are concatenated
    Returns the experiments with the results'''
    from pandas import concat
    from multiprocessing import Pool
    experiments = generateExperiments(independentVariables)
    nModels = len(experiments)
    print("Running {} models with {} processes".format(nModels, nProcesses))
    print("IndependentVariables: {}".format(independentVariables))
    if nProcesses == 1:
        return runModels(experiments, data, dependentVariable, independentVariables, regressionType)
    else:
        chunkSize = int(ceil(nModels/nProcesses))
        # the pool is used as a context manager so that the worker processes are released, even if a job fails
        with Pool(processes = nProcesses) as pool:
            jobs = [pool.apply_async(runModels, args = (experiments[i*chunkSize:(i+1)*chunkSize], data, dependentVariable, independentVariables, regressionType)) for i in range(nProcesses)]
            # the results must be collected before leaving the with block, which terminates the pool
            return concat([job.get() for job in jobs])
667
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
636 |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
def findBestModelFwd(data, dependentVariable, independentVariables, modelFunc, experiments = None):
    '''Forward search for the best model, based on the adjusted R2

    The independent variables are tried one at a time in a random order
    and each one is kept only if adding it increases the adjusted R2 of the best model so far
    modelFunc is called as modelFunc(formula, data = data) and the returned object is fitted with fit()

    The results are stored in experiments (generated by generateExperiments if not provided as argument), which is returned
    Storing in experiments relies on the index of each row being the number equal
    to the binary code derived from the independent variables included in the model
    A model is fitted only if its shapiroP in experiments is negative'''
    from numpy.random import permutation as nppermutation
    if experiments is None:
        experiments = generateExperiments(independentVariables)
    nVariables = len(independentVariables)
    # searchPosition[i] is the random rank at which independentVariables[i] is tried
    searchPosition = nppermutation(list(range(nVariables)))
    nameAtPosition = {position: independentVariables[i] for i,position in enumerate(searchPosition)}
    print('Tested variables '+', '.join([nameAtPosition[position] for position in range(nVariables)]))
    selected = [False]*nVariables # indexed by search position
    bestR2Adj = 0.
    for position in range(nVariables):
        candidate = list(selected)
        candidate[position] = True
        # bit i of the row index is set if independentVariables[i] is in the candidate model
        rowIdx = sum(2**i for i in range(nVariables) if candidate[searchPosition[i]])
        if experiments.loc[rowIdx, 'shapiroP'] < 0:
            formula = modelString(experiments.loc[rowIdx], dependentVariable, independentVariables)
            results = modelFunc(formula, data = data).fit()
            experiments.loc[rowIdx, 'r2adj'] = results.rsquared_adj
            experiments.loc[rowIdx, 'condNum'] = results.condition_number
            experiments.loc[rowIdx, 'shapiroP'] = shapiro(results.resid)[1]
            experiments.loc[rowIdx, 'nobs'] = int(results.nobs)
        # NOTE(review): this comparison is done for every candidate, whether just fitted or already stored - confirm the nesting against the repository
        candidateR2Adj = experiments.loc[rowIdx, 'r2adj']
        if bestR2Adj < candidateR2Adj:
            bestR2Adj = candidateR2Adj
            selected[position] = True
    return experiments
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
672 |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
def displayModelResults(results, model = None, plotFigures = True, filenamePrefix = None, figureFileType = 'pdf', text = {'title-shapiro': 'Shapiro-Wilk normality test for residuals: {:.2f} (p={:.3f})', 'true-predicted.xlabel': 'Predicted values', 'true-predicted.ylabel': 'True values', 'residuals-predicted.xlabel': 'Predicted values', 'residuals-predicted.ylabel': 'Residuals'}):
    '''Displays some model results

    Prints the summary of the results and the result of the Shapiro-Wilk test on the residuals
    If plotFigures is true, draws 2 or 3 graphics in one figure: true-predicted (only if model is provided),
    residuals-predicted and the Q-Q plot of the residuals
    If filenamePrefix is also provided, the summary is saved as html in filenamePrefix-coefficients.html
    and the figure in filenamePrefix-model-results.figureFileType

    text contains the title and axis labels of the graphics (None for no title and no label)
    the Q-Q plot labels are changed only if text has the key 'qqplot.xlabel' (and then 'qqplot.ylabel' is also read)'''
    import statsmodels.api as sm
    print(results.summary())
    shapiroResult = shapiro(results.resid)
    print(shapiroResult)
    if plotFigures:
        # one more row of graphics if the true values are available through model
        fig = plt.figure(figsize=(7,6.3*(2+int(model is not None))))
        if model is not None:
            ax = fig.add_subplot(3,1,1)
            plt.plot(results.predict(), model.endog, 'x')
            x=plt.xlim()
            y=plt.ylim()
            # diagonal limited to the range common to both axes
            plt.plot([max(x[0], y[0]), min(x[1], y[1])], [max(x[0], y[0]), min(x[1], y[1])], 'r')
            if text is not None:
                plt.title(text['title-shapiro'].format(*shapiroResult))
                plt.xlabel(text['true-predicted.xlabel'])
                plt.ylabel(text['true-predicted.ylabel'])
            fig.add_subplot(3,1,2, sharex = ax)
            plt.plot(results.predict(), results.resid, 'x')
            nextSubplotNum = 3
        else:
            fig.add_subplot(2,1,1)
            plt.plot(results.predict(), results.resid, 'x')
            nextSubplotNum = 2
        if text is not None:
            # the title is on the first graphic, which is this one only without model
            if model is None:
                plt.title(text['title-shapiro'].format(*shapiroResult))
            plt.xlabel(text['residuals-predicted.xlabel'])
            plt.ylabel(text['residuals-predicted.ylabel'])
        qqAx = fig.add_subplot(nextSubplotNum,1,nextSubplotNum)
        sm.qqplot(results.resid, fit = True, line = '45', ax = qqAx)
        plt.axis('equal')
        if text is not None and 'qqplot.xlabel' in text:
            plt.xlabel(text['qqplot.xlabel'])
            plt.ylabel(text['qqplot.ylabel'])
        plt.tight_layout()
        # NOTE(review): saving is assumed to be done only when the figure is plotted - confirm the nesting against the repository
        if filenamePrefix is not None:
            from storage import openCheck
            # presumably a file object as returned by open - verify against storage.openCheck
            out = openCheck(filenamePrefix+'-coefficients.html', 'w')
            try:
                out.write(results.summary().as_html())
            finally:
                # the file used to be left open
                out.close()
            plt.savefig(filenamePrefix+'-model-results.'+figureFileType)
667
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
719 |
27
44689029a86f
updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents:
24
diff
changeset
|
720 ######################### |
455
abe0b2347d4c
added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
434
diff
changeset
|
721 # iterable section |
abe0b2347d4c
added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
434
diff
changeset
|
722 ######################### |
abe0b2347d4c
added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
434
diff
changeset
|
723 |
abe0b2347d4c
added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
434
diff
changeset
|
def mostCommon(L):
    '''Returns the most frequent element of L
    (the one appearing first in L if several elements are as frequent)

    The elements must be sortable and L must have a length

    taken from http://stackoverflow.com/questions/1518522/python-most-common-element-in-a-list'''
    from itertools import groupby
    from operator import itemgetter
    # sorting the (element, position) pairs puts equal elements next to each other for groupby
    indexedElements = sorted((element, position) for position, element in enumerate(L))
    def _rank(group):
        'returns the number of occurrences and the opposite of the first position of the element of the group'
        _, occurrences = group
        positions = [where for _, where in occurrences]
        return len(positions), -min(len(L), *positions)
    # highest count first, then earliest position
    bestElement, _ = max(groupby(indexedElements, key=itemgetter(0)), key=_rank)
    return bestElement
abe0b2347d4c
added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
434
diff
changeset
|
746 |
abe0b2347d4c
added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
434
diff
changeset
|
747 ######################### |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
748 # sequence section |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
749 ######################### |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
750 |
665
15e244d2a1b5
corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
659
diff
changeset
|
class LCSS(object):
    '''Class that keeps the LCSS (longest common subsequence similarity) parameters
    and puts together the various computations

    the methods with names starting with _ are not to be shadowed
    in child classes, who will shadow the other methods,
    ie compute and computeXX methods'''
    def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min):
        '''One should provide either a similarity function
        that indicates (return bool) whether elements in the compares lists are similar

        eg distance(p1, p2) < epsilon

        or a type of metric usable in scipy.spatial.distance.cdist with an epsilon

        delta is the maximum index offset between compared elements (infinite by default, ie no constraint)
        aligned indicates whether _compute should search over shifts of the series (only meaningful with a finite delta)
        lengthFunc is applied to the two sequence lengths to normalize the LCSS

        The program exits (SystemExit) if neither similarityFunc nor metric is provided,
        or if a metric is provided without epsilon'''
        if similarityFunc is None and metric is None:
            print("No way to compute LCSS, similarityFunc and metric are None. Exiting")
            import sys
            sys.exit()
        elif metric is not None and epsilon is None:
            print("Please provide a value for epsilon if using a cdist metric. Exiting")
            import sys
            sys.exit()
        else:
            if similarityFunc is None and metric is not None and not isinf(delta):
                print('Warning: you are using a cdist metric and a finite delta, which will make probably computation slower than using the equivalent similarityFunc (since all pairwise distances will be computed by cdist).')
            self.similarityFunc = similarityFunc
            self.metric = metric
            self.epsilon = epsilon
            self.aligned = aligned
            self.delta = delta
            self.lengthFunc = lengthFunc
            self.subSequenceIndices = [(0,0)]

    def similarities(self, l1, l2, jshift=0):
        '''Fills self.similarityTable, of shape (len(l1)+1, len(l2)+1),
        with the dynamic programming LCSS values

        only the cells (i,j) such that |i-jshift-j| <= delta are computed,
        the others are left at 0
        similarityFunc is used if available, otherwise the cdist metric and epsilon'''
        n1 = len(l1)
        n2 = len(l2)
        self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
        if self.similarityFunc is not None:
            for i in range(1,n1+1):
                for j in range(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
                    if self.similarityFunc(l1[i-1], l2[j-1]):
                        self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
                    else:
                        self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
        elif self.metric is not None:
            # all pairwise distances are computed at once, whatever delta
            similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon
            for i in range(1,n1+1):
                for j in range(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
                    if similarElements[i-1, j-1]:
                        self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
                    else:
                        self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])

    def subSequence(self, i, j):
        '''Returns the subsequence of two sequences
        as the list of matched index pairs, in increasing order,
        by backtracking in self.similarityTable from cell (i,j)
        http://en.wikipedia.org/wiki/Longest_common_subsequence_problem

        The backtracking is iterative so that long sequences
        do not hit the interpreter recursion limit'''
        table = self.similarityTable
        matches = []
        while i > 0 and j > 0:
            if table[i][j] == table[i][j-1]:
                j -= 1
            elif table[i][j] == table[i-1][j]:
                i -= 1
            else: # the value comes from the diagonal: elements i-1 and j-1 are matched
                i -= 1
                j -= 1
                matches.append((i,j))
        matches.reverse() # matches were collected from the end of the sequences
        return matches

    def _compute(self, _l1, _l2, computeSubSequence = False):
        '''returns the longest common subsequence similarity
        l1 and l2 should be the right format
        eg list of tuple points for cdist
        or elements that can be compare using similarityFunc

        if aligned, returns the best matching if using a finite delta by shifting the series alignments
        (with an infinite delta, shifting does not change the similarity table
        and the series are compared without shift)

        if computeSubSequence, the matched indices are stored in self.subSequenceIndices
        as pairs (index in _l1, index in _l2)
        '''
        if len(_l2) < len(_l1): # l1 is the shortest
            l1 = _l2
            l2 = _l1
            revertIndices = True
        else:
            l1 = _l1
            l2 = _l2
            revertIndices = False
        n1 = len(l1)
        n2 = len(l2)

        # range cannot take an infinite bound and all shifts are equivalent with an infinite delta
        if self.aligned and not isinf(self.delta):
            lcssValues = {}
            similarityTables = {}
            for i in range(-n2-self.delta+1, n1+self.delta): # interval such that [i-shift-delta, i-shift+delta] is never empty, which happens when i-shift+delta < 1 or when i-shift-delta > n2
                self.similarities(l1, l2, i)
                lcssValues[i] = self.similarityTable.max()
                similarityTables[i] = self.similarityTable
            alignmentShift = argmaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance
            self.similarityTable = similarityTables[alignmentShift]
        else:
            alignmentShift = 0
            self.similarities(l1, l2)

        # threshold values for the useful part of the similarity table are n2-n1-delta and n1-n2-delta
        self.similarityTable = self.similarityTable[:min(n1, n2+alignmentShift+self.delta)+1, :min(n2, n1-alignmentShift+self.delta)+1]

        if computeSubSequence:
            self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1)
            if revertIndices: # indices are reported in the order of the arguments
                self.subSequenceIndices = [(j,i) for i,j in self.subSequenceIndices]
        return self.similarityTable[-1,-1]

    def compute(self, l1, l2, computeSubSequence = False):
        '''get methods are to be shadowed in child classes '''
        return self._compute(l1, l2, computeSubSequence)

    def computeAlignment(self):
        '''returns the mean index difference (second minus first)
        over the pairs stored in self.subSequenceIndices'''
        return mean([j-i for i,j in self.subSequenceIndices])

    def _computeNormalized(self, l1, l2, computeSubSequence = False):
        ''' compute the normalized LCSS
        ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc)
        lengthFunc = lambda x,y:float(x+y)/2'''
        return float(self._compute(l1, l2, computeSubSequence))/self.lengthFunc(len(l1), len(l2))

    def computeNormalized(self, l1, l2, computeSubSequence = False):
        '''to be shadowed in child classes, see _computeNormalized'''
        return self._computeNormalized(l1, l2, computeSubSequence)

    def _computeDistance(self, l1, l2, computeSubSequence = False):
        ''' compute the LCSS distance, ie 1 minus the normalized LCSS'''
        return 1-self._computeNormalized(l1, l2, computeSubSequence)

    def computeDistance(self, l1, l2, computeSubSequence = False):
        '''to be shadowed in child classes, see _computeDistance'''
        return self._computeDistance(l1, l2, computeSubSequence)
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
881 |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
882 ######################### |
45
74d2de078baf
added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
42
diff
changeset
|
883 # plotting section |
74d2de078baf
added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
42
diff
changeset
|
884 ######################### |
74d2de078baf
added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
42
diff
changeset
|
885 |
940
d8ab183a7351
verified motion prediction with prototypes at constant speed (test needed)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
876
diff
changeset
|
def plotPolygon(poly, options = '', **kwargs):
    '''Plots the exterior of the shapely polygon poly

    options (format string) and kwargs are passed to matplotlib plot'''
    from matplotlib.pyplot import plot
    exteriorX, exteriorY = poly.exterior.xy
    plot(exteriorX, exteriorY, options, **kwargs)
332
a6ca86107f27
reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
324
diff
changeset
|
891 |
def stepPlot(X, firstX, lastX, initialCount = 0, increment = 1):
    '''builds the abscissas and ordinates of a step plot (eg cumulative counts)

    the count starts at initialCount at firstX
    and is raised by increment at each value of X (taken in sorted order),
    the last level being held until lastX
    firstX and lastX should be respectively smaller and larger than all elements in X

    returns the two lists (abscissas, counts) that can be plotted'''
    abscissas = [firstX]
    levels = [initialCount]
    currentLevel = initialCount
    for value in sorted(X):
        # each value appears twice: before and after the vertical step
        abscissas.extend((value, value))
        levels.append(currentLevel)
        currentLevel = currentLevel+increment
        levels.append(currentLevel)
    abscissas.append(lastX)
    levels.append(currentLevel)
    return abscissas, levels
f6f423e25c7f
adding function to generate step plots (for cumulative number of vehicles)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
286
diff
changeset
|
906 |
665
15e244d2a1b5
corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
659
diff
changeset
|
class PlottingPropertyValues(object):
    '''Sequence of plotting property values (markers, colors...)
    that can be indexed by any integer: indices wrap around the number of values'''
    def __init__(self, values):
        self.values = values

    def __getitem__(self, i):
        nValues = len(self.values)
        wrappedIndex = i%nValues
        return self.values[wrappedIndex]

# markers for line plots
markers = PlottingPropertyValues(['+', '*', ',', '.', 'x', 'D', 's', 'o'])
# markers for scatter plots
scatterMarkers = PlottingPropertyValues(['s','o','^','>','v','<','d','p','h','8','+','x'])

linestyles = PlottingPropertyValues(['-', '--', '-.', ':'])

# one-letter matplotlib colors (white is left out)
colors = PlottingPropertyValues('brgmyck') # 'w'
74d2de078baf
added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
42
diff
changeset
|
920 |
def monochromeCycler(withMarker = False):
    '''Sets the matplotlib axes property cycle to black lines
    cycling over the line styles, and over markers too if withMarker'''
    from cycler import cycler
    monochrome = cycler('color', ['k']) * cycler('linestyle', ['-', '--', ':', '-.'])
    if withMarker:
        monochrome = monochrome * cycler('marker', ['^',',', '.'])
    plt.rc('axes', prop_cycle=monochrome)
928 | |
115
550556378466
added functionalities to indicator maps
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
86
diff
changeset
|
def plotIndicatorMap(indicatorMap, squareSize, masked = True, defaultValue=-1):
    '''Draws the indicator map as a grid of colored cells using pcolor

    indicatorMap is a dictionary whose keys are indexed as (x, y) cell indices
    (they are used as array indices, so presumably integers) and whose values are plotted
    squareSize multiplies the cell indices to obtain the plot coordinates
    Cells without a value in indicatorMap are set to defaultValue
    If masked is True, cells equal to defaultValue are masked before plotting'''
    from matplotlib.pyplot import pcolor
    cells = array(list(indicatorMap.keys()))
    xIndices = cells[:,0]
    yIndices = cells[:,1]
    minX = min(xIndices)
    minY = min(yIndices)
    # the ranges stop one index after the largest cell index
    X = arange(minX, max(xIndices)+1.1)*squareSize
    Y = arange(minY, max(yIndices)+1.1)*squareSize
    C = defaultValue*ones((len(Y), len(X)))
    for cell, value in indicatorMap.items():
        # rows follow y and columns follow x, shifted so that the smallest index is 0
        C[cell[1]-minY,cell[0]-minX] = value
    if masked:
        C = ma.masked_where(C==defaultValue,C)
    pcolor(X, Y, C)
65
75cf537b8d88
moved and generalized map making functions to the library
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
48
diff
changeset
|
943 |
45
74d2de078baf
added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
42
diff
changeset
|
944 ######################### |
637
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
945 # Data download |
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
946 ######################### |
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
947 |
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
def downloadECWeather(stationID, years, months = [], outputDirectoryname = '.', english = True):
    '''Downloads weather data from Environment Canada as csv files

    If months is not empty (numbers 1 to 12), hourly data is downloaded
    for each of these months of each year in years
    Otherwise, daily data is downloaded for each whole year in years

    Files are saved in outputDirectoryname as
    stationID-year-month-hourly.csv or stationID-year-daily.csv
    english selects the English version of the website, French otherwise

    Example: MONTREAL MCTAVISH 10761
             MONTREAL PIERRE ELLIOTT TRUDEAU INTL A 5415
    see ftp://client_climate@ftp.tor.ec.gc.ca/Pub/Get_More_Data_Plus_de_donnees/Station%20Inventory%20EN.csv

    To get daily data for 2010 and 2011, downloadECWeather(10761, [2010,2011], [], '/tmp')
    To get hourly data for 2009 and 2012, January, March and October, downloadECWeather(10761, [2009,2012], [1,3,10], '/tmp')

    Equivalent shell commands:
    for year in `seq 2016 2017`;do wget --content-disposition "http://climat.meteo.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=${year}&timeframe=2&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es" ;done
    for year in `seq 2016 2017`;do for month in `seq 1 12`;do wget --content-disposition "http://climat.meteo.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=${year}&Month=${month}&timeframe=1&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es" ;done;done
    '''
    import urllib.request
    language = 'e' if english else 'f'
    hourly = len(months) != 0
    if hourly:
        timeFrame = 1
    else:
        timeFrame = 2
        # a month is still put in the request, a single one so that each year is downloaded once
        # (months is rebound here, the default list is never modified)
        months = [1]

    for year in years:
        for month in months:
            if hourly:
                period = '-{}-hourly'.format(month)
            else:
                period = '-daily'
            outFilename = '{}/{}-{}'.format(outputDirectoryname, stationID, year)+period+'.csv'
            urllib.request.urlretrieve('http://climate.weather.gc.ca/climate_data/bulk_data_{}.html?format=csv&stationID={}&Year={}&Month={}&Day=1&timeframe={}&submit=Download+Data'.format(language, stationID, year, month, timeFrame), outFilename)
637
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
983 |
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
984 ######################### |
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
985 # File I/O |
27
44689029a86f
updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents:
24
diff
changeset
|
986 ######################### |
24
6fb59cfb201e
first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents:
19
diff
changeset
|
987 |
0
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
def removeExtension(filename, delimiter = '.'):
    '''Returns the filename cut just before the last occurrence of delimiter

    The filename is returned unchanged if it does not contain delimiter
    or if it starts with it (e.g. '.bashrc')'''
    cut = filename.rfind(delimiter)
    return filename[:cut] if cut > 0 else filename
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
995 |
969
5d788d2e8ffc
work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
940
diff
changeset
|
def getExtension(filename, delimiter = '.'):
    '''Returns the extension of the filename (all characters after the last delimiter)

    Returns the empty string if the filename does not contain delimiter
    or if it starts with it (e.g. '.bashrc' has no extension)'''
    i = filename.rfind(delimiter)
    if i>0:
        # skip the whole delimiter, which can be longer than one character
        return filename[i+len(delimiter):]
    else:
        return ''
5d788d2e8ffc
work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
940
diff
changeset
|
1003 |
46
b5d007612e16
added filename util
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
45
diff
changeset
|
def cleanFilename(s):
    'cleans filenames obtained when concatenating figure characteristics'
    # spaces and slashes become dashes, dots and commas are removed
    return s.translate(str.maketrans({' ': '-', '/': '-', '.': None, ',': None}))
46
b5d007612e16
added filename util
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
45
diff
changeset
|
1007 |
1021
16932cefabc1
work on paths in line with new configurations from tracker
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
997
diff
changeset
|
def getRelativeFilename(parentPath, filename):
    '''Returns filename as is if it is an absolute path,
    otherwise the path made of parentPath followed by filename, as a string'''
    candidate = Path(filename)
    if not candidate.is_absolute():
        return str(parentPath/candidate)
    return filename
16932cefabc1
work on paths in line with new configurations from tracker
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
997
diff
changeset
|
1015 |
0
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
def listfiles(dirname, extension, remove = False):
    '''Returns the list of files with the extension in the directory dirname

    The extension can be given with or without its leading dot ('csv' or '.csv')
    The filenames are returned as strings that include dirname
    If remove is True, the filenames are stripped from the extension (and its dot)
    If dirname is not a directory, a message is printed and an empty list is returned'''
    d = Path(dirname)
    if not d.is_dir():
        # format instead of + so that a Path argument does not raise a TypeError
        print('{} is not a directory'.format(dirname))
        return []
    suffix = '.'+extension.lstrip('.')
    # the pattern is built from the extension argument
    filenames = [str(f) for f in d.glob('*'+suffix)]
    if remove:
        # all matched names end with suffix, which is cut by length
        return [f[:-len(suffix)] for f in filenames]
    else:
        return filenames
0
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
1029 |
266
aba9711b3149
small modificatons and reorganization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
262
diff
changeset
|
def mkdir(dirname):
    '''Creates the directory dirname if it does not exist

    dirname can be a string or a Path
    Missing parent directories are not created (Path.mkdir is called without parents)
    A message is printed if dirname already exists'''
    p = Path(dirname)
    if not p.exists():
        p.mkdir()
    else:
        # format instead of + so that a Path argument does not raise a TypeError
        print('{} already exists'.format(dirname))
aba9711b3149
small modificatons and reorganization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
262
diff
changeset
|
1037 |
14
e7bbe8465591
homography and other utils
Nicolas Saunier <nico@confins.net>
parents:
7
diff
changeset
|
def removeFile(filename):
    '''Deletes the file while avoiding raising an error
    if the file does not exist

    filename can be a string or a Path
    A message is printed if the file does not exist'''
    f = Path(filename)
    if f.exists():
        f.unlink()
    else:
        # format instead of + so that a Path argument does not raise a TypeError
        print('{} does not exist'.format(filename))
14
e7bbe8465591
homography and other utils
Nicolas Saunier <nico@confins.net>
parents:
7
diff
changeset
|
1046 |
42
1a2ac2d4f53a
added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
35
diff
changeset
|
def line2Floats(l, separator=' '):
    '''Splits the string l on separator and returns the pieces converted to floats'''
    return list(map(float, l.split(separator)))
1a2ac2d4f53a
added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
35
diff
changeset
|
1050 |
1a2ac2d4f53a
added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
35
diff
changeset
|
def line2Ints(l, separator=' '):
    '''Splits the string l on separator and returns the pieces converted to ints'''
    return list(map(int, l.split(separator)))
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
1054 |
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
1055 ######################### |
553
3622a5653ee9
added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
547
diff
changeset
|
1056 # Profiling |
3622a5653ee9
added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
547
diff
changeset
|
1057 ######################### |
3622a5653ee9
added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
547
diff
changeset
|
1058 |
3622a5653ee9
added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
547
diff
changeset
|
def analyzeProfile(profileFilename, stripDirs = True):
    '''Loads the file produced by cProfile, prints the statistics
    sorted by time (restricted with the .2 argument of print_stats)
    and returns the pstats.Stats object

    profileFilename is joined to the parent directory (os.pardir):
    a relative filename is looked up there, an absolute filename is used as is
    If stripDirs is True, the directories are removed from the file names in the statistics

    The profile file is obtained by for example:
    - call in script (for main() function in script)
    import cProfile, os
    cProfile.run('main()', os.path.join(os.getcwd(),'main.profile'))

    - or on the command line:
    python -m cProfile [-o profile.bin] [-s sort] scriptfile [arg]'''
    import pstats, os
    stats = pstats.Stats(os.path.join(os.pardir, profileFilename))
    if stripDirs:
        stats.strip_dirs()
    # the Stats methods return the object itself, so the calls can be chained
    stats.sort_stats('time').print_stats(.2)
    return stats
3622a5653ee9
added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
547
diff
changeset
|
1078 |
3622a5653ee9
added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
547
diff
changeset
|
1079 ######################### |
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
1080 # running tests |
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
1081 ######################### |
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
1082 |
0
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
if __name__ == "__main__":
    # runs the doctests stored in tests/utils.txt with the unittest text runner
    import doctest
    import unittest
    utilsSuite = doctest.DocFileSuite('tests/utils.txt')
    unittest.TextTestRunner().run(utilsSuite)