annotate trafficintelligence/utils.py @ 1278:8e61ff3cd503 default tip

correct bug to take into account first frame num in config, and other related bugs in dltrack.py
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Thu, 27 Jun 2024 15:31:36 -0400
parents bae8de98406f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
1 #! /usr/bin/env python
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
2 ''' Generic utilities.'''
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
3
397
b36b00dd27c3 added function to read scene metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 395
diff changeset
4 from datetime import time, datetime
971
9897a13772fb added utils to load video sequence in metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 969
diff changeset
5 from argparse import ArgumentTypeError
1021
16932cefabc1 work on paths in line with new configurations from tracker
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 997
diff changeset
6 from pathlib import Path
670
f72ed51c6b65 corrected other missing imports
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 669
diff changeset
7 from math import sqrt, ceil, floor
1029
c6cf75a2ed08 reorganization of imports
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1028
diff changeset
8 from copy import deepcopy, copy
1034
4069d8545922 updated mostCommong function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1031
diff changeset
9 from collections import Counter
1029
c6cf75a2ed08 reorganization of imports
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1028
diff changeset
10
1156
f7fbe624fff7 added helper functions for categorical variables
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1135
diff changeset
11 from scipy.stats import rv_continuous, kruskal, shapiro, lognorm, norm, t, chi2_contingency
689
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
12 from scipy.spatial import distance
840
15a82ebc62c4 utils for sparse matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 837
diff changeset
13 from scipy.sparse import dok_matrix
1250
77fbd0e2ba7d dltrack works with moving average filtering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1222
diff changeset
14 from numpy import zeros, array, exp, sum as npsum, int64 as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log, polyfit, float64
1124
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1105
diff changeset
15 from numpy.random import random_sample, permutation as nppermutation
1156
f7fbe624fff7 added helper functions for categorical variables
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1135
diff changeset
16 from pandas import DataFrame, concat, crosstab
1029
c6cf75a2ed08 reorganization of imports
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1028
diff changeset
17 import matplotlib.pyplot as plt
677
ae07c7b4cf87 update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 676
diff changeset
18
421
4fce27946c60 first example of video metadata using sqlalchemy
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 405
diff changeset
# default format of date and time strings, eg 2017-06-26 14:37:20
datetimeFormat = "%Y-%m-%d %H:%M:%S"

# sjcam format of date and time strings, eg 2017_0626_143720
sjcamDatetimeFormat = "%Y_%m%d_%H%M%S"
5d788d2e8ffc work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 940
diff changeset
22
185
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
23 #########################
1030
aafbc0bab925 moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1029
diff changeset
24 # txt files
aafbc0bab925 moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1029
diff changeset
25 #########################
aafbc0bab925 moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1029
diff changeset
26
aafbc0bab925 moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1029
diff changeset
# first character of the lines to skip when reading text files (see readline)
commentChar = '#'

# first character of the line ending an entry in text files (see getLines)
delimiterChar = '%'
aafbc0bab925 moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1029
diff changeset
30
aafbc0bab925 moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1029
diff changeset
def openCheck(filename, option = 'r', quitting = False):
    '''Opens filename with the mode option (read mode by default)
    and returns the file object

    If the file cannot be opened, a message is printed, then
    the program exits if quitting is True, otherwise None is returned'''
    try:
        fileObject = open(filename, option)
    except IOError:
        print('File {} could not be opened.'.format(filename))
        if not quitting:
            return None
        from sys import exit
        exit()
    else:
        return fileObject
aafbc0bab925 moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1029
diff changeset
42
aafbc0bab925 moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1029
diff changeset
def readline(f, commentCharacters = commentChar):
    '''Returns the next line of f that is not a comment, stripped of surrounding whitespace

    A line is a comment if its first character is in commentCharacters,
    which can be a string or a list of characters (the in operator works for both)
    An empty string is returned at the end of the file (and for a blank line)'''
    line = f.readline()
    while line and line[0] in commentCharacters:
        line = f.readline()
    return line.strip()
aafbc0bab925 moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1029
diff changeset
50
aafbc0bab925 moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1029
diff changeset
def getLines(f, delimiterChar = delimiterChar, commentCharacters = commentChar):
    '''Returns the list of the stripped lines of the next entry read from f

    Comment lines are skipped (see readline) and the entry ends at the first line
    that starts with delimiterChar or that is empty once stripped (blank line or end of file)'''
    entry = []
    line = readline(f, commentCharacters)
    while line and line[0] != delimiterChar:
        entry.append(line.strip())
        line = readline(f, commentCharacters)
    return entry
aafbc0bab925 moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1029
diff changeset
59
aafbc0bab925 moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1029
diff changeset
60 #########################
742
fe71639f1ee7 merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 741
diff changeset
61 # Strings
fe71639f1ee7 merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 741
diff changeset
62 #########################
fe71639f1ee7 merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 741
diff changeset
63
fe71639f1ee7 merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 741
diff changeset
def upperCaseFirstLetter(s):
    '''Returns s where each word (separated by a space) has its first letter
    in upper case and the other letters in lower case

    Empty words (empty string, repeated, leading or trailing spaces)
    are kept unchanged so that the spacing of s is preserved'''
    words = s.split(' ')
    # slicing (w[:1]) instead of indexing (w[0]) avoids an IndexError for empty words
    capitalizedWords = [w[:1].upper()+w[1:].lower() for w in words]
    return ' '.join(capitalizedWords)
fe71639f1ee7 merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 741
diff changeset
68
971
9897a13772fb added utils to load video sequence in metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 969
diff changeset
class TimeConverter:
    '''Converts strings to datetime objects using a given format
    (presumably meant to be used as type for argparse arguments, to be confirmed with callers)'''
    def __init__(self, datetimeFormat = datetimeFormat):
        self.datetimeFormat = datetimeFormat

    def convert(self, s):
        '''Returns the datetime parsed from the string s with self.datetimeFormat

        Raises ArgumentTypeError if s does not match the format'''
        try:
            parsed = datetime.strptime(s, self.datetimeFormat)
        except ValueError:
            raise ArgumentTypeError("Not a valid date: '{0}'.".format(s))
        return parsed
9897a13772fb added utils to load video sequence in metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 969
diff changeset
79
742
fe71639f1ee7 merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 741
diff changeset
80 #########################
185
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
81 # Enumerations
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
82 #########################
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
83
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
def inverseEnumeration(l):
    '''Returns the dictionary that maps each element of the input list to its index in the list
    (the index of the last occurrence if an element appears several times)'''
    return {element: index for index, element in enumerate(l)}
155
f03fe3d6d0c8 added functions to parse options
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 152
diff changeset
90
1135
342701cdac30 bug fix for concatenate
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1124
diff changeset
def findElement(l, num):
    '''Returns the first element of the list l for which getNum() is equal to num

    Returns None if there is no such element (including if l is empty):
    the search stops at the end of the list instead of raising an IndexError'''
    for element in l:
        if element.getNum() == num:
            return element
    return None
342701cdac30 bug fix for concatenate
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1124
diff changeset
99
155
f03fe3d6d0c8 added functions to parse options
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 152
diff changeset
100 #########################
637
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
101 # Simple statistics
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
102 #########################
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
103
680
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 677
diff changeset
def logNormalMeanVar(loc, scale):
    '''Returns the mean and variance of the log-normal distribution
    for which loc and scale are respectively the mean and standard deviation of the normal distribution (of the logarithm)
    https://en.wikipedia.org/wiki/Log-normal_distribution

    same as lognorm.stats(scale, 0, exp(loc))'''
    normalVariance = scale**2 # variance of the underlying normal distribution
    return exp(loc+normalVariance/2), (exp(normalVariance)-1)*exp(2*loc+normalVariance)
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 677
diff changeset
112
855
2277ab1a8141 added utility for lognorm estimation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 854
diff changeset
def fitLogNormal(x):
    '''Fits a log-normal distribution to the sample x (scipy location parameter fixed to 0)
    and returns the location and scale of the lognormal (general definition),
    ie the logarithm of the scipy scale and the scipy shape'''
    fittedParameters = lognorm.fit(x, floc=0.)
    scipyShape, scipyScale = fittedParameters[0], fittedParameters[2]
    return log(scipyScale), scipyShape
2277ab1a8141 added utility for lognorm estimation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 854
diff changeset
117
859
a8de3c93f6b7 minor modifications to helper stat functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 856
diff changeset
def sampleSize(stdev, tolerance, percentConfidence, nRoundingDigits = None, printLatex = False):
    '''Returns the sample size (k*stdev/tolerance)^2 where k is the quantile
    of the standard normal distribution for the confidence level percentConfidence (in percent)

    k is rounded to 2 digits if nRoundingDigits is None,
    otherwise k, stdev and tolerance are rounded to nRoundingDigits digits
    If printLatex is True, the formula is printed in LaTeX'''
    probability = 0.5+percentConfidence/200. # 1.-(100-percentConfidence)/200.
    quantile = norm.ppf(probability, 0, 1)
    if nRoundingDigits is not None:
        k = round(quantile, nRoundingDigits)
        stdev = round(stdev, nRoundingDigits)
        tolerance = round(tolerance, nRoundingDigits)
    else:
        k = round(quantile, 2)
    if printLatex:
        print('$z_{{{}}}^2\\frac{{s^2}}{{e^2}}={}^2\\frac{{{}^2}}{{{}^2}}$'.format(probability, k, stdev, tolerance))
    return (k*stdev/tolerance)**2
f738fa1b69f0 added sample size and Student distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 421
diff changeset
128
f738fa1b69f0 added sample size and Student distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 421
diff changeset
def confidenceInterval(mean, stdev, nSamples, percentConfidence, trueStd = True, printLatex = False):
    '''Returns the bounds of the confidence interval mean +/- k*stdev/sqrt(nSamples)
    for the confidence level percentConfidence (in percent)

    k is the quantile (rounded to 2 digits) of the normal distribution if trueStd,
    otherwise of the Student distribution with nSamples-1 degrees of freedom
    If printLatex is True, the formula is printed in LaTeX

    Use otherwise t.interval or norm.interval for the boundaries
    ex: norm.interval(0.95)
    t.interval(0.95, nSamples-1)'''
    probability = 0.5+percentConfidence/200.
    if trueStd:
        quantile = norm.ppf(probability, 0, 1)
    else: # use Student
        quantile = t.ppf(probability, nSamples-1)
    k = round(quantile, 2)
    halfWidth = k*stdev/sqrt(nSamples)
    if printLatex:
        print('${0} \\pm {1}\\frac{{{2}}}{{\\sqrt{{{3}}}}}$'.format(mean, k, stdev, nSamples))
    return mean-halfWidth, mean+halfWidth
27f06d28036d added simple helper for confidence intervals
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 297
diff changeset
143
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
def computeChi2(expected, observed):
    '''Returns the Chi2 statistic, ie the sum over the paired expected and observed values
    of the squared difference divided by the expected value'''
    chi2 = 0
    for e, o in zip(expected, observed):
        difference = e-o
        chi2 += (difference*difference)/float(e)
    return chi2
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
147
1103
7594802f281a added constant distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1089
diff changeset
class ConstantDistribution(object):
    '''Distribution for which the random variable always takes the same value'''
    def __init__(self, value):
        self.value = value

    def rvs(self, size = 1):
        '''Returns the constant value if size is 1,
        otherwise an array containing size times the value'''
        if size != 1:
            return array([self.value]*size)
        return self.value
7594802f281a added constant distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1089
diff changeset
158
1031
045cb04ad7b8 corrected bug in distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1030
diff changeset
class EmpiricalContinuousDistribution(rv_continuous):
    '''Continuous distribution defined by points of its cumulative distribution function,
    which is interpolated linearly between these points'''
    def __init__(self, values, probabilities, **kwargs):
        '''The values (and corresponding probabilities) are supposed to be sorted by value
        for v, p in zip(values, probabilities): P(X<=v)=p

        The first probability must be 0
        kwargs are passed to the rv_continuous constructor'''
        assert probabilities[0]==0
        super(EmpiricalContinuousDistribution, self).__init__(**kwargs)
        self.values = values
        self.probabilities = probabilities

    def save(self, filename):
        '''Saves the values and probabilities in the YAML file filename

        They are converted to lists of plain Python numbers so that the file
        can be read back by load, even if they are numpy arrays or numpy scalars'''
        import yaml
        with open(filename, 'w') as out:
            yaml.dump([array(self.values).tolist(), array(self.probabilities).tolist()], out)

    @staticmethod
    def load(filename):
        '''Returns the distribution built from the values and probabilities
        saved in the YAML file filename (see save)'''
        import yaml
        with open(filename) as f:
            # yaml.load without Loader argument raises a TypeError with PyYAML >= 6
            # and can execute arbitrary code with older versions:
            # safe_load is sufficient for lists of numbers
            values, probabilities = yaml.safe_load(f)
        return EmpiricalContinuousDistribution(values, probabilities)

    def _cdf(self, x):
        '''Returns the cumulative probability at x (scalar or array)

        It is the first probability below the first value, the last probability
        above the last value and is linearly interpolated in between'''
        # local import since interp is not imported at the top of the file
        from numpy import interp
        # vectorized: rv_continuous.cdf calls _cdf with arrays
        return interp(x, self.values, self.probabilities)
1028
cc5cb04b04b0 major update using the trafficintelligence package name and install through pip
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1023
diff changeset
191
749
10dbab1e871d modifications in samples and distributions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 742
diff changeset
class DistributionSample(object):
    '''Base class for the samples of a distribution stored as counts

    nSamples reads the counts attribute, which is not set here'''
    def nSamples(self):
        'Returns the total number of observations, ie the sum of the counts'
        total = 0
        for count in self.counts:
            total += count
        return total
7f1e54234f96 added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 77
diff changeset
195
588
c5406edbcf12 added loading ground truth annotations (ground truth) from polytrack format
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 574
diff changeset
def cumulativeDensityFunction(sample, normalized = False):
    '''Returns the empirical cumulative distribution of the sample of a random variable

    The result is the pair (sorted sample values, cumulative counts from 1 to the sample size)
    if normalized is True, the counts are divided by the sample size (floats)'''
    nObservations = len(sample)
    cumulativeCounts = arange(1, nObservations+1)
    if not normalized:
        return sorted(sample), cumulativeCounts
    return sorted(sample), cumulativeCounts.astype(float)/float(nObservations)
85
7f1e54234f96 added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 77
diff changeset
203
749
10dbab1e871d modifications in samples and distributions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 742
diff changeset
class DiscreteDistributionSample(DistributionSample):
    '''Class to represent a sample of a distribution for a discrete random variable

    categories are the values taken by the variable
    counts are the numbers of observations of each category, in the same order'''
    def __init__(self, categories, counts):
        self.categories = categories
        self.counts = counts

    def mean(self):
        'Returns the sample mean, ie the categories weighted by their counts'
        result = [float(x*y) for x,y in zip(self.categories, self.counts)]
        return npsum(result)/self.nSamples()

    def var(self, mean = None):
        '''Returns the sample variance (sum of squared deviations divided by the number of observations minus 1)

        mean can be provided to avoid computing it again, otherwise self.mean() is used'''
        # compare to None explicitly: a provided mean of 0 is falsy but is a valid value
        if mean is None:
            m = self.mean()
        else:
            m = mean
        squares = [float((x-m)*(x-m)*y) for x,y in zip(self.categories, self.counts)]
        return npsum(squares)/(self.nSamples()-1)

    def referenceCounts(self, probability):
        '''probability is a function that returns the probability of the random variable for the category values

        Returns the expected counts and the probabilities for each category
        the probability of the last category is replaced by 1 minus the sum of the others
        so that the probabilities sum to 1'''
        refProba = [probability(c) for c in self.categories]
        refProba[-1] = 1-npsum(refProba[:-1])
        n = self.nSamples()
        refCounts = [r*n for r in refProba]
        return refCounts, refProba
85
7f1e54234f96 added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 77
diff changeset
229
749
10dbab1e871d modifications in samples and distributions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 742
diff changeset
class ContinuousDistributionSample(DistributionSample):
    '''Class to represent a sample of a distribution for a continuous random variable
    with the number of observations for each interval
    intervals (categories variable) are defined by their left limits, the last one being the right limit
    categories contain therefore one more element than the counts'''
    def __init__(self, categories, counts):
        # todo add samples for initialization and everything to None? (or setSamples?)
        self.categories = categories
        self.counts = counts

    @staticmethod
    def generate(sample, categories):
        '''Returns the ContinuousDistributionSample counting the elements of sample
        in the intervals defined by categories (sorted first)

        An element s is counted in interval i if categories[i] <= s < categories[i+1]
        elements outside of the intervals are reported with a message and not counted'''
        if min(sample) < min(categories):
            print('Sample has lower min than proposed categories ({}, {})'.format(min(sample), min(categories)))
        if max(sample) > max(categories):
            print('Sample has higher max than proposed categories ({}, {})'.format(max(sample), max(categories)))
        dist = ContinuousDistributionSample(sorted(categories), [0]*(len(categories)-1))
        for s in sample:
            # i is the number of limits that are lower than or equal to s
            i = 0
            while i<len(dist.categories) and dist.categories[i] <= s:
                i += 1
            # i == 0 means s is below the first limit: without the lower bound test,
            # counts[-1] (the last interval) would be incremented
            if 0 < i <= len(dist.counts):
                dist.counts[i-1] += 1
            else:
                print('Element {} is not in the categories'.format(s))
        return dist

    def _midPoints(self):
        'Returns the list of the middle points of the intervals (one per count)'
        return [(left+right)/2 for left, right in zip(self.categories, self.categories[1:])]

    def mean(self):
        'Returns the sample mean, each observation being represented by the middle of its interval'
        result = 0.
        # zip stops at the shortest sequence: all the intervals are used, including the last one
        for count, mid in zip(self.counts, self._midPoints()):
            result += count*mid
        return result/self.nSamples()

    def var(self, mean = None):
        '''Returns the sample variance (sum of squared deviations divided by the number of observations minus 1)
        each observation being represented by the middle of its interval

        mean can be provided to avoid computing it again, otherwise self.mean() is used'''
        # compare to None explicitly: a provided mean of 0 is falsy but is a valid value
        if mean is None:
            m = self.mean()
        else:
            m = mean
        result = 0.
        for count, mid in zip(self.counts, self._midPoints()):
            result += count*(mid - m)*(mid - m)
        return result/(self.nSamples()-1)

    def referenceCounts(self, cdf):
        '''cdf is a cumulative distribution function
        returning the probability of the variable being less than x

        Returns the expected counts and the probabilities for each interval
        the first and last intervals receive all the probability
        respectively below the second limit and above the second to last limit'''
        refCumulativeCounts = [cdf(x) for x in self.categories[1:-1]]
        # successive differences of 0, the cdf at the inner limits, and 1
        # (this also handles the case of a single interval, of probability 1)
        bounds = [0]+refCumulativeCounts+[1]
        refProba = [upper-lower for lower, upper in zip(bounds, bounds[1:])]
        n = self.nSamples()
        refCounts = [p*n for p in refProba]

        return refCounts, refProba

    def printReferenceCounts(self, refCounts=None, cdf=None):
        '''Prints one line per interval with its limits, reference probability and reference count
        (fields separated by & and lines ended by \\\\)

        refCounts is the pair (counts, probabilities) as returned by referenceCounts
        if it is not provided, it is computed by calling referenceCounts with cdf
        raises ValueError if neither refCounts nor cdf is provided'''
        if refCounts is not None:
            ref = refCounts
        elif cdf is not None:
            ref = self.referenceCounts(cdf)
        else:
            raise ValueError('printReferenceCounts requires either refCounts or cdf')
        for i in range(len(ref[0])):
            print('{0}-{1} & {2:0.3} & {3:0.3} \\\\'.format(self.categories[i],self.categories[i+1],ref[1][i], ref[0][i]))
5e6cd36a991c added pretty print in empiricalDistribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 76
diff changeset
298
5e6cd36a991c added pretty print in empiricalDistribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 76
diff changeset
299
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
300 #########################
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
301 # maths section
27
44689029a86f updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents: 24
diff changeset
302 #########################
24
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
303
433
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
304 # def kernelSmoothing(sampleX, X, Y, weightFunc, halfwidth):
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
305 # '''Returns a smoothed weighted version of Y at the predefined values of sampleX
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
306 # Sum_x weight(sample_x,x) * y(x)'''
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
307 # from numpy import zeros, array
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
308 # smoothed = zeros(len(sampleX))
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
309 # for i,x in enumerate(sampleX):
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
310 # weights = array([weightFunc(x,xx, halfwidth) for xx in X])
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
311 # if sum(weights)>0:
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
312 # smoothed[i] = sum(weights*Y)/sum(weights)
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
313 # else:
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
314 # smoothed[i] = 0
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
315 # return smoothed
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
316
1124
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1105
diff changeset
def generateData(nrows, nvariables, scale):
    '''Returns a DataFrame of nrows rows and nvariables columns (named x1, x2, etc.)
    filled with the numbers returned by random_sample multiplied by scale'''
    columnNames = ['x{}'.format(i) for i in range(1, nvariables+1)]
    values = random_sample(nrows*nvariables).reshape(nrows,nvariables)*scale
    return DataFrame(values, columns=columnNames)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1105
diff changeset
320
433
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
def kernelSmoothing(x, X, Y, weightFunc, halfwidth):
    '''Returns the smoothed estimate of (X,Y) at x
    ie the average of Y weighted by weightFunc(x, observedx, halfwidth) for each observedx in X

    Returns 0 if the sum of the weights is not positive'''
    weights = array([weightFunc(x,observedx, halfwidth) for observedx in X])
    totalWeight = sum(weights)
    if not totalWeight>0:
        return 0
    return sum(weights*Y)/totalWeight
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
329
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
def uniform(center, x, halfwidth):
    'Uniform weight: 1 if x is strictly closer to center than halfwidth, 0 otherwise'
    return 1. if abs(center-x)<halfwidth else 0.
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
335
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
def gaussian(center, x, halfwidth):
    '''Gaussian weight (not normalized): exp(-z^2/2)
    where z is the difference between center and x divided by halfwidth'''
    z = (center-x)/halfwidth
    return exp(-z**2/2)
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
338
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
def epanechnikov(center, x, halfwidth):
    '''Epanechnikov weight (not normalized): 1-(d/halfwidth)^2
    if the distance d between center and x is strictly lower than halfwidth, 0 otherwise'''
    distance = abs(center-x)
    if not distance<halfwidth:
        return 0.
    return 1.-(distance/halfwidth)**2
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
345
434
9a714f32fc9f small updates
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 433
diff changeset
def triangular(center, x, halfwidth):
    '''Triangular weight: decreases linearly from 1 at center
    and is 0 if the distance between center and x is not strictly lower than halfwidth'''
    distance = abs(center-x)
    if not distance<halfwidth:
        return 0.
    return 1.-abs(distance/halfwidth)
433
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
352
518
0c86c73f3c09 median smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 511
diff changeset
def medianSmoothing(x, X, Y, halfwidth):
    '''Returns the median of the Y's corresponding to the X's
    that are strictly closer to x than halfwidth'''
    window = []
    for observedx, y in zip(X,Y):
        if abs(x-observedx)<halfwidth:
            window.append(y)
    return median(window)
0c86c73f3c09 median smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 511
diff changeset
356
521
3707eeb20f25 changed argMaxDict name to argmaxDict
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 518
diff changeset
def argmaxDict(d):
    'Returns the key of d associated with the largest value'
    return max(d.keys(), key=lambda k: d.get(k))
279
3af4c267a7bf generic simple LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 276
diff changeset
359
837
e01cabca4c55 minor modifications to merge-features
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 749
diff changeset
def deltaFrames(t1, t2, frameRate):
    '''Returns the number of frames between t1 and t2
    positive if t1<=t2, negative otherwise

    t1 and t2 must be comparable and their difference must be a timedelta (eg datetime.datetime)
    frameRate is the number of frames per second
    only the whole seconds of the difference are used (microseconds are ignored)'''
    if t1 > t2:
        delta, sign = t1-t2, -1
    else:
        delta, sign = t2-t1, 1
    # timedelta.seconds only holds the part of the difference below one day:
    # the days must be added for times more than 24 h apart
    return sign*(delta.days*86400+delta.seconds)*frameRate
e01cabca4c55 minor modifications to merge-features
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 749
diff changeset
367
395
6fba1ab040f1 minor modification to framestotime
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 391
diff changeset
def framesToTime(nFrames, frameRate, initialTime = time()):
    '''returns the datetime.time (hour, minutes and seconds) reached after nFrames
    at frameRate frames per second, starting from initialTime
    (the partial second of the frame duration is dropped)
    initialTime is a datetime.time'''
    elapsed = floor(float(nFrames)/float(frameRate))
    offset = initialTime.hour*3600+initialTime.minute*60+initialTime.second
    totalSeconds = int(elapsed+offset)
    h, remainder = divmod(totalSeconds, 3600)
    m, s = divmod(remainder, 60)
    return time(h, m, s)
248
571ba5ed22e2 added utils for bus processing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 241
diff changeset
377
381
387cc0142211 script to replay event annotations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 376
diff changeset
def timeToFrames(t, frameRate):
    '''Returns the number of frames corresponding to the hours, minutes and seconds of t
    at frameRate frames per second'''
    nSeconds = t.hour*3600+t.minute*60+t.second
    return frameRate*nSeconds
387cc0142211 script to replay event annotations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 376
diff changeset
380
1059
a87b3072bd26 working version
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1058
diff changeset
def timeModulo(t, duration):
    '''returns t with its minutes replaced by the integer quotient of the minutes by the duration (in min)
    the hour and seconds are kept

    NOTE(review): the function name suggests a modulo or a rounding to a multiple of duration,
    but the minutes are set to the quotient minute//duration: confirm it is intended'''
    nIntervals = t.minute//duration
    return time(t.hour, nIntervals, t.second)
a87b3072bd26 working version
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1058
diff changeset
384
241
ee1caff48b03 added function to sort to list of paired data X,Y
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 235
diff changeset
def sortXY(X,Y):
    '''returns the sorted x values and the list of the corresponding y values
    each x value appears once: if it is repeated in X, its last y is kept'''
    yByX = dict(zip(X,Y))
    xsorted = sorted(yByX)
    return xsorted, [yByX[x] for x in xsorted]
ee1caff48b03 added function to sort to list of paired data X,Y
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 235
diff changeset
392
733
c35e4a4b199d sortbylength
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 698
diff changeset
def compareLengthForSort(i, j):
    '''Compares two objects by their length (function len)

    Returns -1 if i is shorter than j, 0 if they have the same length, 1 otherwise'''
    lengthI, lengthJ = len(i), len(j)
    if lengthI == lengthJ:
        return 0
    return -1 if lengthI < lengthJ else 1
c35e4a4b199d sortbylength
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 698
diff changeset
400
c35e4a4b199d sortbylength
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 698
diff changeset
def sortByLength(instances, reverse = False):
    '''Returns a new list of the instances ordered by their length (method __len__)

    instances is not modified
    reverse = True orders from the longest to the shortest'''
    result = list(instances)
    result.sort(key = len, reverse = reverse)
    return result
733
c35e4a4b199d sortbylength
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 698
diff changeset
405
32
48e56179c39e added ceil function
Nicolas Saunier <nico@confins.net>
parents: 31
diff changeset
def ceilDecimals(v, nDecimals):
    '''Rounds the number up (ceiling) at the nth decimal

    eg 1.23 gives 2 with 0 decimal and 1.3 with 1 decimal'''
    scale = 10**nDecimals
    scaledCeiling = ceil(v*scale)
    return scaledCeiling/scale
48e56179c39e added ceil function
Nicolas Saunier <nico@confins.net>
parents: 31
diff changeset
411
152
74b1fc68d4df re-organized code to avoid cyclic python module dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 116
diff changeset
def inBetween(bound1, bound2, x):
    '''Indicates whether x lies between the two bounds (included)

    the bounds can be passed in any order'''
    withinAscendingBounds = bound1 <= x <= bound2
    if withinAscendingBounds:
        return withinAscendingBounds
    return bound2 <= x <= bound1
0057c04f94d5 work in progress on intersections (for PET)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 561
diff changeset
415
0057c04f94d5 work in progress on intersections (for PET)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 561
diff changeset
def pointDistanceL2(x1,y1,x2,y2):
    '''Returns the Euclidean distance (L2 norm)
    between the points (x1, y1) and (x2, y2)'''
    deltaX = x2-x1
    deltaY = y2-y1
    return sqrt(deltaX**2+deltaY**2)
24
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
419
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
def crossProduct(l1, l2):
    '''Returns the cross product l1[0]*l2[1]-l1[1]*l2[0]
    computed from the first two coordinates of l1 and l2'''
    firstTerm = l1[0]*l2[1]
    secondTerm = l1[1]*l2[0]
    return firstTerm-secondTerm
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
422
1276
bae8de98406f corrected bug in categorical value smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1267
diff changeset
def filterCategoricalMovingWindow(categoricalList, halfWidth):
    '''Returns the list of categories/values smoothed over a moving window:
    each element is replaced by the most frequent value in its window

    halfWidth is the search radius on either side of each element
    (the window is truncated at both ends of the list)'''
    nValues = len(categoricalList)
    result = []
    for center in range(nValues):
        first = max(0, center-halfWidth)
        last = min(nValues, center+halfWidth+1)
        neighbourhood = categoricalList[first:last]
        # most frequent value in the window
        result.append(max(set(neighbourhood), key=neighbourhood.count))
    return result
e24eeb244698 first implementation of projection to curvilinear coordinates
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 569
diff changeset
434
1250
77fbd0e2ba7d dltrack works with moving average filtering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1222
diff changeset
def filterMovingWindow(inputSignal, halfWidth):
    '''Returns an array (float64) obtained after the smoothing of the 1-D input by a moving average

    The output has the same size as the input:
    - the central part is the average over 2*halfWidth+1 elements
    (numpy convolve in 'valid' mode, see https://numpy.org/doc/stable/reference/generated/numpy.convolve.html)
    - the element at distance i < halfWidth from either end is the average
    over the largest centered window that fits in the signal (2*i+1 elements)

    halfWidth is reduced if the window is larger than the signal
    if the resulting halfWidth is 0 or less, the signal is returned unfiltered'''
    # the window of 2*halfWidth+1 elements cannot be larger than the signal
    halfWidth = min(floor((len(inputSignal)-1)/2.), halfWidth)
    filtered = array(inputSignal, dtype=float64)
    if halfWidth <= 0:
        # nothing to average, and filtered[0:-0] would be an empty slice
        return filtered
    windowSize = 2*halfWidth+1
    win = ones(windowSize)/windowSize
    filtered[halfWidth:-halfWidth] = convolve(inputSignal, win, 'valid')
    # borders: all the halfWidth elements at each end, with shrinking centered windows
    for i in range(halfWidth):
        borderSize = 2*i+1
        filtered[i] = sum(inputSignal[:borderSize])/borderSize
        filtered[-1-i] = sum(inputSignal[-borderSize:])/borderSize
    return filtered
29
ca8e716cc231 added moving average filter
Nicolas Saunier <nico@confins.net>
parents: 27
diff changeset
447
199
ca9d9104afba added utility to calibrate polynoms and plot
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 197
diff changeset
def linearRegression(x, y, deg = 1, plotData = False):
    '''Returns the least square estimation of the polynomial of degree deg fitted to (x, y)
    (linear regression y = ax+b for the default degree 1)

    the coefficients are returned from the highest to the lowest degree
    if plotData is True, the data and the fitted polynomial are also plotted'''
    coef = polyfit(x, y, deg)
    if not plotData:
        return coef

    nCoefficients = len(coef)
    def evaluate(z):
        'value of the fitted polynomial at z'
        value = 0
        for rank, c in enumerate(coef):
            value += c*z**(nCoefficients-rank-1)
        return value

    plt.plot(x, y, 'x')
    xmin, xmax = min(x), max(x)
    # 1000 regularly spaced abscissas over the range of x
    samples = arange(xmin, xmax, (xmax-xmin)/1000)
    plt.plot(samples, [evaluate(z) for z in samples])
    return coef
ca9d9104afba added utility to calibrate polynoms and plot
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 197
diff changeset
462
668
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
def correlation(data, correlationMethod = 'pearson', plotFigure = False, displayNames = None, figureFilename = None):
    '''Computes (and displays if plotFigure is True) the correlation matrix for a pandas DataFrame

    The following columns are left out of the matrix:
    - columns with a single unique value
    - columns of object type
    - columns with only one other value than nan

    correlationMethod is passed to the corr method of the DataFrame
    displayNames is an optional dictionary of names to display instead of the (stripped) column names
    the figure is saved in figureFilename if it is not None

    Returns the correlation matrix'''
    def isExcluded(var):
        'indicates whether the column cannot be used in the correlation matrix'
        values = data[var].unique()
        if len(values) == 1:
            return True
        if data.dtypes[var] == dtype('O'):
            return True
        # only one other value than nan
        return len(values) == 2 and len(data.loc[~isnan(data[var]), var].unique()) == 1

    columns = [var for var in data.columns.tolist() if not isExcluded(var)]
    c=data[columns].corr(correlationMethod)
    if not plotFigure:
        return c

    nVariables = c.shape[0]
    fig = plt.figure(figsize=(4+0.4*nVariables, 0.4*nVariables))
    fig.add_subplot(1,1,1)
    plt.imshow(c, vmin=-1., vmax = 1., interpolation='none', cmap = 'RdYlBu_r') # coolwarm
    if displayNames is None:
        colnames = columns
    else:
        colnames = [displayNames.get(s.strip(), s.strip()) for s in columns]
    tickPositions = range(len(colnames))
    plt.xticks(tickPositions, colnames, rotation=90)
    plt.yticks(tickPositions, colnames)
    plt.tick_params('both', length=0)
    plt.subplots_adjust(bottom = 0.29)
    plt.colorbar()
    plt.title('Correlation ({})'.format(correlationMethod))
    plt.tight_layout()
    if len(colnames) > 50:
        plt.subplots_adjust(left=.06)
    if figureFilename is not None:
        plt.savefig(figureFilename, dpi = 150, transparent = True)
    return c
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
493
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
def addDummies(data, variables, allVariables = True):
    '''Adds binary dummy variables for each value of a nominal variable
    in a pandas DataFrame

    Only the variables that are columns of data, of object type
    and with more than 2 unique values (missing values included) are processed
    Each dummy variable is a new boolean column of data (modified in place), named variable_value
    without the characters '.', ' ' and '-'
    No dummy variable is created for missing values (nan)
    If allVariables is False, the last unique value of each variable is left out

    Returns the list of the names of the new variables'''
    newVariables = []
    for var in variables:
        if var not in data.columns or data.dtypes[var] != dtype('O'):
            continue
        values = data[var].unique()
        if len(values) <= 2:
            continue
        if not allVariables:
            values = values[:-1]
        for val in values:
            # nan is detected by value (nan != nan) and not by identity,
            # since a nan is not necessarily the same object as the numpy constant
            if isinstance(val, float) and val != val:
                continue
            newVariable = (var+'_{}'.format(val)).replace('.','').replace(' ','').replace('-','')
            data[newVariable] = (data[var] == val)
            newVariables.append(newVariable)
    return newVariables
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
509
997
4f3387a242a1 updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 990
diff changeset
def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, renameVariables = lambda s: s, kwCaption = ''):
    '''Studies the influence of a (nominal) independent variable over the dependent variable

    For each value of the independent variable, prints the Shapiro-Wilk normality test
    of the conditional distribution of the dependent variable if it has at least 3 observations
    (if the distributions are normal, ANOVA could be used)
    Then applies the non-parametric Kruskal-Wallis test

    data is a pandas DataFrame, the rows with a null independent variable are ignored
    if plotFigure is True, the boxplots of the conditional distributions are drawn
    (and saved in filenamePrefix-dependentVariable-independentVariable.figureFileType if filenamePrefix is not None)
    the table of descriptive statistics, sorted by decreasing median, is
    - written in filenamePrefix-dependentVariable-independentVariable.tex if saveLatex is True
    (filenamePrefix must then be provided), with kwCaption as caption,
    formatted with the names of the two variables and the elements of the test result
    - printed otherwise
    renameVariables is currently not used

    Returns the result of the Kruskal-Wallis test,
    or None if the independent variable has less than 2 values'''
    tmp = data[data[independentVariable].notnull()]
    independentVariableValues = sorted(tmp[independentVariable].unique().tolist())
    if len(independentVariableValues) < 2:
        return None
    # conditional samples of the dependent variable, in the order of independentVariableValues
    samples = [tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues]
    for x, sample in zip(independentVariableValues, samples):
        print('Shapiro-Wilk normality test for {} when {}={}: {} obs'.format(dependentVariable,independentVariable, x, len(sample)))
        if len(sample) >= 3:
            print(shapiro(sample))
    if plotFigure:
        plt.figure()
        plt.boxplot(samples)
        plt.xticks(range(1,len(independentVariableValues)+1), independentVariableValues)
        plt.title('{} vs {}'.format(dependentVariable, independentVariable))
        if filenamePrefix is not None:
            plt.savefig(filenamePrefix+'-{}-{}.{}'.format(dependentVariable, independentVariable, figureFileType))
    # describe returns one row per group and one column per statistic: sorted by decreasing median
    table = tmp.groupby([independentVariable])[dependentVariable].describe().sort_values('50%', ascending = False)
    table['count'] = table['count'].astype(int)
    testResult = kruskal(*samples)
    if saveLatex:
        out = openCheck(filenamePrefix+'-{}-{}.tex'.format(dependentVariable, independentVariable), 'w')
        try:
            # to_latex returns a str, which is written as is (no encoding to bytes)
            out.write('\\begin{minipage}{\\linewidth}\n'
                      +'\\centering\n'
                      +'\\captionof{table}{'+(kwCaption.format(dependentVariable, independentVariable, *testResult))+'}\n'
                      +table.to_latex(float_format = '{:.3f}'.format)+'\n'
                      +'\\end{minipage}\n'
                      +'\\ \\vspace{0.5cm}\n')
        finally:
            out.close()
    else:
        print(table)
    return testResult
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
547
677
ae07c7b4cf87 update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 676
diff changeset
def prepareRegression(data, dependentVariable, independentVariables, maxCorrelationThreshold, correlations, maxCorrelationP, correlationFunc, stdoutText = ['Removing {} (constant: {})', 'Removing {} (correlation {} with {})', 'Removing {} (no correlation: {}, p={})'], saveFiles = False, filenamePrefix = None, latexHeader = '', latexTable = None, latexFooter=''):
    '''Selects the independent variables to keep for a regression

    Candidates are discarded in three successive passes:
    - constant variables: a single unique value, or, for non-object columns, two unique values of which only one is not NaN
    - for each pair of remaining variables whose absolute correlation (read in correlations) is > maxCorrelationThreshold, the second one is removed
    - non-object variables whose correlation with the dependent variable has p > maxCorrelationP
    Returns the list of remaining variables (a copy: independentVariables itself is not modified)

    correlations is accessed through .index and .loc[v1, v2] (presumably a square pandas DataFrame of pairwise correlations - to be confirmed against callers)
    correlationFunc is spearmanr or pearsonr from scipy.stats
    stdoutText contains the 3 templates printed for the 3 types of removal (see default)
    latexTable contains the templates used for the same 3 types of removal when saveFiles is True
    (NOTE(review): the previous documentation mentioned 8 elements when saving to latex, only the first 3 are used here)
    if saveFiles is True, the removals are written to filenamePrefix-removed-variables.tex (between latexHeader and latexFooter)
    and the correlations of the remaining non-object variables to filenamePrefix-correlations.html

    TODO: pass the dummies for nominal variables and remove if all dummies are correlated, or none is correlated with the dependentvariable'''
    keptVariables = copy(independentVariables)
    removedRows = '' # latex rows describing each removal
    keptCorrelations = []
    keptPValues = []

    # pass 1: constant variables
    for var in independentVariables:
        uniqueValues = data[var].unique()
        nUniqueValues = len(uniqueValues)
        isConstant = (nUniqueValues == 1)
        if not isConstant and nUniqueValues == 2 and uniqueValues.dtype != dtype('O'):
            # two unique values, one of them being NaN, is still a constant
            isConstant = (len(data.loc[~isnan(data[var]), var].unique()) == 1)
        if isConstant:
            print(stdoutText[0].format(var, uniqueValues))
            if saveFiles:
                removedRows += latexTable[0].format(var, *uniqueValues)
            keptVariables.remove(var)

    # pass 2: variables correlated with each other
    # (iterating over copies since keptVariables shrinks in the loop)
    for v1 in copy(keptVariables):
        if v1 not in correlations.index:
            continue
        for v2 in copy(keptVariables):
            if v2 == v1 or v2 not in correlations.index:
                continue
            coefficient = correlations.loc[v1, v2]
            if abs(coefficient) > maxCorrelationThreshold and v1 in keptVariables and v2 in keptVariables:
                if saveFiles:
                    removedRows += latexTable[1].format(v2, v1, coefficient)
                print(stdoutText[1].format(v2, v1, coefficient))
                keptVariables.remove(v2)

    # pass 3: variables not correlated with the dependent variable
    for var in copy(keptVariables):
        if data.dtypes[var] != dtype('O'):
            cor, p = correlationFunc(data[dependentVariable], data[var])
            if p > maxCorrelationP:
                if saveFiles:
                    removedRows += latexTable[2].format(var, cor, p)
                print(stdoutText[2].format(var, cor, p))
                keptVariables.remove(var)
            else:
                keptCorrelations.append(cor)
                keptPValues.append(p)

    if saveFiles:
        out = openCheck(filenamePrefix+'-removed-variables.tex', 'w')
        out.write(latexHeader)
        out.write(removedRows)
        out.write(latexFooter)
        out.close()
        out = openCheck(filenamePrefix+'-correlations.html', 'w')
        # same filter as in pass 3, so that the three columns have the same length
        numericVariables = [var for var in keptVariables if data.dtypes[var] != dtype('O')]
        correlationTable = DataFrame({'Variables': numericVariables, 'Correlations': keptCorrelations, 'Valeurs p': keptPValues})
        out.write(correlationTable.to_html(formatters = {'Correlations': '{:.2f}'.format, 'Valeurs p': '{:.3f}'.format}, index = False))
        out.close()
    return keptVariables
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
606
841
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 840
diff changeset
def saveDokMatrix(filename, m, lowerTriangle = False):
    '''Saves a dok_matrix using savez

    The archive contains 3 arrays: shape, keys (the (i, j) indices of the stored elements)
    and values (the corresponding elements, in the same order)
    If lowerTriangle is True, only the elements strictly below the diagonal (i > j) are saved
    filename is passed as is to savez'''
    # the keys are requested explicitly with keys():
    # iterating directly over a sparse matrix is not guaranteed to yield the (i, j) keys
    if lowerTriangle:
        keys = [k for k in m.keys() if k[0] > k[1]]
    else:
        keys = list(m.keys())
    values = [m[k[0], k[1]] for k in keys]
    savez(filename, shape = m.shape, keys = keys, values = values)
840
15a82ebc62c4 utils for sparse matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 837
diff changeset
614
15a82ebc62c4 utils for sparse matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 837
diff changeset
def loadDokMatrix(filename):
    '''Loads a dok_matrix saved using saveDokMatrix

    The archive must contain the arrays shape, keys and values
    Returns a new dok_matrix of the saved shape with the saved elements'''
    # the archive is closed as soon as the arrays are read, to avoid leaking the file handle
    with npload(filename) as data:
        shape = tuple(int(n) for n in data['shape'])
        keys = data['keys']
        values = data['values']
    m = dok_matrix(shape)
    for k, v in zip(keys, values):
        m[tuple(int(i) for i in k)] = v
    return m
667
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
622
1023
a13f47c8931d work on processing large datasets (generate speed data)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1022
diff changeset
def aggregationFunction(funcStr, centile = 50):
    '''Returns the numpy-based aggregation function named funcStr

    funcStr is one of 'median', 'mean', 'centile' (uses the centile argument) or '85centile'
    centile can be a list of centiles to compute at once, eg [25, 50, 75] for the 3 quartiles
    Prints a message and returns None if funcStr is unknown'''
    if funcStr == 'median':
        return median
    if funcStr == 'mean':
        return mean
    if funcStr == 'centile':
        def requestedCentile(x):
            return percentile(x, centile)
        return requestedCentile
    if funcStr == '85centile':
        def centile85(x):
            return percentile(x, 85)
        return centile85
    print('Unknown aggregation method: {}'.format(funcStr))
    return None
a13f47c8931d work on processing large datasets (generate speed data)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1022
diff changeset
637
1058
16575ca4537d work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1034
diff changeset
def aggregationMethods(methods, centiles = None):
    '''Builds the aggregation functions and the corresponding headers for the methods

    methods is an iterable of method names accepted by aggregationFunction
    centiles is the list of centiles to compute, required if 'centile' is among the methods

    Returns a dictionary of the aggregation functions (indexed by method name)
    and the list of headers: one per method, except for 'centile' which has one header per centile (eg 'centile85')
    Raises TypeError if 'centile' is requested without centiles'''
    aggFunctions = {}
    headers = []
    for method in methods:
        if method == 'centile':
            if centiles is None:
                # explicit message instead of failing later when iterating over None
                raise TypeError("centiles must be provided (list of centiles) if 'centile' is one of the methods")
            aggFunctions[method] = aggregationFunction(method, centiles)
            headers.extend('{}{}'.format(method, c) for c in centiles)
        else:
            aggFunctions[method] = aggregationFunction(method)
            headers.append(method)
    return aggFunctions, headers
1105
e62c2f5e25e6 added sampling function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1103
diff changeset
650
e62c2f5e25e6 added sampling function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1103
diff changeset
def maxSumSample(d, maxSum):
    '''Draws values one at a time from distribution d (type scipy.stats, using rvs method)
    as long as the sum of the values drawn so far is less than maxSum

    Returns the list of values drawn (empty if maxSum is not greater than 0)'''
    sample = []
    total = 0
    while total < maxSum:
        sample.append(d.rvs())
        total += sample[-1]
    return sample
1156
f7fbe624fff7 added helper functions for categorical variables
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1135
diff changeset
661
f7fbe624fff7 added helper functions for categorical variables
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1135
diff changeset
def cramers_v(x, y):
    """ Computes the Cramers V statistic for categorial-categorial association
    between x and y, from their contingency table (chi-squared statistic),
    with the bias correction from Bergsma and Wicher,
    Journal of the Korean Statistical Society 42 (2013): 323-328
    https://towardsdatascience.com/the-search-for-categorical-correlation-a1cf7f1888c9
    https://stackoverflow.com/questions/46498455/categorical-features-correlation/46498792#46498792
    """
    contingencyTable = crosstab(x,y)
    nObservations = contingencyTable.sum().sum()
    nRows, nColumns = contingencyTable.shape
    phi2 = chi2_contingency(contingencyTable)[0]/nObservations
    # bias-corrected phi2 (not below 0) and numbers of rows and columns
    phi2Corrected = max(0, phi2-((nColumns-1)*(nRows-1))/(nObservations-1))
    nRowsCorrected = nRows-((nRows-1)**2)/(nObservations-1)
    nColumnsCorrected = nColumns-((nColumns-1)**2)/(nObservations-1)
    return sqrt(phi2Corrected/min((nColumnsCorrected-1),(nRowsCorrected-1)))
f7fbe624fff7 added helper functions for categorical variables
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1135
diff changeset
678
f7fbe624fff7 added helper functions for categorical variables
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1135
diff changeset
def categoricalCorrelationMatrix(data, categoricalVariables):
    '''Returns the correlation matrix for the categorical variables

    data is indexed by variable name (eg a pandas DataFrame)
    categoricalVariables is the list of the names of the variables to correlate

    The result is a symmetric numpy array, with ones on the diagonal,
    whose element (i, j) is cramers_v for variables i and j'''
    # imported locally so that the function does not depend on a module-level np alias
    from numpy import ones
    nVariables = len(categoricalVariables)
    corr = ones((nVariables, nVariables))
    for i in range(nVariables):
        for j in range(i):
            # the data argument and the cramers_v function of this module are used
            # (instead of a global variable and a reference to the module itself)
            corr[i,j] = cramers_v(data[categoricalVariables[i]], data[categoricalVariables[j]])
            corr[j,i] = corr[i,j]
    return corr
f7fbe624fff7 added helper functions for categorical variables
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1135
diff changeset
687
667
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
688 #########################
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
689 # regression analysis using statsmodels (and pandas)
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
690 #########################
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
691
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
692 # TODO make class for experiments?
668
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
693 # TODO add tests with public dataset downloaded from Internet (IRIS et al)
667
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def modelString(experiment, dependentVariable, independentVariables):
    '''Returns the model formula 'dependentVariable ~ var1 + var2...'
    made of the independent variables for which experiment[variable] is true'''
    selectedVariables = []
    for variable in independentVariables:
        if experiment[variable]:
            selectedVariables.append(variable)
    return dependentVariable+' ~ '+' + '.join(selectedVariables)
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
696
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def runModel(experiment, data, dependentVariable, independentVariables, regressionType = 'ols'):
    '''Fits the model described by experiment (see modelString) on data
    using the statsmodels formula API and returns the fit results

    regressionType is 'ols', 'gls' or 'rlm':
    any other value prints a message and exits the program'''
    import statsmodels.formula.api as smf
    formula = modelString(experiment, dependentVariable, independentVariables)
    if regressionType not in ('ols', 'gls', 'rlm'):
        print('Unknown regression type {}. Exiting'.format(regressionType))
        import sys
        sys.exit()
    # the statsmodels model constructors have the same names as the regression types
    buildModel = getattr(smf, regressionType)
    return buildModel(formula, data = data).fit()
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
711
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def runModels(experiments, data, dependentVariable, independentVariables, regressionType = 'ols'):
    '''Runs several models and stores 4 statistics
    adjusted R2, condition number (should be small, eg < 1000),
    p-value for Shapiro-Wilk test of residual normality
    and number of observations

    experiments has one row per model, with one column per independent variable
    indicating whether it is included (see generateExperiments)
    Rows without any independent variable are skipped
    The statistics are stored in experiments (modified in place), which is also returned'''
    for i,experiment in experiments.iterrows():
        if experiment[independentVariables].any():
            # the requested regression type is passed on (it used to be always 'ols')
            results = runModel(experiment, data, dependentVariable, independentVariables, regressionType = regressionType)
            experiments.loc[i,'r2adj'] = results.rsquared_adj
            experiments.loc[i,'condNum'] = results.condition_number
            experiments.loc[i, 'shapiroP'] = shapiro(results.resid)[1]
            experiments.loc[i,'nobs'] = int(results.nobs)
    return experiments
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
724
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def generateExperiments(independentVariables):
    '''Generates all possible models for including or not each independent variable

    Returns a DataFrame with one row per model (2**n rows for n variables):
    one boolean column per independent variable (True if the variable is included)
    and the columns for the model statistics, initialized to placeholder values
    r2adj (0.), condNum (NaN), shapiroP (-1.) and nobs (-1)

    Prints a message and exits the program if there are duplicate variables'''
    nIndependentVariables = len(independentVariables)
    if nIndependentVariables != len(set(independentVariables)):
        print("Duplicate variables. Exiting")
        import sys
        sys.exit()
    experiments = {}
    for i,var in enumerate(independentVariables):
        # variable i alternates between 2**i False and 2**i True:
        # the rows enumerate all the combinations, like the bits of a binary counter
        pattern = [False]*(2**i)+[True]*(2**i)
        experiments[var] = pattern*(2**(nIndependentVariables-i-1))
    experiments = DataFrame(experiments)
    experiments['r2adj'] = 0.
    # float('nan') does not depend on the numpy NaN alias (removed in numpy 2.0)
    experiments['condNum'] = float('nan')
    # float placeholder: the column later receives p-values (float),
    # and storing a float in an integer column is deprecated in recent pandas versions
    experiments['shapiroP'] = -1.
    experiments['nobs'] = -1
    return experiments
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
743
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def findBestModel(data, dependentVariable, independentVariables, regressionType = 'ols', nProcesses = 1):
    '''Generates all possible models with the independentVariables
    and runs them, saving the results in experiments
    with multiprocess option

    If nProcesses is 1, all the experiments are run by runModels in the current process
    Otherwise, the experiments are split in nProcesses consecutive chunks,
    each chunk is run by runModels in a worker process
    and the results of all the chunks are concatenated

    Returns the result of runModels (or the concatenation of the results in the multiprocess case)'''
    experiments = generateExperiments(independentVariables)
    nModels = len(experiments)
    print("Running {} models with {} processes".format(nModels, nProcesses))
    print("IndependentVariables: {}".format(independentVariables))
    if nProcesses == 1:
        return runModels(experiments, data, dependentVariable, independentVariables, regressionType)
    # rounding up so that nProcesses chunks cover all the experiments
    chunkSize = int(ceil(nModels/nProcesses))
    # the context manager makes sure the worker processes are released
    # all the results are collected before leaving the block
    # NOTE(review): assumes Pool is multiprocessing.Pool - confirm against the imports of the file
    with Pool(processes = nProcesses) as pool:
        jobs = [pool.apply_async(runModels, args = (experiments[i*chunkSize:(i+1)*chunkSize], data, dependentVariable, independentVariables, regressionType)) for i in range(nProcesses)]
        return concat([job.get() for job in jobs])
667
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
759
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def findBestModelFwd(data, dependentVariable, independentVariables, modelFunc, experiments = None):
    '''Forward search for the best model (based on the adjusted R2)

    The variables are tried one after the other, using a random permutation
    to identify the corresponding row in experiments:
    a variable is kept in the model only if adding it improves the adjusted R2

    experiments is generated from independentVariables if not provided
    A model is fitted (through modelFunc(modelStr, data = data).fit()) only
    for the rows of experiments with a negative shapiroP, ie not already run,
    and its results (r2adj, condNum, shapiroP, nobs) are stored in that row
    Storing in experiments relies on the index being the number equal
    to the binary code derived from the independent variables

    Returns experiments (modified in place)'''
    if experiments is None:
        experiments = generateExperiments(independentVariables)
    nVariables = len(independentVariables)
    order = nppermutation(list(range(nVariables)))
    namesByPosition = {position: independentVariables[k] for k, position in enumerate(order)}
    print('Tested variables '+', '.join([namesByPosition[k] for k in range(nVariables)]))
    selected = [False]*nVariables
    bestR2Adj = 0.
    for varNum in range(nVariables):
        # candidate model: best model so far plus the current variable
        candidate = list(selected)
        candidate[varNum] = True
        # row index of the candidate in experiments: binary code of the included variables
        rowIdx = sum(1 << k for k in range(nVariables) if candidate[order[k]])
        notRunYet = experiments.loc[rowIdx, 'shapiroP'] < 0
        if notRunYet:
            modelStr = modelString(experiments.loc[rowIdx], dependentVariable, independentVariables)
            fitted = modelFunc(modelStr, data = data).fit()
            experiments.loc[rowIdx, 'r2adj'] = fitted.rsquared_adj
            experiments.loc[rowIdx, 'condNum'] = fitted.condition_number
            experiments.loc[rowIdx, 'shapiroP'] = shapiro(fitted.resid)[1]
            experiments.loc[rowIdx, 'nobs'] = int(fitted.nobs)
        candidateR2Adj = experiments.loc[rowIdx, 'r2adj']
        if candidateR2Adj > bestR2Adj:
            bestR2Adj = candidateR2Adj
            selected[varNum] = True
    return experiments
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
794
677
ae07c7b4cf87 update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 676
diff changeset
def displayModelResults(results, model = None, plotFigures = True, filenamePrefix = None, figureFileType = 'pdf', text = {'title-shapiro': 'Shapiro-Wilk normality test for residuals: {:.2f} (p={:.3f})', 'true-predicted.xlabel': 'Predicted values', 'true-predicted.ylabel': 'True values', 'residuals-predicted.xlabel': 'Predicted values', 'residuals-predicted.ylabel': 'Residuals'}):
    '''Displays some model results

    Prints the summary of results and the result of the Shapiro-Wilk test on the residuals
    If plotFigures is True, draws up to 3 graphics in one figure:
    true-predicted (only if model is provided), residuals-predicted
    and a Q-Q plot of the residuals

    results must provide summary(), predict() and resid, model must provide endog
    (presumably statsmodels results and model objects - to be confirmed with the callers)
    text contains the title and axis labels of the graphics (None for no title or label),
    the optional keys 'qqplot.xlabel' and 'qqplot.ylabel' label the Q-Q plot
    If filenamePrefix is not None (and plotFigures is True), the summary is saved
    as html in filenamePrefix-coefficients.html
    and the figure in filenamePrefix-model-results.figureFileType'''
    print(results.summary())
    shapiroResult = shapiro(results.resid)
    print(shapiroResult)
    if plotFigures:
        # statsmodels is only needed for the Q-Q plot
        import statsmodels.api as sm
        fig = plt.figure(figsize=(7,6.3*(2+int(model is not None))))
        if model is not None:
            ax = fig.add_subplot(3,1,1)
            plt.plot(results.predict(), model.endog, 'x')
            x=plt.xlim()
            y=plt.ylim()
            # diagonal over the range common to both axes
            plt.plot([max(x[0], y[0]), min(x[1], y[1])], [max(x[0], y[0]), min(x[1], y[1])], 'r')
            #plt.axis('equal')
            if text is not None:
                plt.title(text['title-shapiro'].format(*shapiroResult))
                #plt.title(text['true-predicted.title'])
                plt.xlabel(text['true-predicted.xlabel'])
                plt.ylabel(text['true-predicted.ylabel'])
            fig.add_subplot(3,1,2, sharex = ax)
            plt.plot(results.predict(), results.resid, 'x')
            nextSubplotNum = 3
        else:
            fig.add_subplot(2,1,1)
            plt.plot(results.predict(), results.resid, 'x')
            nextSubplotNum = 2
        if text is not None:
            # the Shapiro-Wilk title goes on the first graphic, which is this one without model
            if model is None:
                plt.title(text['title-shapiro'].format(*shapiroResult))
            plt.xlabel(text['residuals-predicted.xlabel'])
            plt.ylabel(text['residuals-predicted.ylabel'])
        qqAx = fig.add_subplot(nextSubplotNum,1,nextSubplotNum)
        sm.qqplot(results.resid, fit = True, line = '45', ax = qqAx)
        plt.axis('equal')
        if text is not None and 'qqplot.xlabel' in text:
            plt.xlabel(text['qqplot.xlabel'])
            plt.ylabel(text['qqplot.ylabel'])
        plt.tight_layout()
        if filenamePrefix is not None:
            out = openCheck(filenamePrefix+'-coefficients.html', 'w')
            try:
                out.write(results.summary().as_html())
            finally:
                # closing the file so that the html is flushed to disk and the handle released
                out.close()
            plt.savefig(filenamePrefix+'-model-results.'+figureFileType)
667
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
840
27
44689029a86f updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents: 24
diff changeset
841 #########################
455
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
842 # iterable section
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
843 #########################
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
844
1034
4069d8545922 updated mostCommong function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1031
diff changeset
def mostCommon(l):
    '''Returns the most frequent element of an iterable
    The elements must be hashable

    Raises IndexError if the iterable is empty

    based on collections.Counter, see https://stackoverflow.com/questions/41612368/find-most-common-element
    (previous version from http://stackoverflow.com/questions/1518522/python-most-common-element-in-a-list)'''
    ranking = Counter(l).most_common(1)
    element, _ = ranking[0]
    return element
4069d8545922 updated mostCommong function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1031
diff changeset
852
455
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
853 #########################
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
854 # sequence section
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
855 #########################
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
856
665
15e244d2a1b5 corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 659
diff changeset
857 class LCSS(object):
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
858 '''Class that keeps the LCSS parameters
686
cdee6a3a47b4 allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
859 and puts together the various computations
cdee6a3a47b4 allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
860
cdee6a3a47b4 allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
861 the methods with names starting with _ are not to be shadowed
cdee6a3a47b4 allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
862 in child classes, who will shadow the other methods,
cdee6a3a47b4 allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
863 ie compute and computeXX methods'''
689
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
864 def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min):
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
865 '''One should provide either a similarity function
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
866 that indicates (return bool) whether elements in the compares lists are similar
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
867
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
868 eg distance(p1, p2) < epsilon
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
869
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
870 or a type of metric usable in scipy.spatial.distance.cdist with an epsilon'''
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
871 if similarityFunc is None and metric is None:
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
872 print("No way to compute LCSS, similarityFunc and metric are None. Exiting")
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
873 import sys
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
874 sys.exit()
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
875 elif metric is not None and epsilon is None:
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
876 print("Please provide a value for epsilon if using a cdist metric. Exiting")
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
877 import sys
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
878 sys.exit()
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
879 else:
741
5b91b8d97cf3 corrected bug
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 739
diff changeset
880 if similarityFunc is None and metric is not None and not isinf(delta):
737
fb60b54e1041 added warning for finite delta
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 733
diff changeset
881 print('Warning: you are using a cdist metric and a finite delta, which will make probably computation slower than using the equivalent similarityFunc (since all pairwise distances will be computed by cdist).')
689
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
882 self.similarityFunc = similarityFunc
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
883 self.metric = metric
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
884 self.epsilon = epsilon
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
885 self.aligned = aligned
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
886 self.delta = delta
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
887 self.lengthFunc = lengthFunc
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
888 self.subSequenceIndices = [(0,0)]
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
889
373
d0b86ed50f32 work in progress on LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 372
diff changeset
890 def similarities(self, l1, l2, jshift=0):
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
891 n1 = len(l1)
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
892 n2 = len(l2)
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
893 self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
689
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
894 if self.similarityFunc is not None:
997
4f3387a242a1 updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 990
diff changeset
895 for i in range(1,n1+1):
4f3387a242a1 updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 990
diff changeset
896 for j in range(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
689
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
897 if self.similarityFunc(l1[i-1], l2[j-1]):
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
898 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
899 else:
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
900 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
901 elif self.metric is not None:
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
902 similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon
997
4f3387a242a1 updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 990
diff changeset
903 for i in range(1,n1+1):
4f3387a242a1 updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 990
diff changeset
904 for j in range(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
689
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
905 if similarElements[i-1, j-1]:
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
906 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
907 else:
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
908 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
909
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
910
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
911 def subSequence(self, i, j):
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
912 '''Returns the subsequence of two sequences
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
913 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem'''
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
914 if i == 0 or j == 0:
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
915 return []
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
916 elif self.similarityTable[i][j] == self.similarityTable[i][j-1]:
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
917 return self.subSequence(i, j-1)
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
918 elif self.similarityTable[i][j] == self.similarityTable[i-1][j]:
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
919 return self.subSequence(i-1, j)
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
920 else:
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
921 return self.subSequence(i-1, j-1) + [(i-1,j-1)]
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
922
373
d0b86ed50f32 work in progress on LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 372
diff changeset
923 def _compute(self, _l1, _l2, computeSubSequence = False):
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
924 '''returns the longest common subsequence similarity
689
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
925 l1 and l2 should be the right format
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
926 eg list of tuple points for cdist
9990ef119bce added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 687
diff changeset
927 or elements that can be compared using similarityFunc
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
928
607
84690dfe5560 add some functions for behaviour analysis
MohamedGomaa
parents: 574
diff changeset
929 if aligned, returns the best matching if using a finite delta by shifting the series alignments
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
930 '''
372
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
931 if len(_l2) < len(_l1): # l1 is the shortest
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
932 l1 = _l2
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
933 l2 = _l1
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
934 revertIndices = True
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
935 else:
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
936 l1 = _l1
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
937 l2 = _l2
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
938 revertIndices = False
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
939 n1 = len(l1)
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
940 n2 = len(l2)
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
941
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
942 if self.aligned:
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
943 lcssValues = {}
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
944 similarityTables = {}
997
4f3387a242a1 updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 990
diff changeset
945 for i in range(-n2-self.delta+1, n1+self.delta): # interval such that [i-shift-delta, i-shift+delta] is never empty, which happens when i-shift+delta < 1 or when i-shift-delta > n2
373
d0b86ed50f32 work in progress on LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 372
diff changeset
946 self.similarities(l1, l2, i)
d0b86ed50f32 work in progress on LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 372
diff changeset
947 lcssValues[i] = self.similarityTable.max()
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
948 similarityTables[i] = self.similarityTable
374
a7af3519687e finished implementation of aligned LCSS with matching sequence decoded
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 373
diff changeset
949 #print self.similarityTable
521
3707eeb20f25 changed argMaxDict name to argmaxDict
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 518
diff changeset
950 alignmentShift = argmaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance
389
6d26dcc7bba0 modifications to compute alignment for None indicators
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 381
diff changeset
951 self.similarityTable = similarityTables[alignmentShift]
372
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
952 else:
389
6d26dcc7bba0 modifications to compute alignment for None indicators
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 381
diff changeset
953 alignmentShift = 0
372
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
954 self.similarities(l1, l2)
373
d0b86ed50f32 work in progress on LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 372
diff changeset
955
374
a7af3519687e finished implementation of aligned LCSS with matching sequence decoded
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 373
diff changeset
956 # threshold values for the useful part of the similarity table are n2-n1-delta and n1-n2-delta
389
6d26dcc7bba0 modifications to compute alignment for None indicators
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 381
diff changeset
957 self.similarityTable = self.similarityTable[:min(n1, n2+alignmentShift+self.delta)+1, :min(n2, n1-alignmentShift+self.delta)+1]
373
d0b86ed50f32 work in progress on LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 372
diff changeset
958
372
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
959 if computeSubSequence:
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
960 self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1)
371
924e38c9f70e work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 370
diff changeset
961 if revertIndices:
374
a7af3519687e finished implementation of aligned LCSS with matching sequence decoded
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 373
diff changeset
962 self.subSequenceIndices = [(j,i) for i,j in self.subSequenceIndices]
372
349eb1e09f45 Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 371
diff changeset
963 return self.similarityTable[-1,-1]
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
964
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
965 def compute(self, l1, l2, computeSubSequence = False):
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
966 '''get methods are to be shadowed in child classes '''
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
967 return self._compute(l1, l2, computeSubSequence)
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
968
375
2ea8584aa80a making indicator LCSS work
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 374
diff changeset
969 def computeAlignment(self):
374
a7af3519687e finished implementation of aligned LCSS with matching sequence decoded
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 373
diff changeset
970 return mean([j-i for i,j in self.subSequenceIndices])
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
971
376
2e6b8610bcaa work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 375
diff changeset
972 def _computeNormalized(self, l1, l2, computeSubSequence = False):
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
973 ''' compute the normalized LCSS
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
974 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc)
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
975 eg for the mean, lengthFunc = lambda x,y:float(x+y)/2'''
376
2e6b8610bcaa work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 375
diff changeset
976 return float(self._compute(l1, l2, computeSubSequence))/self.lengthFunc(len(l1), len(l2))
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
977
376
2e6b8610bcaa work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 375
diff changeset
978 def computeNormalized(self, l1, l2, computeSubSequence = False):
2e6b8610bcaa work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 375
diff changeset
979 return self._computeNormalized(l1, l2, computeSubSequence)
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
980
376
2e6b8610bcaa work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 375
diff changeset
981 def _computeDistance(self, l1, l2, computeSubSequence = False):
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
982 ''' compute the LCSS distance'''
376
2e6b8610bcaa work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 375
diff changeset
983 return 1-self._computeNormalized(l1, l2, computeSubSequence)
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
984
376
2e6b8610bcaa work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 375
diff changeset
985 def computeDistance(self, l1, l2, computeSubSequence = False):
2e6b8610bcaa work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 375
diff changeset
986 return self._computeDistance(l1, l2, computeSubSequence)
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
987
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
988 #########################
45
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
989 # plotting section
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
990 #########################
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
991
940
d8ab183a7351 verified motion prediction with prototypes at constant speed (test needed)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 876
diff changeset
992 def plotPolygon(poly, options = '', **kwargs):
332
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
993 'Plots shapely polygon poly'
940
d8ab183a7351 verified motion prediction with prototypes at constant speed (test needed)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 876
diff changeset
994 x,y = poly.exterior.xy
1029
c6cf75a2ed08 reorganization of imports
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1028
diff changeset
995 plt.plot(x, y, options, **kwargs)
332
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
996
324
99ca91a46007 minor change
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 323
diff changeset
997 def stepPlot(X, firstX, lastX, initialCount = 0, increment = 1):
99ca91a46007 minor change
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 323
diff changeset
998 '''for each value in X, increment by increment the initial count
297
f6f423e25c7f adding function to generate step plots (for cumulative number of vehicles)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 286
diff changeset
999 returns the lists that can be plotted
324
99ca91a46007 minor change
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 323
diff changeset
1000 to obtain a step plot increasing by increment for each value in X, from first to last value
99ca91a46007 minor change
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 323
diff changeset
1001 firstX and lastX should be respectively smaller and larger than all elements in X'''
297
f6f423e25c7f adding function to generate step plots (for cumulative number of vehicles)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 286
diff changeset
1002
f6f423e25c7f adding function to generate step plots (for cumulative number of vehicles)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 286
diff changeset
1003 sortedX = []
f6f423e25c7f adding function to generate step plots (for cumulative number of vehicles)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 286
diff changeset
1004 counts = [initialCount]
f6f423e25c7f adding function to generate step plots (for cumulative number of vehicles)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 286
diff changeset
1005 for x in sorted(X):
f6f423e25c7f adding function to generate step plots (for cumulative number of vehicles)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 286
diff changeset
1006 sortedX += [x,x]
f6f423e25c7f adding function to generate step plots (for cumulative number of vehicles)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 286
diff changeset
1007 counts.append(counts[-1])
324
99ca91a46007 minor change
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 323
diff changeset
1008 counts.append(counts[-1]+increment)
297
f6f423e25c7f adding function to generate step plots (for cumulative number of vehicles)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 286
diff changeset
1009 counts.append(counts[-1])
f6f423e25c7f adding function to generate step plots (for cumulative number of vehicles)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 286
diff changeset
1010 return [firstX]+sortedX+[lastX], counts
f6f423e25c7f adding function to generate step plots (for cumulative number of vehicles)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 286
diff changeset
1011
665
15e244d2a1b5 corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 659
diff changeset
1012 class PlottingPropertyValues(object):
45
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
1013 def __init__(self, values):
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
1014 self.values = values
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
1015
116
2bf5b76320c0 moved intersection plotting and added markers for scatter plots
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 115
diff changeset
1016 def __getitem__(self, i):
45
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
1017 return self.values[i%len(self.values)]
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
1018
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
1019 markers = PlottingPropertyValues(['+', '*', ',', '.', 'x', 'D', 's', 'o'])
116
2bf5b76320c0 moved intersection plotting and added markers for scatter plots
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 115
diff changeset
1020 scatterMarkers = PlottingPropertyValues(['s','o','^','>','v','<','d','p','h','8','+','x'])
45
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
1021
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
1022 linestyles = PlottingPropertyValues(['-', '--', '-.', ':'])
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
1023
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
1024 colors = PlottingPropertyValues('brgmyck') # 'w'
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
1025
990
94bee7b604eb addition
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 978
diff changeset
1026 def monochromeCycler(withMarker = False):
94bee7b604eb addition
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 978
diff changeset
1027 from cycler import cycler
94bee7b604eb addition
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 978
diff changeset
1028 if withMarker:
94bee7b604eb addition
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 978
diff changeset
1029 monochrome = (cycler('color', ['k']) * cycler('linestyle', ['-', '--', ':', '-.']) * cycler('marker', ['^',',', '.']))
94bee7b604eb addition
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 978
diff changeset
1030 else:
94bee7b604eb addition
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 978
diff changeset
1031 monochrome = (cycler('color', ['k']) * cycler('linestyle', ['-', '--', ':', '-.']))
94bee7b604eb addition
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 978
diff changeset
1032 plt.rc('axes', prop_cycle=monochrome)
94bee7b604eb addition
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 978
diff changeset
1033
115
550556378466 added functionalities to indicator maps
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 86
diff changeset
1034 def plotIndicatorMap(indicatorMap, squareSize, masked = True, defaultValue=-1):
997
4f3387a242a1 updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 990
diff changeset
1035 coords = array(list(indicatorMap.keys()))
65
75cf537b8d88 moved and generalized map making functions to the library
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 48
diff changeset
1036 minX = min(coords[:,0])
75cf537b8d88 moved and generalized map making functions to the library
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 48
diff changeset
1037 minY = min(coords[:,1])
75cf537b8d88 moved and generalized map making functions to the library
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 48
diff changeset
1038 X = arange(minX, max(coords[:,0])+1.1)*squareSize
75cf537b8d88 moved and generalized map making functions to the library
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 48
diff changeset
1039 Y = arange(minY, max(coords[:,1])+1.1)*squareSize
115
550556378466 added functionalities to indicator maps
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 86
diff changeset
1040 C = defaultValue*ones((len(Y), len(X)))
997
4f3387a242a1 updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 990
diff changeset
1041 for k,v in indicatorMap.items():
65
75cf537b8d88 moved and generalized map making functions to the library
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 48
diff changeset
1042 C[k[1]-minY,k[0]-minX] = v
115
550556378466 added functionalities to indicator maps
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 86
diff changeset
1043 if masked:
1029
c6cf75a2ed08 reorganization of imports
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1028
diff changeset
1044 plt.pcolor(X, Y, ma.masked_where(C==defaultValue,C))
115
550556378466 added functionalities to indicator maps
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 86
diff changeset
1045 else:
1029
c6cf75a2ed08 reorganization of imports
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 1028
diff changeset
1046 plt.pcolor(X, Y, C)
65
75cf537b8d88 moved and generalized map making functions to the library
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 48
diff changeset
1047
45
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
1048 #########################
637
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1049 # Data download
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1050 #########################
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1051
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1052 def downloadECWeather(stationID, years, months = [], outputDirectoryname = '.', english = True):
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1053 '''Downloads monthly weather data from Environment Canada
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1054 If months is provided (list of numbers from 1 to 12), hourly data is downloaded for each whole month
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1055 Otherwise, daily data is downloaded for each whole year
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1056
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1057 Example: MONTREAL MCTAVISH 10761
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1058 MONTREALPIERRE ELLIOTT TRUDEAU INTL A 5415
856
e310577cc0b8 updated function (url) for weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 855
diff changeset
1059 see ftp://client_climate@ftp.tor.ec.gc.ca/Pub/Get_More_Data_Plus_de_donnees/Station%20Inventory%20EN.csv
637
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1060
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1061 To get daily data for 2010 and 2011, downloadECWeather(10761, [2010,2011], [], '/tmp')
973
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 971
diff changeset
1062 To get hourly data for 2009 and 2012, January, March and October, downloadECWeather(10761, [2009,2012], [1,3,10], '/tmp')
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 971
diff changeset
1063
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 971
diff changeset
1064 for annee in `seq 2016 2017`;do wget --content-disposition "http://climat.meteo.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=${annee}&timeframe=2&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es" ;done
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 971
diff changeset
1065 for annee in `seq 2016 2017`;do for mois in `seq 1 12`;do wget --content-disposition "http://climat.meteo.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=${annee}&Month=${mois}&timeframe=1&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es" ;done;done
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 971
diff changeset
1066 '''
997
4f3387a242a1 updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 990
diff changeset
1067 import urllib.request
637
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1068 if english:
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1069 language = 'e'
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1070 else:
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1071 language = 'f'
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1072 if len(months) == 0:
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1073 timeFrame = 2
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1074 months = [1]
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1075 else:
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1076 timeFrame = 1
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1077
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1078 for year in years:
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1079 for month in months:
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1080 outFilename = '{}/{}-{}'.format(outputDirectoryname, stationID, year)
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1081 if timeFrame == 1:
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1082 outFilename += '-{}-hourly'.format(month)
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1083 else:
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1084 outFilename += '-daily'
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1085 outFilename += '.csv'
997
4f3387a242a1 updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 990
diff changeset
1086 url = urllib.request.urlretrieve('http://climate.weather.gc.ca/climate_data/bulk_data_{}.html?format=csv&stationID={}&Year={}&Month={}&Day=1&timeframe={}&submit=Download+Data'.format(language, stationID, year, month, timeFrame), outFilename)
637
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1087
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1088 #########################
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
1089 # File I/O
27
44689029a86f updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents: 24
diff changeset
1090 #########################
24
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
1091
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
1092 def removeExtension(filename, delimiter = '.'):
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
1093 '''Returns the filename minus the extension (all characters after last .)'''
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
1094 i = filename.rfind(delimiter)
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
1095 if i>0:
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
1096 return filename[:i]
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
1097 else:
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
1098 return filename
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
1099
969
5d788d2e8ffc work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 940
diff changeset
1100 def getExtension(filename, delimiter = '.'):
5d788d2e8ffc work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 940
diff changeset
1101 '''Returns the extension of the filename (all characters after the last delimiter), or the empty string if there is none'''
5d788d2e8ffc work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 940
diff changeset
1102 i = filename.rfind(delimiter)
5d788d2e8ffc work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 940
diff changeset
1103 if i>0:
5d788d2e8ffc work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 940
diff changeset
1104 return filename[i+1:]
5d788d2e8ffc work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 940
diff changeset
1105 else:
5d788d2e8ffc work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 940
diff changeset
1106 return ''
5d788d2e8ffc work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 940
diff changeset
1107
46
b5d007612e16 added filename util
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 45
diff changeset
1108 def cleanFilename(s):
b5d007612e16 added filename util
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 45
diff changeset
1109 'cleans filenames obtained when concatenating figure characteristics'
739
25e78d756823 minor change
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 737
diff changeset
1110 return s.replace(' ','-').replace('.','').replace('/','-').replace(',','')
46
b5d007612e16 added filename util
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 45
diff changeset
1111
1021
16932cefabc1 work on paths in line with new configurations from tracker
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 997
diff changeset
def getRelativeFilename(parentPath, filename):
    '''Returns filename unchanged if it is an absolute path,
    otherwise returns parentPath/filename converted to a string'''
    candidate = Path(filename)
    if not candidate.is_absolute():
        return str(parentPath/candidate)
    return filename
16932cefabc1 work on paths in line with new configurations from tracker
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 997
diff changeset
1119
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
def listfiles(dirname, extension, remove = False):
    '''Returns the list of the files in the directory dirname whose name ends with .extension
    (as strings, including the directory)

    If remove is True, the extension is stripped from the filenames using removeExtension
    If dirname is not a directory, a message is printed and an empty list is returned'''
    directory = Path(dirname)
    if not directory.is_dir():
        print(dirname+' is not a directory')
        return []
    filenames = [str(match) for match in directory.glob('*.'+extension)]
    if remove:
        return [removeExtension(name, extension) for name in filenames]
    return filenames
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
1133
266
aba9711b3149 small modificatons and reorganization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 262
diff changeset
def mkdir(dirname):
    '''Creates the directory dirname
    or prints a message if something already exists at that path'''
    target = Path(dirname)
    if target.exists():
        print(dirname+' already exists')
    else:
        target.mkdir()
aba9711b3149 small modificatons and reorganization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 262
diff changeset
1141
14
e7bbe8465591 homography and other utils
Nicolas Saunier <nico@confins.net>
parents: 7
diff changeset
def removeFile(filename):
    '''Deletes the file filename
    A message is printed instead of raising an error if the file does not exist'''
    target = Path(filename)
    if not target.exists():
        print(filename+' does not exist')
    else:
        target.unlink()
14
e7bbe8465591 homography and other utils
Nicolas Saunier <nico@confins.net>
parents: 7
diff changeset
1150
42
1a2ac2d4f53a added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 35
diff changeset
def line2Floats(l, separator=' '):
    '''Splits the string l on separator and returns the list of the fields converted to floats'''
    fields = l.split(separator)
    return list(map(float, fields))
1a2ac2d4f53a added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 35
diff changeset
1154
1a2ac2d4f53a added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 35
diff changeset
def line2Ints(l, separator=' '):
    '''Splits the string l on separator and returns the list of the fields converted to ints'''
    fields = l.split(separator)
    return list(map(int, fields))
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
1158
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
1159 #########################
553
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
1160 # Profiling
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
1161 #########################
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
1162
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
def analyzeProfile(profileFilename, stripDirs = True):
    '''Loads and prints the statistics stored in a file produced by cProfile

    Such a file can be obtained for example:
    - by a call in a script (for the main() function of the script)
    import cProfile, os
    cProfile.run('main()', os.path.join(os.getcwd(),'main.profile'))

    - or on the command line:
    python -m cProfile [-o profile.bin] [-s sort] scriptfile [arg]

    profileFilename is joined to the parent directory (os.pardir):
    a relative filename is therefore looked up in the parent of the current directory
    If stripDirs is True, the leading paths are stripped from the file names in the statistics
    The statistics are sorted by 'time' and printed with the restriction .2
    The pstats.Stats object is returned for further analysis'''
    import os
    import pstats
    stats = pstats.Stats(os.path.join(os.pardir, profileFilename))
    if stripDirs:
        stats.strip_dirs()
    stats.sort_stats('time').print_stats(.2)
    return stats
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
1182
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
1183 #########################
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
1184 # running tests
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
1185 #########################
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
1186
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
if __name__ == "__main__":
    # runs the doctests stored in the separate file tests/utils.txt
    import unittest
    import doctest
    runner = unittest.TextTestRunner()
    runner.run(doctest.DocFileSuite('tests/utils.txt'))
    # other ways of running doctests tried previously:
    # doctest.DocTestSuite(), doctest.testmod(), doctest.testfile("example.txt")