comparison python/storage.py @ 718:2cd245cb780d

Added a lowMemory option (default True) that is passed to pandas.read_csv as low_memory, so that callers can set low_memory = False
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Thu, 30 Jul 2015 17:43:14 -0400
parents d6c69d3d09e5
children 49e99ca34a7d
comparison
equal deleted inserted replaced
717:9d6cd4e8dca3 718:2cd245cb780d
699 else: return self.fp.readline() 699 else: return self.fp.readline()
700 700
701 def generatePDLaneColumn(data): 701 def generatePDLaneColumn(data):
702 data['LANE'] = data['LANE\LINK\NO'].astype(str)+'_'+data['LANE\INDEX'].astype(str) 702 data['LANE'] = data['LANE\LINK\NO'].astype(str)+'_'+data['LANE\INDEX'].astype(str)
703 703
704 def loadTrajectoriesFromVissimFile(filename, simulationStepsPerTimeUnit, nObjects = -1, warmUpLastInstant = None, usePandas = False, nDecimals = 2): 704 def loadTrajectoriesFromVissimFile(filename, simulationStepsPerTimeUnit, nObjects = -1, warmUpLastInstant = None, usePandas = False, nDecimals = 2, lowMemory = True):
705 '''Reads data from VISSIM .fzp trajectory file 705 '''Reads data from VISSIM .fzp trajectory file
706 simulationStepsPerTimeUnit is the number of simulation steps per unit of time used by VISSIM 706 simulationStepsPerTimeUnit is the number of simulation steps per unit of time used by VISSIM
707 for example, there seems to be 5 simulation steps per simulated second in VISSIM, 707 for example, there seems to be 5 simulation steps per simulated second in VISSIM,
708 so simulationStepsPerTimeUnit should be 5, 708 so simulationStepsPerTimeUnit should be 5,
709 so that all times correspond to the number of the simulation step (and can be stored as integers) 709 so that all times correspond to the number of the simulation step (and can be stored as integers)
714 Assumed to be sorted over time''' 714 Assumed to be sorted over time'''
715 objects = {} # dictionary of objects index by their id 715 objects = {} # dictionary of objects index by their id
716 716
717 if usePandas: 717 if usePandas:
718 from pandas import read_csv 718 from pandas import read_csv
719 data = read_csv(filename, delimiter=';', comment='*', header=0, skiprows = 1) 719 data = read_csv(filename, delimiter=';', comment='*', header=0, skiprows = 1, low_memory = lowMemory)
720 generatePDLaneColumn(data) 720 generatePDLaneColumn(data)
721 data['TIME'] = data['$VEHICLE:SIMSEC']*simulationStepsPerTimeUnit 721 data['TIME'] = data['$VEHICLE:SIMSEC']*simulationStepsPerTimeUnit
722 if warmUpLastInstant is not None: 722 if warmUpLastInstant is not None:
723 data = data[data['TIME']>=warmUpLastInstant] 723 data = data[data['TIME']>=warmUpLastInstant]
724 grouped = data.loc[:,['NO','TIME']].groupby(['NO'], as_index = False) 724 grouped = data.loc[:,['NO','TIME']].groupby(['NO'], as_index = False)
780 (format as string x_y where x is link index and y is lane index)''' 780 (format as string x_y where x is link index and y is lane index)'''
781 from pandas import read_csv 781 from pandas import read_csv
782 columns = ['NO', '$VEHICLE:SIMSEC', 'POS'] 782 columns = ['NO', '$VEHICLE:SIMSEC', 'POS']
783 if lanes is not None: 783 if lanes is not None:
784 columns += ['LANE\LINK\NO', 'LANE\INDEX'] 784 columns += ['LANE\LINK\NO', 'LANE\INDEX']
785 data = read_csv(filename, delimiter=';', comment='*', header=0, skiprows = 1, usecols = columns) 785 data = read_csv(filename, delimiter=';', comment='*', header=0, skiprows = 1, usecols = columns, low_memory = lowMemory)
786 data = selectPDLanes(data, lanes) 786 data = selectPDLanes(data, lanes)
787 data.sort(['$VEHICLE:SIMSEC'], inplace = True) 787 data.sort(['$VEHICLE:SIMSEC'], inplace = True)
788 788
789 nStationary = 0 789 nStationary = 0
790 nVehicles = 0 790 nVehicles = 0
804 one checks when the sign of the position difference inverts 804 one checks when the sign of the position difference inverts
805 (if the time are closer than collisionTimeDifference) 805 (if the time are closer than collisionTimeDifference)
806 If lanes is not None, only the data for the selected lanes will be provided 806 If lanes is not None, only the data for the selected lanes will be provided
807 (format as string x_y where x is link index and y is lane index)''' 807 (format as string x_y where x is link index and y is lane index)'''
808 from pandas import read_csv, merge 808 from pandas import read_csv, merge
809 data = read_csv(filename, delimiter=';', comment='*', header=0, skiprows = 1, usecols = ['LANE\LINK\NO', 'LANE\INDEX', '$VEHICLE:SIMSEC', 'NO', 'POS']) 809 data = read_csv(filename, delimiter=';', comment='*', header=0, skiprows = 1, usecols = ['LANE\LINK\NO', 'LANE\INDEX', '$VEHICLE:SIMSEC', 'NO', 'POS'], low_memory = lowMemory)
810 data = selectPDLanes(data, lanes) 810 data = selectPDLanes(data, lanes)
811 merged = merge(data, data, how='inner', left_on=['LANE\LINK\NO', 'LANE\INDEX', '$VEHICLE:SIMSEC'], right_on=['LANE\LINK\NO', 'LANE\INDEX', '$VEHICLE:SIMSEC'], sort = False) 811 merged = merge(data, data, how='inner', left_on=['LANE\LINK\NO', 'LANE\INDEX', '$VEHICLE:SIMSEC'], right_on=['LANE\LINK\NO', 'LANE\INDEX', '$VEHICLE:SIMSEC'], sort = False)
812 merged = merged[merged['NO_x']>merged['NO_y']] 812 merged = merged[merged['NO_x']>merged['NO_y']]
813 813
814 nCollisions = 0 814 nCollisions = 0