#!/usr/bin/python -OO ############################################################ # # Copyright Yves Dorfsman 2008. # # getdata.py uses CanadianWeather.py to obtain weather data # for several months, or years, and store the resultant list # in a pickle file. # # WARNING: This program can consume very large amounts of # memory. In hourly mode, it requires around 30 MiB of memory # per year of data downloaded. It initially only uses 18 MiB/year # but then needs the extra amount when pickling the data. # This seems modest, but it means 1.5 GiB for 50 years worth # of data (there is data available for over 54 years for Calgary # for example ). Python (version 2.5.1) pages in the whole amount # when it tries to update the list, you need that much real memory, # paging space will not help here. A machine with less real memory # than required will end up trashing and the execution of this script # will never complete. # # Only 1.5 MiB/year is required for daily data. # ############################################################ def main(): import sys import time import cPickle import optparse import CanadianWeather now = time.localtime() thisYear = now.tm_year thisMonth = now.tm_mon if thisMonth == 1: thisYear -= 1 lastMonth = 12 else: lastMonth = thisMonth - 1 del now, thisMonth # The Calgary airport station has the most data, all the way to 1953/01/01. # Station ID for "CALGARY INT'L A": 2205 parser = optparse.OptionParser() parser.add_option('-p', dest='periodicity', help='"h" or "d" for hourly/daily') parser.add_option('-s', dest='stationID', default='2205', help='station ID') parser.add_option('-y', dest='yearFrom', default='1953', help='origin year to use.') parser.add_option('-m', dest='monthFrom', default='1', help='origin month to use.') parser.add_option('-Y', dest='yearTo', default=thisYear, help='end year to use.') text = 'end month to use. If none is provided, and hourly data is ' text += ' requested, the current month will ' text += 'be used, so for example if you run this script on 2008/05/23, ' text += 'and specify "-Y1954" only, it will download data for the period ' text += '1953/01/01 to 1954/05/31.' parser.add_option('-M', dest='monthTo', default=lastMonth, help=text) (options, args) = parser.parse_args() periodicity = options.periodicity stationID = options.stationID yearFrom = int(options.yearFrom) monthFrom = int(options.monthFrom) yearTo = int(options.yearTo) monthTo = int(options.monthTo) # periodicity can only be h or d if periodicity == 'd': periodicity = 'daily' elif periodicity == 'h': periodicity = 'hourly' else: print 'PERIODICITY needs to be either "h" (hourly) or "d" (daily).' print 'Aborting...\n' sys.exit(1) # month has to be a sensible value if not (1 <= monthFrom <= 12 and 1 <= monthTo <= 12): print 'Months have to be between 1 and 12.' print 'Aborting...\n' sys.exit(1) # check there is no date in the future. if (yearTo > thisYear) or \ (yearTo == thisYear and monthTo > lastMonth): print 'The end year cannot be greater than the current year,' print 'and the end month cannot be greater than last month.' print 'Aborting...\n' sys.exit(1) #print 'periodicity:', periodicity #print 'stationID:', stationID #print 'yearFrom:', yearFrom #print 'monthFrom:', monthFrom #print 'yearTo:', yearTo #print 'monthTo:', monthTo alldata = [] if periodicity == 'daily': filename2 = filename4 = '' for Year in xrange(yearFrom, yearTo + 1): print 'Obtaining data for', Year li = CanadianWeather.dataList(stationID, Year) alldata.extend(li) else: filename2 = '-' + str(monthFrom).zfill(2) filename4 = '-' + str(monthTo).zfill(2) for Year in xrange(yearFrom, yearTo + 1): mf = 1 mt = 13 if Year == yearFrom: mf = monthFrom if Year == yearTo: mt = monthTo + 1 for Month in xrange(mf, mt): print 'Obtaining data for', Year, Month li = CanadianWeather.dataList(stationID, Year, Month) alldata.extend(li) filename = periodicity + '_' + stationID + '_' + str(yearFrom) filename += filename2 filename += '_to_' + str(yearTo) filename += filename4 filename += '_' + periodicity + '.pickle' print 'Writing pickled list of', len(alldata), 'rows to file:', filename fo = file(filename, 'w') cPickle.dump(alldata, fo) fo.close() import sys if __name__ == '__main__': sys.exit(main())