#!/usr/bin/env python # Wenchang Yang (wenchang@princeton.edu) # Sat Nov 14 17:02:20 EST 2020 if __name__ == '__main__': from misc.timer import Timer tt = Timer(f'start {__file__}') import sys, os.path, os, glob import xarray as xr, numpy as np, pandas as pd #import matplotlib.pyplot as plt #more imports # if __name__ == '__main__': tt.check('end import') # #start from here # load input data if len(sys.argv) > 1: ifile = sys.argv[1] else: ifile = '/home/hsiehtl/tigress/raw_tracks/amipHadISST_tigercpu_intelmpi_18_540PE_en01_yr1971to2018_mo1to12_rainPerc99.5_distThresh2.9_sizeThresh3.1_latLim30.nc' print('[ifile]:', ifile) print() # create a local symbolic link of the ifile if not created yet ofile = os.path.basename(ifile) if not os.path.exists(ofile): os.symlink(ifile, ofile) print('[link created]:', ifile, '-->', ofile) print() # exit if the target nc file already exists ofile = ofile.replace('.nc', '.wy.nc') # name of the nc file to be saved if os.path.exists(ofile): print('[exists]:', ofile) print() sys.exit() # load data from ifile da = xr.open_dataarray(ifile) da = da.transpose('info', 'track', 'lifetime') da.load() # collect dimension and coordinate information for the new ndarray with shape (n_info, n_years, n_storms_max, n_stages) year_ = da.isel(info=0, lifetime=0) years = np.arange(year_.min().item(), year_.max().item()+1) n_years = years.size # # of years n_storms_max = year_.groupby(year_).count().max().item() #max # of storms per year n_info = da.info.size n_stages = da.lifetime.size # # of points along the stage/lifetime dimension # create a new ndarray of nan (zz) to hold the data zz = np.zeros(shape=(n_info, n_years, n_storms_max, n_stages)) + np.nan # fill zz with values of each year from the input for i,yr in enumerate(years): print(f'{yr} of {years[-1]}') # sel data of the year tmp = da.isel(track=(year_==yr)) #dims: info, track, lifetime zz[:, i, 0:tmp.track.size, :] = tmp.values # zz dims: (info, year, storm, stage/lifetime) # ndarray to DataArray da_new = xr.DataArray(zz, dims=('info', 'year', 'storm', 'stage')) #dimension name changes from the original data: track --> storm; lifetime --> stage da_new = da_new.assign_coords(year=years, storm=range(1, n_storms_max+1), stage=np.arange(da_new.stage.size)*6) #da_new.stage.attrs['units'] = 'hours' # DataArray to Dataset vs = 'yyyy', 'lat', 'lon', 'vort', 'size', 'vref', 'month', 'day', 'hour' #yyyy is the variable name while year is the dimension name dc = {key:value for (key,value) in zip(vs, da_new)} # dictionary data ds = xr.Dataset(dc) # dict to Dataset # save ds to nc file nc_encoding = {vname:{'dtype':'float32', 'zlib': True, 'complevel': 1} for vname in vs+('year', 'storm', 'stage')} ds.to_netcdf(ofile, encoding=nc_encoding) print('[saved]:', ofile) if __name__ == '__main__': #from wyconfig import * #my plot settings tt.check(f'**Done**') #plt.show()