#!/usr/bin/env python # Wenchang Yang (wenchang@princeton.edu) # Tue Oct 22 16:00:38 EDT 2019 if __name__ == '__main__': from misc.timer import Timer tt = Timer(f'start {__file__}') from datetime import datetime import os.path, sys, os import xarray as xr, numpy as np, pandas as pd #import matplotlib.pyplot as plt if __name__ == '__main__': tt.check('end import') #ifile = '../../IBTrACS.ALL.v04r00.nc' ifile = '../../IBTrACS.ALL.v04r00.2022-01-25.nc' #years = range(1980, 2019+1) years = range(1980, 2021+1) n_years = len(years) n_storms_max = 200 n_stages_max = 360 ofile = os.path.basename(ifile).replace('.nc', f'.tracksByYear.{years[0]}-{years[-1]}.nc' ) if os.path.exists(ofile): print('[exists]:', ofile) sys.exit() if __name__ == '__main__': # tc_year is based on the year of the initial time point ds = xr.open_dataset(ifile) ds['tc_year'] = xr.DataArray([int( t.astype('str').split('-')[0] ) for t in ds.time.isel(date_time=0).values], dims='storm') # the given season is wrong da = xr.DataArray(np.zeros(shape=(n_years, n_storms_max, n_stages_max)), dims=['year', 'storm', 'stage'], coords=[years, np.arange(1, n_storms_max+1), np.arange(n_stages_max)*3] ).pipe(lambda x: x.where(x>0) ) yr = da.copy() mo = da.copy() dy = da.copy() hr = da.copy() lat = da.copy() lon = da.copy() windmax = da.copy() slp = da.copy() for year in years: # n_storms = ds.number.isel(storm=ds.tc_year==year).count().item() # not reliable since some end-of-year storms are grouped into next year n_storms = ds.time[(ds.tc_year==year)].storm.size print(f'{year}/{years[-1]}: {n_storms} storms') # for storm in range(1, n_storms+1): for storm,time in enumerate(ds.time[(ds.tc_year==year)], start=1): # time shape (n_stages_max,); storm starts from 1 to n_storms print(storm, end='; ') year_vec = np.array([int( t.astype('str').split('-')[0] ) if t.astype('str')!='NaT' else np.nan for t in time.values]) yr.loc[dict(year=year, storm=storm)] = year_vec month_vec = np.array([int( t.astype('str').split('-')[1] ) if t.astype('str')!='NaT' else np.nan for t in time.values]) mo.loc[dict(year=year, storm=storm)] = month_vec day_vec = np.array([int( t.astype('str').split('-')[2][0:2] ) if t.astype('str')!='NaT' else np.nan for t in time.values]) dy.loc[dict(year=year, storm=storm)] = day_vec hour_vec = np.array([int( t.astype('str').split('-')[2][3:5] ) if t.astype('str')!='NaT' else np.nan for t in time.values]) hr.loc[dict(year=year, storm=storm)] = hour_vec lat.loc[dict(year=year, storm=storm)] = \ ds.lat[ds.tc_year==year].isel(storm=storm-1).values # storm starts with 1 lon.loc[dict(year=year, storm=storm)] = \ ds.lon[ds.tc_year==year].isel(storm=storm-1).values windmax.loc[dict(year=year, storm=storm)] = \ ds.usa_wind[ds.tc_year==year].isel(storm=storm-1).values slp.loc[dict(year=year, storm=storm)] = \ ds.usa_pres[ds.tc_year==year].isel(storm=storm-1).values print() # create a new Dataset to hold the variables windmax = windmax * 0.514444 # knot to m/s windmax = windmax.assign_attrs(units='m/s', long_name='usa_wind') slp = slp.assign_coords(units='mb', long_name='usa_pres') ds_out = xr.Dataset(dict( yr=yr, month=mo, day=dy, hour=hr, lat=lat, lon=lon, windmax=windmax, slp=slp)) #save to netcdf encoding = {v:{'dtype': 'float32', 'zlib':True, 'complevel': 1} for v in ['yr', 'month', 'day', 'hour', 'year', 'storm', 'stage']} for v in ['lat', 'lon', 'windmax', 'slp']: encoding[v] = {'dtype':'float32', 'zlib':True, 'complevel': 1} ds_out.to_netcdf(ofile, encoding=encoding) print('[saved]:', ofile) tt.check('**Done**')