#!/usr/bin/env python # Wenchang Yang (wenchang@princeton.edu) import datetime, glob, sys, os.path import numpy as np, xarray as xr, pandas as pd ifile = 'hurdat2-1851-2024-040425.txt' # columns of the txt file names = ['date', 'time', 'stormLabel', 'stormStatus', 'lat', 'lon', 'windmax', 'slp'] + [f's{ii:02d}' for ii in range(13)] # use pandas to read the txt file #df = pd.read_csv(ifile, '\s+', names=names) df_all = pd.read_csv(ifile, names=names) def get_lat_float(lat_string): """convert lat string to float, e.g. 25.0N -> 25.0""" if type(lat_string) is str: if lat_string.endswith('N'): return float(lat_string[:-1]) elif lat_string.endswith('S'): reurn -float(lat_string[:-1]) else: return float(lat_string) else: return float(lat_string) def get_lon_float(lon_string): """convert lon string to float, e.g. 25.0W -> -25.0""" if type(lon_string) is str: if lon_string.endswith('E'): return float(lon_string[:-1]) elif lon_string.endswith('W'): return -float(lon_string[:-1]) else: return float(lon_string) else: return float(lon_string) def tc_read(year, n_storms_bound=None, hour24=False): '''Extract TC information from txt file and convert to Dataset given year Return: xr.Dataset ''' year = str(year) #select data of the specified year only Lyear = np.array([d.startswith(year) or d.endswith(year) for d in df_all.date]) # select lines of the year df = df_all[Lyear].reset_index() #select the specified year and reset the index L = np.array([d.startswith('AL') for d in df.date]) # select lines of the year isummaries = df.index[L] istarts = isummaries + 1 # indices of storm starts iends = np.hstack( ( isummaries[1:]-1, df.index[-1])) # indices of storm ends # length of the storm dimension if n_storms_bound is None: n_storms_bound = L.sum() #n_storms_bound = L.sum() #names = ['date', 'time', 'stormLabel', 'stormStatus', 'lat', 'lon', 'windmax', 'slp'] + [f's{ii:02d}' for ii in range(13)] # save each variable associated the storms into a 2D ndarray shape = (n_storms_bound, 120) # n_storms, n_steps(6-hourly) lat = np.zeros(shape) + np.nan lon = np.zeros(shape) + np.nan windmax = np.zeros(shape) + np.nan slp = np.zeros(shape) + np.nan month = np.zeros(shape) + np.nan day = np.zeros(shape) + np.nan hour = np.zeros(shape) + np.nan # stormLabel = np.array([[' ']*120]*n_storms_bound) # label of storms, e.g. 'L' means landfall of the storm center stormStatus = np.array([[' ']*120]*n_storms_bound) #status of storms, e.g, 'HU' means hurricane, 'TS' means tropical storm stormName = np.array([' '*10]*n_storms_bound) # loop over all storms for i, (istart, iend) in enumerate(zip(istarts, iends)): nsteps = min(iend + 1 - istart, shape[1]) # number of steps along the track of a storm, sometimes greater than the specified number of steps shape[1] ilocs = slice(istart, istart+nsteps) _lats = np.array([get_lat_float(s) for s in df.iloc[ilocs]['lat'].values]) lat[i, :nsteps] = _lats _lons = np.array([get_lon_float(s) for s in df.iloc[ilocs]['lon'].values]) lon[i, :nsteps] = _lons windmax[i, :nsteps] = df.iloc[ilocs]['windmax'] slp[i, :nsteps] = df.iloc[ilocs]['slp'] time = pd.Index([ datetime.datetime(year=int(sdate[0:4]), month=int(sdate[4:6]), day=int(sdate[6:8]), hour=int(stime[:-2])) for sdate,stime in zip(df.iloc[ilocs]['date'].values, df.iloc[ilocs]['time'].values) ]) if hour24:# hours in the format of 06/12/18/24 time = time.shift(-1, 'H') # shift the time by 1 hour earlier to avoid problems associated with time boundary hour[i, :nsteps] = time.hour + 1 else: #hours in the format of 00/06/12/18 hour[i, :nsteps] = time.hour month[i, :nsteps] = time.month day[i, :nsteps] = time.day #storm labels _stormLabels = np.array([s.strip() for s in df.iloc[ilocs]['stormLabel'].values]) stormLabel[i, :nsteps] = _stormLabels #storm status _stormStatus = np.array([s.strip() for s in df.iloc[ilocs]['stormStatus'].values]) stormStatus[i, :nsteps] = _stormStatus stormName[i] = df.iloc[istart-1]['time'].strip() ##test #print(_stormLabels) #print(stormLabel[i,:nsteps]) #print(_stormStatus) #print(stormStatus[i,:nsteps]) #print(stormName[i]) #sys.exit() # wrap ndarray into DataArray dims = ('storm', 'stage') storm = xr.DataArray(np.arange(1, shape[0]+1)) stage = xr.DataArray(np.arange(shape[1])*6, attrs={'units': 'hours from genesis'}) coords = [storm, stage] lat = xr.DataArray(lat, dims=dims, coords=coords, attrs={'long_name': 'latitude', 'units': 'degree north'}) lon = xr.DataArray(lon, dims=dims, coords=coords, attrs={'long_name': 'longitude', 'units': 'degree east'}) windmax_knot = xr.DataArray(windmax, dims=dims, coords=coords, attrs={'long_name': 'max wind speed', 'units': 'knot'}) #the original units is knot windmax = xr.DataArray(windmax*0.514444, dims=dims, coords=coords, attrs={'long_name': 'max wind speed', 'units': 'm/s'}) #knots to m/s slp = xr.DataArray(slp, dims=dims, coords=coords, attrs={'long_name': 'sea level pressure', 'units': 'hPa'}) month = xr.DataArray(month, dims=dims, coords=coords) day = xr.DataArray(day, dims=dims, coords=coords) hour = xr.DataArray(hour, dims=dims, coords=coords) stormLabel = xr.DataArray(stormLabel, dims=dims, coords=coords) stormStatus = xr.DataArray(stormStatus, dims=dims, coords=coords) stormName = xr.DataArray(stormName, dims=('storm',), coords=(storm,)) # wrap DataArray into Dataset ds = xr.Dataset(dict(lat=lat, lon=lon, windmax_knot=windmax_knot, windmax=windmax, slp=slp, month=month, day=day, hour=hour, stormLabel=stormLabel, stormStatus=stormStatus, stormName=stormName)) ds = ds.where(ds!=-999) # -999 to NaN return ds years = range(1851,2024+1) ofile = f'tracks.hurdat2.{years[0]}-{years[-1]}.nc' if os.path.exists(ofile) and 'o' not in sys.argv: print('[exists]:', ofile); sys.exit() dss = [] for year in years: print(year) ds = tc_read(year) dss.append(ds) ds = xr.concat(dss, dim=pd.Index(years, name='year')) #save ds.to_netcdf(ofile) print('[saved]:', ofile)