#!/usr/bin/env python # Wenchang Yang (wenchang@princeton.edu) # Tue Feb 25 14:26:24 EST 2020 import sys, os.path, os, datetime, glob import xarray as xr, numpy as np, pandas as pd #import matplotlib.pyplot as plt #print() #model = 'FLOR' #expname = ['Agung_PI_ens_noleap', 'CTL1860_noleap_tigercpu_intelmpi_18_576PE'][-1] #ens = None #years = range(1,100+1) #data_tag = 'atmos_month' # output type, e.g. atmos_month, ice_month, etc. def mopen(expname, model='FLOR', ens=None, years=None, data_tag='atmos_month', **kwargs): '''Open MODEL_OUT dataset given expname. Optional params include: model, ens, years, data_tag(output type, e.g. atmos_month) ''' if model == 'FLOR': pdir = f'/tigress/wenchang/MODEL_OUT/{expname}' else: pdir = f'/tigress/wenchang/MODEL_OUT/{model}/{expname}' if ens is None: # guess the possible ens _dirs = glob.glob(f'{pdir}/en*') if _dirs: # not empty ens = [int(d.split('/')[-1][2:]) for d in sorted(_dirs)] # e.g. #print('**ensembles**:', ens) if years is None: if ens is None: _files = glob.glob(f'{pdir}/POSTP/????0101.atmos_month.nc') _files.sort() years = [int(s.split('/')[-1][0:4]) for s in _files] #print('**years**:', years) else: _files = glob.glob(f'{pdir}/en01/POSTP/????0101.atmos_month.nc') _files.sort() years = [int(s.split('/')[-1][0:4]) for s in _files] #print('**years**:', years) # read dataset if ens is None: print('**years**:', years) ifiles = [f'{pdir}/POSTP/{year:04d}0101.{data_tag}.nc' for year in years] print('**ifiles**:\n\t', ifiles[0], '\n\t...\n\t', ifiles[-1]) ds = xr.open_mfdataset(ifiles, combine='by_coords', data_vars='minimal', parallel=True, **kwargs) else: print('**ens**:', ens) print('**years**:', years) ifiles = [[f'{pdir}/en{en:02d}/POSTP/{year:04d}0101.{data_tag}.nc' for year in years] for en in ens] print('**ifiles**:\n\t', ifiles[0][0], '\n\t...\n\t', ifiles[-1][-1]) ds = xr.open_mfdataset(ifiles, combine='nested', parallel=True, concat_dim=['en', 'time'], **kwargs) ds = ds.assign_coords({'en': ens}) return ds def get_data(expname, data_name, odata_name=None, model='FLOR', plev=None, **kwargs): '''get MODEL_OUT data from multiple files and save them into a single netcdf file''' if odata_name is None: odata_name = data_name # check if ofile exists if model == 'FLOR': ofile = f'{expname}.{odata_name}.nc' else: ofile = f'{model}.{expname}.{odata_name}.nc' if os.path.exists(ofile): print('[exists]:', ofile) return da = mopen(expname, **kwargs)[data_name] if plev: # select/inpterpolate onto one single p level da = da.interp(pfull=plev) # save to nc encoding = {odata_name: {'dtype': 'float32', 'zlib': True, 'complevel': 1}} print('saving to netcdf ...') da.load().to_dataset(name=odata_name).to_netcdf(ofile, encoding=encoding) #da.to_dataset(name=odata_name).to_netcdf(ofile) print('[saved]:', ofile) if __name__ == '__main__': tformat = '%Y-%m-%d %H:%M:%S' t0 = datetime.datetime.now() print() print('[start]:', t0.strftime(tformat)) expname = ['Agung_PI_ens_noleap', 'CTL1860_noleap_tigercpu_intelmpi_18_576PE'][0] #da = mopen(expname)['t_surf'] get_data(expname, data_name='t_ref') t1 = datetime.datetime.now() print('[total time used]:', f'{(t1-t0).seconds:,} seconds') print('[end]:', t1.strftime(tformat)) print()