#!/usr/bin/env python #2022-03-02: copy from /tigress/wenchang/analysis/climate_and_disease/data/China/wyget_era5_t2m_subregion_update20211027.py if __name__ == '__main__': import sys from misc.timer import Timer wyt = Timer('start ' + ' '.join(sys.argv)) import xarray as xr import matplotlib.pyplot as plt import os, os.path, sys, time import salem import pandas as pd import numpy as np import geoxarray from misc.shell import run_shell #cntry_name = 'Cambodia' #cntry_name_short = 'KHM' cntry_name = sys.argv[1] d = dict(China='CHN', Colombia='COL', Ecuador='ECU', ElSalvador='SLV', Ethiopia='ETH', Kenya='KEN', Laos='LAO', Malaysia='MYS', Mozambique='MOZ', Nicaragua='NIC', Peru='PER', Philippines='PHL', USA='USA', Singapore='SGP', SriLanka='LKA', Taiwan='TWN', Thailand='THA', Uganda='UGA', Venezuela='VEN', Vietnam='VNM', PuertoRico='PRI') cntry_name_short = d[cntry_name] cntry_sname = cntry_name.replace(' ', '') daname = 't_ref' dsname = 'land_daily' dname_long = '2m_temperature' model = 'AM4_urban' expname = 'amip1870_urban_luh2_wasteCool_tigercpu_intelmpi_18_576PE' ens = 0 shapefile = f'../shapefiles/gadm40_{cntry_name_short}_shp/gadm40_{cntry_name_short}_1.shp' print(f'{shapefile = }') shdf = salem.read_shapefile(shapefile) #name_cols = ['ADM1_EN', 'ADM1_PCODE'] name_col = 'NAME_1' #t = pd.date_range('1999-01', '2020-12', freq='MS') #years, months = t.year.values, t.month.values years = range(1991, 2020+1) ofile = f'{daname}.{dsname}.{model}.{expname}.{cntry_sname}.subregion.ens{ens:02d}.{years[0]}-{years[-1]}.nc' if os.path.exists(ofile): print('[exists]:', ofile) sys.exit() if not os.path.exists('tmp'): os.symlink('/home/wenchang/scratch/TMP/tmp', 'tmp') print('[symlink created]: /home/wenchang/scratch/TMP/tmp -> tmp') ofiles = [] for year in years: print(f'{cntry_name}; {daname}; {dsname}; {ens = :02d}; {year = :04d}') ifile = f'/tigress/wenchang/MODEL_OUT/{model}/{expname}/en{ens:02d}/POSTP/{year:04d}0101.{dsname}.nc' if year>2014: ifile = ifile.replace(f'{expname}', f'{expname}_extend') #years >2014 in the extend dir ofile = f'tmp/{daname}.{dsname}.{model}.{expname}.{cntry_sname}.subregion.ens{ens:02d}.{year:04d}.nc' ofiles.append(ofile) if os.path.exists(ofile): print('[exists]:', ofile) continue print(ifile, 'loading...') ds = xr.open_dataset(ifile)[[daname]].load().rename(dict(grid_xt='lon', grid_yt='lat')) ds = ds.drop(['geolon_t', 'geolat_t']) #not needed # transform lon [0,360) -> [-180,180) if ds.lon.max() > 180: ds = ds.roll(lon=ds.lon.size//2, roll_coords=True) lon_new = ds.lon.where(ds.lon<180, other=ds.lon-360).values ds = ds.assign_coords(lon=lon_new) #weight file wgt_file = f'/tigress/wenchang/MODEL_OUT/{model}/{expname}/en{ens:02d}/POSTP/{year:04d}0101.land_static.nc' if year>2014: wgt_file = wgt_file.replace(f'{expname}', f'{expname}_extend') #years >2014 in the extend dir wgt = xr.open_dataset(wgt_file)['land_area'].load().rename(dict(grid_xt='lon', grid_yt='lat')) # transform lon [0,360) -> [-180,180) if wgt.lon.max() > 180: wgt = wgt.roll(lon=wgt.lon.size//2, roll_coords=True) lon_new = wgt.lon.where(wgt.lon<180, other=wgt.lon-360).values wgt = wgt.assign_coords(lon=lon_new) das = [] #region_IDs = [] region_names = [] N = len(shdf) for i in range(N): shape_sub = shdf.iloc[i:i+1, :] r = shdf.iloc[i, :] #lon_slice = slice(r.min_x, r.max_x) #lat_slice = slice(r.min_y, r.max_y) #g = r.geometry region_name = '_'.join( r[name_col].split() ) #name_col = 'NAME_1' try: #da = ds[daname].salem.roi(geometry=g).geo.fldmean() da = ds[daname].salem.subset(shape=shape_sub, margin=1) \ .salem.roi(shape=shape_sub) #weights of subregion wgt_sub = wgt.salem.subset(shape=shape_sub, margin=1) \ .salem.roi(shape=shape_sub) \ .fillna(0) if year==years[0] and i==0: da.isel(time=0).plot() plt.savefig('wytest_data.png') plt.close() wgt_sub.plot() plt.savefig('wytest_wgt.png') plt.close() #weighted mean da = da.weighted(wgt_sub).mean(['lon', 'lat']) print('[OK]:', i+1,'of', N, region_name) except ValueError: #sub shape area is too small to be selected; use value interpolated on centroid instead centroid0 = shape_sub.centroid.iloc[0] lon0 = centroid0.x lat0 = centroid0.y da = ds[daname].interp(lat=lat0, lon=lon0).drop(['lon', 'lat']) #print('\t[Failed]', i+1,'of', N, region_name) print('\t[single point used]', i+1,'of', N, region_name) das.append(da) region_names.append(region_name) i_record = range(len(das)) da = xr.concat(das, pd.Index(region_names, name='region_name')).transpose() da.attrs = ds[daname].attrs ds_new = da.to_dataset(name=daname) ds_new.attrs = ds.attrs ds_new.attrs['region_name'] = '; '.join(region_names) ds_new.to_netcdf(ofile, unlimited_dims='time', encoding={daname: {'_FillValue': None}}) print('[saved]:', ofile) #ncrcat ofile = ofiles[0].replace('tmp/', '').replace(f'{years[0]}', f'{years[0]}-{years[-1]}') if os.path.exists(ofile): print('[exists]:', ofile) else: cmd = f'ncrcat {" ".join(ofiles)} {ofile}' run_shell(cmd) if __name__ == '__main__': wyt.check('**done**')