#!/usr/bin/env python #update2021-10-27: use interp to get values over sub regions that are too tiny to be resolved by input data grids. impact: Macao now has values if __name__ == '__main__': from misc.timer import Timer wyt = Timer(f'start {__file__}') import xarray as xr import os, os.path, sys, time import salem import pandas as pd import numpy as np import geoxarray from misc.shell import run_shell from misc import get_kws_from_argv region_name = 'Colorado' data_name = get_kws_from_argv('data_name', 't2m') #other options: mx2t, mn2t if data_name == 't2m': dname_long = '2m_temperature' elif data_name == 'mx2t': dname_long = '2m_temperature_max' elif data_name == 'mn2t': dname_long = '2m_temperature_min' elif data_name == 'q2m': dname_long = '2m_specific_humidity' xname = 'longitude' yname = 'latitude' shape_file = 'shape/marmot_polygons_wgs84.shp' gdf = salem.read_shapefile(shape_file) gdf = gdf.to_crs(epsg=4326) #the raw crs has units of meter; convert to degrees of lon/lat #name_cols = ['ADM1_EN', 'ADM1_PCODE'] name_col = 'Site' #t = pd.date_range('2009-01', '2014-12', freq='MS') t = pd.date_range('1979-01', '2024-12', freq='MS') years, months = t.year.values, t.month.values print(data_name) ofiles = [] for year,month in zip(t.year, t.month): print(f'{year:04d}-{month:02d}') ifile = f'/tigress/wenchang/data/era5/analysis/{dname_long}/daily/era5.{dname_long}.daily.{year:04d}-{month:02d}.nc' ofile = f'tmp/era5.{data_name}.daily.{region_name}.subregion.{year:04d}-{month:02d}.nc' ofiles.append(ofile) if os.path.exists(ofile): print('[exists]:', ofile) continue print(ifile, 'loading...') ds = salem.open_xr_dataset(ifile).load() # transform lon [0,360) -> [-180,180) ds = ds.roll(longitude=ds.longitude.size//2, roll_coords=True) lon_new = ds.longitude.where(ds.longitude<180, other=ds.longitude-360).values ds = ds.assign_coords(longitude=lon_new) das = [] #region_IDs = [] subregion_names = [] N = len(gdf) for i in range(N): shape_sub = gdf.iloc[i:i+1, :] r = gdf.iloc[i, :] #lon_slice = slice(r.min_x, r.max_x) #lat_slice = slice(r.min_y, r.max_y) g = r.geometry #province, pcode = r[name_cols[0]].split(' ')[0], r[name_cols[1]]#name_cols = ['ADM1_EN', 'ADM1_PCODE'] #province, pcode = '_'.join(r[name_cols[0]].split()), r[name_cols[1]]#name_cols = ['ADM1_EN', 'ADM1_PCODE'] #subregion_name = '_'.join([province,pcode]) subregion_name = r[name_col] try: #da = ds[data_name].salem.roi(geometry=g).geo.fldmean() da = ds[data_name].salem.subset(shape=shape_sub, margin=1) \ .salem.roi(shape=shape_sub)\ .geo.fldmean() print('[OK]:', i+1,'of', N, subregion_name) except: centroid0 = shape_sub.centroid.iloc[0] lon0 = centroid0.x lat0 = centroid0.y da = ds[data_name].interp(latitude=lat0, longitude=lon0).drop(['longitude', 'latitude']) #print('\t[Failed]', i+1,'of', N, region_name) print('\t[single point used]', i+1,'of', N, subregion_name) das.append(da) subregion_names.append(subregion_name) i_record = range(len(das)) da = xr.concat(das, pd.Index(subregion_names, name='subregion_name')).transpose() da.attrs = ds[data_name].attrs ds_new = da.to_dataset(name=data_name) ds_new.attrs = ds.attrs ds_new.attrs['subregion_name'] = '; '.join(subregion_names) #scodes = xr.DataArray(scodes, dims='i_county', coords=[i_county,]) #ccodes = xr.DataArray(ccodes, dims='i_county', coords=[i_county,]) #xr.Dataset(dict(precip=da, scode=scodes, ccode=ccodes)).to_netcdf(f'pr{year}.nc', unlimited_dims='time') ds_new.to_netcdf(ofile, unlimited_dims='time', encoding={data_name: {'_FillValue': None}}) print('[saved]:', ofile) #ncrcat ofile = ofiles[0].replace('tmp/', '').replace(f'{years[0]}-{months[0]:02d}', f'{years[0]}-{years[-1]}') cmd = f'ncrcat {" ".join(ofiles)} {ofile}' run_shell(cmd) if __name__ == '__main__': wyt.check('**done**')