#!/usr/bin/env python
# Wenchang Yang (wenchang@princeton.edu)
# Wed Jun 25 04:14:42 PM EDT 2025
"""Build a de-duplicated table of survey locations with their coordinates.

Reads ``Data-added-Ingrid-Jun2025.csv``, keeps the rows that have a
longitude, derives a ``location`` key of the form ``<country>_<town>``
(falling back to ``<country>_<state>`` when the town is missing), and writes
the unique ``location`` / ``Latitude`` / ``Longitude`` rows to
``Data-added-Ingrid-Jun2025_subset.csv``.

Command-line flags (matched against ``sys.argv`` when run as a script):
    savefig / s       save a figure next to this file via ``wysavefig``
    overwritefig / o  pass ``overwritefig=True`` to ``wysavefig``
    notshowfig / n    do not call ``plt.show()`` at the end
"""

# Optional run timer; stays None when the helper package cannot be loaded.
tt = None
if __name__ == '__main__':
    import sys
    import os
    try:
        from misc.timer import Timer
        tt = Timer(f'[{os.getcwd()}] start ' + ' '.join(sys.argv))
    except Exception:
        # Best effort only: the timer is a convenience and must never stop
        # the script (Ctrl-C / SystemExit are deliberately not swallowed).
        pass

import sys
import os.path
import os
import glob
import datetime

import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# more imports
wython = '/tigress/wenchang/wython'
if wython not in sys.path:
    sys.path.append(wython)
    print('added to python path:', wython)
# from misc import get_kws_from_argv


def _timer_check(label):
    """Report a checkpoint on the optional timer; do nothing without one."""
    if tt is None:
        return
    try:
        tt.check(label)
    except Exception:
        # Timing output is informational; a failing timer is ignored.
        pass


def _compact_name(value):
    """Return *value* without spaces and hyphens, or None if it is not text.

    Missing CSV cells are read by pandas as float NaN, so any non-string
    value is treated as "not available".
    """
    if not isinstance(value, str):
        return None
    return value.replace(' ', '').replace('-', '')


def _make_location(country, town, state, row_label=None):
    """Build the location key for one row.

    Args:
        country: value of the 'Location (country)' column.
        town: value of the 'Location (town)' column (may be missing).
        state: value of the 'Location (state)' column (may be missing).
        row_label: index label of the row, used only in the error message.

    Returns:
        '<country>_<town>' when the town is a string, otherwise
        '<country>_<state>' when the state is a string, otherwise just
        '<country>'.

    Raises:
        ValueError: if the country is missing (not a string).
    """
    country_key = _compact_name(country)
    if country_key is None:
        raise ValueError(
            f"row {row_label!r}: 'Location (country)' is missing; "
            "cannot build a location name"
        )
    place_key = _compact_name(town)
    if place_key is None:
        # No usable town: fall back to the state.
        place_key = _compact_name(state)
    if place_key is None:
        # Neither town nor state is available; previously this case raised
        # AttributeError on the NaN state value.
        return country_key
    return f'{country_key}_{place_key}'


#
if __name__ == '__main__':
    _timer_check('end import')

#
# start from here
ifile = 'Data-added-Ingrid-Jun2025.csv'
ofile = ifile.replace('.csv', '_subset.csv')
df = pd.read_csv(ifile)
# Columns of the input file (from an interactive ``df.columns`` call):
#   'Reference (authors, date, journal)',
#   'Year of data collection (Final Year of Data Collection)',
#   'Location (country)', 'Lower bound of age', 'Upper bound of age',
#   'Number of people seropositive', 'Number of people tested',
#   'Latitude', 'Longitude', 'Temp ©', 'Location (town)',
#   'Location (state)', 'link (if helpful)', 'notes',
#   'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18'

# select valid values; copy so the new column below is written to an
# independent frame rather than to a slice of the original one
df = df.loc[~df['Longitude'].isna()].copy()

# create a new column named "location"
locations = []
for ii, r in df.iterrows():
    print(ii, r)
    town = r['Location (town)']
    print(type(town))
    locations.append(
        _make_location(
            r['Location (country)'],
            town,
            r['Location (state)'],
            row_label=ii,
        )
    )
df['location'] = locations

# select only useful columns
df = df[['location', 'Latitude', 'Longitude']]

# drop duplicates
df = df.drop_duplicates()

# The index is intentionally left as is, so the saved file keeps the row
# labels of the input rows as its first column.

# save
df.to_csv(ofile)
print('[saved]:', ofile)


if __name__ == '__main__':
    # from wyconfig import *  # my plot settings

    # savefig
    if 'savefig' in sys.argv or 's' in sys.argv:
        figname = __file__.replace('.py', '.png')
        # wysavefig is not defined in this file (the wyconfig import above is
        # commented out), so look it up instead of risking a NameError.
        save_figure = globals().get('wysavefig')
        if save_figure is None:
            print('[savefig skipped]: wysavefig is not defined')
        elif 'overwritefig' in sys.argv or 'o' in sys.argv:
            save_figure(figname, overwritefig=True)
        else:
            save_figure(figname)

    _timer_check('**Done**')
    print()

    hide_figure = 'notshowfig' in sys.argv or 'n' in sys.argv
    if not hide_figure and 'plt' in globals():
        plt.show()