{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# HIRAM-C360 Performance\n", "\n", "\n", "* Wenchang Yang (wenchang@princeton.edu)\n", "* Department of Geosciences, Princeton University" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2019-06-23T19:47:31.596605Z", "start_time": "2019-06-23T19:47:31.564877Z" }, "code_folding": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "**2019-06-23T15:47:31.581319**\n", ">>> Importing Python 3.7.3 packages...\n", "[OK]: import sys, os, os.path, datetime, glob\n", "[OK]: import numpy as np-1.16.3\n", "[OK]: import matplotlib as mpl-3.0.3; backend: module://ipykernel.pylab.backend_inline\n", "[OK]: #---import matplotlib.pyplot as plt\n", "[OK]: #---from pylab import *\n", "[OK]: import xarray as xr-0.12.1\n", "[OK]: #---import netCDF4\n", "[OK]: #---import dask\n", "[OK]: #---import bottleneck\n", "[OK]: import pandas as pd-0.24.2\n", "[OK]: from mpl_toolkits.basemap import Basemap\n", " PROJ_LIB = /scratch/gpfs/GEOCLIM/wenchang/miniconda3/envs/geoclim/share/proj\n", ">>>Import packages from Wenchang Yang (wython)...\n", "[OK]: import geoplots as gt\n", "[OK]: from geoplots import geoplot, fxyplot, mapplot, xticksyear\n", "[OK]: import geoxarray\n", "[OK]: import filter\n", "[OK]: import xlearn\n", "[OK]: import mysignal as sig\n", "**Done**\n" ] } ], "source": [ "# init\n", "%matplotlib inline\n", "%run -im pythonstartup\n", "%config InlineBackend.figure_format ='retina'\n", "plt.rcParams['figure.dpi'] = 125\n", "from subprocess import check_output\n", "import math" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-06-23T19:44:34.204031Z", "start_time": "2019-06-23T19:44:34.199831Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/tigress/wenchang/HIRAM/exp_C360/test_CTL1880s_C360\n" ] } ], "source": [ "cd /tigress/wenchang/HIRAM/exp_C360/test_CTL1880s_C360" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2019-06-23T19:44:37.707739Z", "start_time": "2019-06-23T19:44:37.525580Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[0m\u001b[38;5;27mCTL1880s_C360_1080PE_36X\u001b[0m/ HIRAM_namelists_CTL1880s_C360\r\n", "\u001b[38;5;27mCTL1880s_C360_1080PE_40X\u001b[0m/ \u001b[38;5;27mlog\u001b[0m/\r\n", "\u001b[38;5;27mCTL1880s_C360_1440PE_36X\u001b[0m/ \u001b[38;5;34mtest_HIRAM_CTL1880s_C360_540PE\u001b[0m*\r\n", "\u001b[38;5;27mCTL1880s_C360_1440PE_40X\u001b[0m/ \u001b[38;5;51mwork\u001b[0m@\r\n", "\u001b[38;5;27mCTL1880s_C360_720PE_36X\u001b[0m/ wy_HIRAM-C360_PE_scale.ipynb\r\n", "\u001b[38;5;27mCTL1880s_C360_720PE_40X\u001b[0m/\r\n" ] } ], "source": [ "ls" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "ExecuteTime": { "end_time": "2019-06-23T19:54:33.118532Z", "start_time": "2019-06-23T19:54:33.106512Z" }, "code_folding": [] }, "outputs": [], "source": [ "def get_dataframe(cases=None):\n", "# if cases is None:\n", "# cases = glob.glob('run_FLOR_ctl_*') \n", " \n", " columns = ('expname', 'ntasks-per-node', 'tot_pes', 'tot_nodes', 'tot_spmy', 'tot_throughput', 'tot_cost')\n", "\n", " records = []\n", " for case in cases:\n", " slurm_logs = glob.glob(f'{case}/slurm-*')\n", " if slurm_logs:\n", " slurm_logs.sort()\n", " else:\n", " continue\n", " \n", " # ntasks-per-node\n", " s = check_output(f'grep ntasks-per-node {case}/test_HIRAM_*', shell=True) \\\n", " .decode('utf-8').strip().split('=')\n", " ntasks_per_node = int(s[-1])\n", " \n", "\n", " for slog in slurm_logs:\n", " # each slurm output has a single PE layout\n", " try:\n", " s = check_output(f'grep \"statistics\" {slog} -A326 |grep \"Total runtime\" -m 1', shell=True) \\\n", " .decode('utf-8').strip().split()\n", " except:\n", " continue # this is from a failed run\n", " tot_pes = int(s[-1]) - int(s[-2]) + 1\n", "# tot_nodes = tot_pes//ntasks_per_node\n", " tot_nodes = math.ceil(tot_pes/ntasks_per_node)\n", " \n", " \n", " # expname\n", " expname = check_output(f'grep expname {slog}', shell=True) \\\n", " .decode('utf-8').strip().split()[-1]\n", " \n", " \n", " # tot seconds per model years and throughput (model years per day)\n", " ss = check_output(f'grep \"statistics\" {slog} -A326 |grep \"Total runtime\"', shell=True) \\\n", " .decode('utf-8').strip().split('\\n')\n", " tot_spmy_list = []\n", " tot_throughput_list = []\n", " tot_cost_list = []\n", " for s in ss:\n", " spmy = float(s.split()[4])*36.5\n", " throughput = 24/(spmy/3600)\n", " tot_spmy_list.append(spmy)\n", " tot_throughput_list.append(throughput)\n", " cost = tot_nodes * 40 * (spmy/3600)\n", " tot_cost_list.append(cost)\n", " \n", " # create records\n", " for (tot_spmy,tot_throughput, tot_cost) in zip(tot_spmy_list, tot_throughput_list,tot_cost_list):\n", " \n", " record = (expname, ntasks_per_node, tot_pes, tot_nodes, tot_spmy, tot_throughput, tot_cost)\n", " records.append(record)\n", "\n", "\n", " df = pd.DataFrame(records, columns=columns)\n", " return df" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2019-06-23T19:45:28.911671Z", "start_time": "2019-06-23T19:45:28.708087Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[0m\u001b[38;5;27mCTL1880s_C360_1080PE_36X\u001b[0m/ HIRAM_namelists_CTL1880s_C360\r\n", "\u001b[38;5;27mCTL1880s_C360_1080PE_40X\u001b[0m/ \u001b[38;5;27mlog\u001b[0m/\r\n", "\u001b[38;5;27mCTL1880s_C360_1440PE_36X\u001b[0m/ \u001b[38;5;34mtest_HIRAM_CTL1880s_C360_540PE\u001b[0m*\r\n", "\u001b[38;5;27mCTL1880s_C360_1440PE_40X\u001b[0m/ \u001b[38;5;51mwork\u001b[0m@\r\n", "\u001b[38;5;27mCTL1880s_C360_720PE_36X\u001b[0m/ wy_AM4_PE_scale.csv\r\n", "\u001b[38;5;27mCTL1880s_C360_720PE_40X\u001b[0m/ wy_HIRAM-C360_PE_scale.ipynb\r\n" ] } ], "source": [ "ls" ] }, { "cell_type": "markdown", "metadata": { "ExecuteTime": { "end_time": "2018-04-02T15:34:28.465428Z", "start_time": "2018-04-02T15:34:28.343191Z" } }, "source": [ "## FLOR_tiger2_intelmpi_18" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "ExecuteTime": { "end_time": "2019-06-23T20:40:55.135658Z", "start_time": "2019-06-23T20:40:53.268099Z" }, "code_folding": [], "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "DataFrame created for cases: ['CTL1880s_C360_1080PE_36X', 'CTL1880s_C360_1080PE_40X', 'CTL1880s_C360_1440PE_36X', 'CTL1880s_C360_1440PE_40X', 'CTL1880s_C360_540PE_36X', 'CTL1880s_C360_540PE_40X', 'CTL1880s_C360_720PE_36X', 'CTL1880s_C360_720PE_40X']\n" ] }, { "data": { "text/html": [ "
| \n", " | expname | \n", "ntasks-per-node | \n", "tot_pes | \n", "tot_nodes | \n", "tot_spmy | \n", "tot_throughput | \n", "tot_cost | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "test_CTL1880s_C360_tigercpu_intelmpi_18_1080PE... | \n", "36 | \n", "1080 | \n", "30 | \n", "21072.930623 | \n", "4.100047 | \n", "7024.310207 | \n", "
| 1 | \n", "test_CTL1880s_C360_tigercpu_intelmpi_18_1080PE... | \n", "36 | \n", "1080 | \n", "30 | \n", "21201.375984 | \n", "4.075207 | \n", "7067.125328 | \n", "
| 2 | \n", "test_CTL1880s_C360_tigercpu_intelmpi_18_1080PE... | \n", "40 | \n", "1080 | \n", "27 | \n", "23859.098043 | \n", "3.621260 | \n", "7157.729413 | \n", "
| 3 | \n", "test_CTL1880s_C360_tigercpu_intelmpi_18_1080PE... | \n", "40 | \n", "1080 | \n", "27 | \n", "24250.046331 | \n", "3.562880 | \n", "7275.013899 | \n", "
| 4 | \n", "test_CTL1880s_C360_tigercpu_intelmpi_18_1440PE... | \n", "36 | \n", "1440 | \n", "40 | \n", "17833.400753 | \n", "4.844841 | \n", "7925.955890 | \n", "
| 5 | \n", "test_CTL1880s_C360_tigercpu_intelmpi_18_1440PE... | \n", "36 | \n", "1440 | \n", "40 | \n", "18233.757135 | \n", "4.738464 | \n", "8103.892060 | \n", "
| 6 | \n", "test_CTL1880s_C360_tigercpu_intelmpi_18_1440PE... | \n", "40 | \n", "1440 | \n", "36 | \n", "20474.630288 | \n", "4.219856 | \n", "8189.852115 | \n", "
| 7 | \n", "test_CTL1880s_C360_tigercpu_intelmpi_18_1440PE... | \n", "40 | \n", "1440 | \n", "36 | \n", "20797.870820 | \n", "4.154271 | \n", "8319.148328 | \n", "
| 8 | \n", "test_CTL1880s_C360_tigercpu_intelmpi_18_540PE_36X | \n", "36 | \n", "540 | \n", "15 | \n", "39308.529474 | \n", "2.197996 | \n", "6551.421579 | \n", "
| 9 | \n", "test_CTL1880s_C360_tigercpu_intelmpi_18_540PE_40X | \n", "40 | \n", "540 | \n", "14 | \n", "41838.382325 | \n", "2.065089 | \n", "6508.192806 | \n", "
| 10 | \n", "test_CTL1880s_C360_tigercpu_intelmpi_18_720PE_36X | \n", "36 | \n", "720 | \n", "20 | \n", "29888.442232 | \n", "2.890750 | \n", "6641.876051 | \n", "
| 11 | \n", "test_CTL1880s_C360_tigercpu_intelmpi_18_720PE_36X | \n", "36 | \n", "720 | \n", "20 | \n", "29878.920732 | \n", "2.891671 | \n", "6639.760163 | \n", "
| 12 | \n", "test_CTL1880s_C360_tigercpu_intelmpi_18_720PE_40X | \n", "40 | \n", "720 | \n", "18 | \n", "32865.550570 | \n", "2.628893 | \n", "6573.110114 | \n", "