In [1]:
import sys
import os
from tempfile import NamedTemporaryFile
from gc import collect
from time import sleep
sys.path.append('../util')
from meters import ThroughputMeter, clear_host_cache
from ncgen import make_nc
from grids import *
import netCDF4
import numpy as np
In [2]:
def write_netcdf_file(timescale, time_major=True, grid=canada_5k, tmp_dir='/app/tmp'):
    """Create a temporary NetCDF test file and return its file handle.

    Parameters
    ----------
    timescale : sized
        Time steps passed through to ``make_nc``; its length is reported in
        the progress message.
    time_major : bool
        Forwarded to ``make_nc`` as ``timemajor``.
    grid : dict
        Grid description; ``grid['lon']['count']`` and
        ``grid['lat']['count']`` are read for the progress message and the
        whole dict is forwarded to ``make_nc``.
    tmp_dir : str
        Directory in which the temporary file is created.

    Returns
    -------
    file object
        The (already closed) ``NamedTemporaryFile``. The file itself is left
        on disk (``delete=False``); the caller is responsible for removing
        ``f.name``.
    """
    layout = 'major' if time_major else 'minor'
    print("Creating a time-{} NetCDF file with {}x{} grid and {} time steps".format(layout, grid['lon']['count'], grid['lat']['count'], len(timescale)))
    with NamedTemporaryFile(suffix='.nc', delete=False, dir=tmp_dir) as f:
        try:
            nc = make_nc(f.name, grid=grid, timescale=timescale, timemajor=time_major)
            nc.close()
        except BaseException:
            # delete=False means nobody else will clean up a partially
            # written file, so remove it here before propagating the error.
            f.close()
            if os.path.exists(f.name):
                os.remove(f.name)
            raise
    print("File size: {:.2f}Mb".format(os.path.getsize(f.name)/1024/1024))
    return f
Note: the `../tmp` path in the Docker container points at rotating-media storage.
In [3]:
def netcdf_read_test(f, time_major, n_timesteps=None):
    """Time the read of one slice from a NetCDF test file, then delete the file.

    Parameters
    ----------
    f : file object
        Object whose ``name`` attribute is the path of the NetCDF file to
        read. The file is removed and ``f`` is closed before returning,
        whether or not the read succeeds.
    time_major : bool
        Selects the slice read from ``var_0``: ``[0, :, :]`` when true,
        ``[:, :, 0]`` otherwise.
    n_timesteps : int, optional
        Number of time steps to record in the result. When omitted, falls
        back to ``len(timescale)`` using the notebook-global ``timescale``
        (set by the driver loop), which preserves the historical behaviour.

    Returns
    -------
    tuple
        ``(time_major, n_timesteps, throughput)`` where ``throughput`` is the
        value of ``ThroughputMeter.megabytes_per_second`` for the array read.
    """
    if n_timesteps is None:
        # Hidden dependency on notebook state: `timescale` is the loop
        # variable of the driver cell. Pass n_timesteps to avoid it.
        n_timesteps = len(timescale)

    # Equivalent to var[0, :, :] (time-major) or var[:, :, 0] (time-minor).
    everything = slice(None)
    if time_major:
        index = (0, everything, everything)
    else:
        index = (everything, everything, 0)

    try:
        # Open the file just created
        nc = netCDF4.Dataset(f.name, 'r')
        try:
            with ThroughputMeter() as t:
                a = nc.variables['var_0'][index]
            res = (time_major, n_timesteps, t.megabytes_per_second(a))
        finally:
            # Actually call close(): a bare `nc.close` is only an attribute
            # lookup and leaves the file descriptor open.
            nc.close()
            del nc
    finally:
        # Always reclaim the disk space, even when the read fails, so that
        # large test files do not pile up between runs.
        print("Removing {}".format(f.name))
        os.remove(f.name)
        f.close()
        collect()
    return res
In [4]:
# Storage layouts to benchmark: time-major first, then time-minor.
time_major = [True, False]
# Grid definitions to benchmark, in the order they are run.
grids = [world_250k, world_125k, canada_5k, bc_400m]
# 'daily' is deliberately left out: it takes hours and hours to run.
ts = [timescales[key] for key in ('seasonal', 'annual', 'monthly')]
In [5]:
# Run one write/read cycle for every (layout, grid, timescale) combination.
# The innermost loop variable must stay named `timescale`: netcdf_read_test
# reads that notebook global when it builds its result tuple.
results = []
for major_flag in time_major:
    for grid_def in grids:
        for timescale in ts:
            nc_file = write_netcdf_file(timescale, time_major=major_flag, grid=grid_def)
            # Presumably drops the host's file cache so the timed read hits
            # the disk -- confirm against util/meters.py.
            clear_host_cache()
            results.append(netcdf_read_test(nc_file, major_flag))
In [6]:
# Each entry is a (time_major, number of time steps, throughput) tuple as
# returned by netcdf_read_test.
results
Out[6]:
Reshape the throughput values to dimensions (time_major, grid, timescale), matching the nesting order of the loops above.
In [7]:
# Keep only the throughput (third field of each result tuple), in run order.
throughput_flat = np.array([mbps for _, _, mbps in results])
# Fold the flat run order back into (layout, grid, timescale) axes.
a = throughput_flat.reshape((len(time_major), len(grids), len(ts)))
a
Out[7]:
In [8]:
import matplotlib.pyplot as plt
%matplotlib inline
f, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1)
f.set_size_inches(12, 12)
ax1.plot(a[0,0,:], label="mjr")
ax1.plot(a[1,0,:], label="mnr")
ax1.legend()
ax1.set_title("world_250k")
ax1.set_xticks(range(len(ts)))
ax1.set_xticklabels([len(x) for x in ts])
ax2.plot(a[0,1,:], label="mjr")
ax2.plot(a[1,1,:], label="mnr")
ax2.set_title("world_125k")
ax2.set_xticks(range(len(ts)))
ax2.set_xticklabels([len(x) for x in ts])
ax3.plot(a[0,2,:], label="mjr")
ax3.plot(a[1,2,:], label="mnr")
ax3.set_title("canada_5k")
ax3.set_xticks(range(len(ts)))
ax3.set_xticklabels([len(x) for x in ts])
ax4.plot(a[0,3,:], label="mjr")
ax4.plot(a[1,3,:], label="mnr")
ax4.set_title("bc_400m")
ax4.set_xticks(range(len(ts)))
ax4.set_xticklabels([len(x) for x in ts])
Out[8]:
In [ ]: