In [1]:
import numpy as np
import xray
import os
import re
from matplotlib import pyplot as plt
%matplotlib inline
In [2]:
ddir = '/data/scratch/rpa/channel_moc/GCM/run'
In [3]:
def parse_available_diagnostics(fname):
all_diags = {}
with open(fname) as f:
# will automatically skip first four header lines
for l in f:
c = re.split('\|',l)
if len(c)==7 and c[0].strip()!='Num':
key = c[1].strip()
levs = int(c[2].strip())
mate = c[3].strip()
if mate: mate = int(mate)
code = c[4]
units = c[5].strip()
desc = c[6].strip()
all_diags[key] = MITgcmDiagnosticDescription(
key, code, units, desc, levs, mate)
return all_diags
class MITgcmDiagnosticDescription(object):
def __init__(self, key, code, units=None, desc=None, levs=None, mate=None):
self.key = key
self.levs = levs
self.mate = mate
self.code = code
self.units = units
self.desc = desc
def coords(self):
"""Parse code to determine coordinates."""
hpoint = self.code[1]
rpoint = self.code[8]
rlev = self.code[9]
xcoords = {'U': 'XG', 'V': 'XC', 'M': 'XC', 'Z': 'XG'}
ycoords = {'U': 'YC', 'V': 'YG', 'M': 'YC', 'Z': 'YG'}
rcoords = {'M': 'RC', 'U': 'RFu', 'L': 'RFl'}
if rlev=='1':
return (ycoords[hpoint], xcoords[hpoint])
elif rlev=='R':
return (rcoords[rpoint], ycoords[hpoint], xcoords[hpoint])
else:
raise ValueError("Don't know what to do with rlev = " + rlev)
In [68]:
all_diags = parse_available_diagnostics(os.path.join(ddir, 'available_diagnostics.log'))
In [71]:
diag = all_diags['UDIAG9']
In [72]:
diag.coords()
In [5]:
print all_diags['PHIBOT'].coords()
print all_diags['PHIBOT'].code
In [6]:
def parse_meta(fname):
flds = {}
basename = re.match('(^\w+)', os.path.basename(fname)).groups()[0]
flds['basename'] = basename
with open(fname) as f:
text = f.read()
# split into items
for item in re.split(';', text):
# remove whitespace at beginning
item = re.sub('^\s+', '', item)
#match = re.match('(\w+) = ', item)
match = re.match('(\w+) = (\[|\{)(.*)(\]|\})', item, re.DOTALL)
if match:
key, _, value, _ = match.groups()
# remove more whitespace
value = re.sub('^\s+', '', value)
value = re.sub('\s+$', '', value)
#print key,':', value
flds[key] = value
# now check the needed things are there
needed_keys = ['dimList','nDims','nrecords','dataprec']
for k in needed_keys:
assert flds.has_key(k)
# transform datatypes
#print flds
flds['nDims'] = int(flds['nDims'])
flds['nrecords'] = int(flds['nrecords'])
# use big endian always
flds['dataprec'] = np.dtype(re.sub("'",'',flds['dataprec'])).newbyteorder('>')
flds['dimList'] = [[int(h) for h in
re.split(',', g)] for g in
re.split(',\n',flds['dimList'])]
if flds.has_key('fldList'):
flds['fldList'] = [re.match("'*(\w+)",g).groups()[0] for g in
re.split("' '",flds['fldList'])]
assert flds['nrecords'] == len(flds['fldList'])
return flds
In [7]:
def read_mds(fname, iternum=None, use_mmap=True,
force_dict=False, convert_big_endian=False):
if iternum is None:
istr = ''
else:
assert isinstance(iternum, int)
istr = '.%010d' % iternum
datafile = fname + istr + '.data'
metafile = fname + istr + '.meta'
# get metadata
meta = parse_meta(metafile)
# why does the .meta file contain so much repeated info?
# just get the part we need
# and reverse order (numpy uses C order, mds is fortran)
shape = [g[0] for g in meta['dimList']][::-1]
assert len(shape) == meta['nDims']
# now add an extra for number of recs
nrecs = meta['nrecords']
shape.insert(0, nrecs)
# load and shape data
if use_mmap:
d = np.memmap(datafile, meta['dataprec'], 'r')
else:
d = np.fromfile(datafile, meta['dataprec'])
if convert_big_endian:
dtnew = d.dtype.newbyteorder('=')
d = d.astype(dtnew)
d.shape = shape
if nrecs == 1:
if meta.has_key('fldList'):
name = meta['fldList'][0]
else:
name = meta['basename']
if force_dict:
return {name: d[0]}
else:
return d[0]
else:
# need record names
out = {}
for n, name in enumerate(meta['fldList']):
out[name] = d[n]
return out
In [8]:
ds = xray.Dataset()
grid_files = ['XC','XG','YG','YG','RC','RF']
geom_files = ['DXC','DXG','DYC','DYG','DRF','DRF','RAC','RAS','RAW','RAZ','hFacC','hFacS','hFacW']
def grid_path(vname):
return os.path.join(ddir, vname)
# 1D orthogonal dimensions
# This is valid for cartesian geometry only!
# ( and maybe spherical polar grid)
ds.coords['XC'] = ('XC', read_mds(grid_path('XC'))[0,:])
ds.coords['XG'] = ('XG', read_mds(grid_path('XG'))[0,:])
ds.coords['YC'] = ('YC', read_mds(grid_path('YC'))[:,0])
ds.coords['YG'] = ('YG', read_mds(grid_path('YG'))[:,0])
ds.coords['RC'] = ('RC', read_mds(grid_path('RC'))[:,0,0])
ds.coords['RF'] = ('RF', read_mds(grid_path('RF'))[:,0,0])
# these are for actual variables
ds.coords['RFl'] = ('RFl', read_mds(grid_path('RF'))[:-1,0,0])
ds.coords['RFu'] = ('RFu', read_mds(grid_path('RF'))[1:,0,0])
# 2D and 3D grid variables
ds.coords['RAC'] = (('YC','XC'), read_mds(grid_path('RAC')))
ds.coords['RAZ'] = (('YG','XG'), read_mds(grid_path('RAC')))
# what about RAS and RAW?
ds.coords['HFACC'] = (('RC','YC','XC'), read_mds(grid_path('hFacC')))
ds.coords['HFACS'] = (('RC','YG','XC'), read_mds(grid_path('hFacS')))
ds.coords['HFACW'] = (('RC','YC','XG'), read_mds(grid_path('hFacW')))
ds
Out[8]:
In [22]:
ls /data/scratch/rpa/channel_moc/GCM/run
Array | Value | Description |
---|---|---|
1 | S | Scalar Diagnostic |
U | U-vector component Diagnostic | |
V | V-vector component Diagnostic | |
2 | U | C-Grid U-Point |
V | C-Grid V-Point | |
M | C-Grid Mass Point | |
Z | C-Grid Vorticity (Corner) Point | |
3 | Used for Level Integrated output: cumulate levels | |
r | same but cumulate product by model level thickness | |
R | same but cumulate product by hFac & level thickness | |
4 | P | Positive Definite Diagnostic |
5 | C | with Counter array |
P | post-processed (not filled up) from other diags | |
D | Disabled Diagnostic for output | |
6-8 | retired, formerly: 3-digit mate number | |
9 | U | model-level plus 1/2 |
M | model-level middle | |
L | model-level minus 1/2 | |
10 | 0 | levels = 0 |
1 | levels = 1 | |
R | levels = Nr | |
L | levels = MAX(Nr,NrPhys) | |
M | levels = MAX(Nr,NrPhys) - 1 | |
G | levels = Ground_level Number | |
I | levels = sea-Ice_level Number | |
X | free levels option (need to be set explicitly) |
In [9]:
state_vars = {
'U': MITgcmDiagnosticDescription(
'U', 'UUR MR', 'm/s', 'Zonal Component of Velocity (m/s)'),
'V': MITgcmDiagnosticDescription(
'V', 'VVR MR', 'm/s', 'Meridional Component of Velocity (m/s)'),
'W': MITgcmDiagnosticDescription(
'W', 'WM LR', 'm/s', 'Vertical Component of Velocity (r_units/s)'),
'T': MITgcmDiagnosticDescription(
'T', 'SMR MR', 'degC', 'Potential Temperature'),
'S': MITgcmDiagnosticDescription(
'S', 'SMR MR', 'psu', 'Salinity'),
'PH': MITgcmDiagnosticDescription(
'PH', 'SMR MR', 'm^2/s^2', 'Hydrostatic Pressure Pot.(p/rho) Anomaly'),
'PHL': MITgcmDiagnosticDescription(
'PHL', 'SM M1', 'm^2/s^2', 'Bottom Pressure Pot.(p/rho) Anomaly'),
'Eta': MITgcmDiagnosticDescription(
'Eta', 'SM M1', 'm', 'Surface Height Anomaly'),
}
In [10]:
iternum = 1728000
for k, v in state_vars.iteritems():
data = read_mds(grid_path(k), iternum)
ds[k] = (v.coords(), data)
ds[k].attrs = {'description': v.desc, 'units': v.units}
In [11]:
ds
Out[11]:
In [26]:
ds['T'] * ds['PH']
Out[26]:
In [27]:
plt.imshow(ds['T'].mean(dim='XC'))
Out[27]:
In [356]:
x = xray.DataArray( np.ones(10, dtype='>f4'))
print float(x.sum()), x.data.sum()
In [355]:
np.ones(10)#, dtype='>4f')
Out[355]:
In [7]:
dt = np.dtype('>f4')
dt
Out[7]:
In [28]:
type(ds['T'].data)
Out[28]:
In [9]:
dtnew
Out[9]:
In [30]:
import bottleneck as bn
bn.bench()
In [282]:
import mdsxray
reload(mdsxray)
Out[282]:
In [289]:
#iters = range(1036800, 2073600,345600)
iters = range(2073840, 2384880, 480)
store = mdsxray.MDSDataStore(ddir, iters, deltaT=900)
ds = xray.Dataset.load_store(store)
ds
In [272]:
#ds.set_coords(dsc.variables.keys(), inplace=True)
#dsc = ds.chunk({'X': 100, 'Y':100})
#dsc
Out[272]:
In [284]:
u = ds['U']
In [287]:
type(u.data)
Out[287]:
In [61]:
xc = read_mds(grid_path('XC'))
xg = read_mds(grid_path('XG'))
yc = read_mds(grid_path('YC'))
xcd = da.from_array(xc, xc.shape)
xgd = da.from_array(xg, xg.shape)
ycd = da.from_array(yc, yc.shape)
In [54]:
import dask.array as da
In [63]:
dstacked = da.stack([xcd, xgd, ycd])
In [64]:
dstacked
Out[64]:
In [136]:
type(np.asarray(xc))
Out[136]:
In [143]:
from xray.backends import ScipyDataStore
In [144]:
sds = ScipyDataStore('/data/scratch/rpa/aviso/ftp.aviso.altimetry.fr/global/delayed-time/grids/msla/all-sat-merged/uv/all-monthly/dt_global_allsat_msla_uv_2014-12.nc')
In [149]:
sds.get_variables()['lat_bnds']
Out[149]:
In [197]:
varnames = ['a', 'b']
vardata = {}
[vardata[k] = [] for k in varnames]
In [199]:
dims = ('Y','X')
('time',) + dims
Out[199]:
In [220]:
m = re.match('pickup', '/pickup.0001036800.meta')
In [223]:
if re.match('pickup', '/pickuP.0001036800.meta'):
print 'ok'
else:
print 'no match'
In [236]:
f = 'DiagLAYERS-diapycnal.0002073600.meta'
In [243]:
re.match('(^.+?)\..+', f).groups()[0]
Out[243]:
In [246]:
vardata = ['a', 'b']
In [247]:
vardata.remove('a')
In [248]:
vardata
Out[248]:
In [288]:
2074320 - 2073840
Out[288]:
In [ ]: