In [52]:
import warnings
warnings.filterwarnings('ignore')

import numpy
from numpy import inf
import pandas
import iris
import iris.iterate
import iris.pandas
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.lines import Line2D
import seaborn as sns

import xarray as xr

In [3]:
import os, sys

cwd = os.getcwd()
repo_dir = '/'
for directory in cwd.split('/')[1:]:
    repo_dir = os.path.join(repo_dir, directory)
    if directory == 'ocean-analysis':
        break

modules_dir = os.path.join(repo_dir, 'modules')
sys.path.append(modules_dir)

import convenient_universal as uconv

In [4]:
%matplotlib inline

In [5]:
tfile = '/g/data/ua6/DRSv3/CMIP5/CCSM4/historical/mon/ocean/r1i1p1/thetao/latest/thetao_Omon_CCSM4_historical_r1i1p1_200001-200512.nc'
sfile = '/g/data/ua6/DRSv3/CMIP5/CCSM4/historical/mon/ocean/r1i1p1/so/latest/so_Omon_CCSM4_historical_r1i1p1_200001-200512.nc'
vfile = '/g/data/r87/dbi599/DRSv2/CMIP5/CCSM4/historical/fx/ocean/r0i0p0/volcello/latest/volcello-inferred_fx_CCSM4_historical_r0i0p0.nc'
bfile = '/g/data/r87/dbi599/DRSv2/CMIP5/CCSM4/historical/fx/ocean/r0i0p0/basin/latest/basin_fx_CCSM4_historical_r0i0p0.nc'

In [6]:
tcube = iris.load_cube(tfile)
scube = iris.load_cube(sfile)
vcube = iris.load_cube(vfile)
bcube = iris.load_cube(bfile)

In [7]:
tcube = tcube.collapsed('time', iris.analysis.MEAN)
scube = scube.collapsed('time', iris.analysis.MEAN)

In [8]:
tcube.data = tcube.data - 273.15
scube.data = scube.data * 1000

In [9]:
lats = uconv.broadcast_array(tcube.coord('latitude').points, [1, 2], tcube.shape)
lons = uconv.broadcast_array(tcube.coord('longitude').points, [1, 2], tcube.shape)
levs = uconv.broadcast_array(tcube.coord('depth').points, 0, tcube.shape)

In [10]:
xdata = scube.data.flatten()
ydata = tcube.data.flatten()
vdata = vcube.data.flatten()
bdata = bcube.data.flatten()
latdata = lats.flatten()
londata = lons.flatten()
depthdata = levs.flatten()

histogram


In [37]:
x_edges = numpy.arange(32, 38, 0.05)
y_edges = numpy.arange(-2, 30, 0.25)

In [38]:
hist, xedges, yedges = numpy.histogram2d(xdata, ydata, weights=vdata, bins=[x_edges, y_edges])

In [39]:
hist.shape


Out[39]:
(119, 127)

In [54]:
fig = plt.figure(figsize=[9, 9])

log_hist = numpy.log(hist).T
#log_hist[log_hist == -inf] = 0

plt.imshow(log_hist, origin='lower', extent=[32.025, 37.975, -2.125, 29.875], aspect='auto')

cb = plt.colorbar()  #extend='min')

plt.title('Global ocean volume distribution, CCSM4 model')
plt.ylabel('temperature ($C$)')
plt.xlabel('salinity ($g kg^{-1}$)')
cb.set_label('log(volume) ($m^3$)')

plt.show()



In [51]:
numpy.log(hist).min()


Out[51]:
-inf

plt.hexbin

One approach is to use plt.hexbin.


In [24]:
fig = plt.figure(figsize=[9, 9])

ax = fig.add_subplot(111, facecolor=cm.viridis(0))

plt.hexbin(xdata, ydata, C=vdata, reduce_C_function=numpy.sum,
           gridsize=400, bins='log', extent=(32, 38, -2, 30))  #mincnt=16

cb = plt.colorbar()  # extend='min'
plt.ylim(-2, 30)
plt.xlim(32, 38)

plt.title('Global ocean volume distribution, CCSM4 model')
plt.ylabel('temperature ($C$)')
plt.xlabel('salinity ($g kg^{-1}$)')
cb.set_label('log(volume) ($m^3$)')

plt.savefig('/g/data/r87/dbi599/figures/global_distribution.png', bbox_inches='tight', dpi=300)


In order to create a difference plot, it's possible to get the values from a h=hexbin() using h.get_values() and set the values using h.set_values(). Check out this example for details.

Alteratively, I could run scipy.stats.binned_statistic_2d, which might be useful to get square bins. I'd then have to use a heat mapping package to create the plot.

It looks like the holoviews version of this (HexTiles) only supports linearly spaced (as opposed to log spaced) bins.

seaborn jointplot


In [14]:
sns.jointplot(x=xdata, y=ydata, C=vdata, kind="hex", 
              gridsize=200, reduce_C_function=numpy.sum,
              xlim=(32, 38), ylim=(-2, 30), bins='log',
              color='k')

cb = plt.colorbar()
plt.show()


pandas.DataFrame.plot.hexbin

In pandas you can use pandas.DataFrame.plot.hexbin instead.


In [15]:
df = pandas.DataFrame(index=range(xdata.shape[0]))
df['temperature'] = ydata.filled(fill_value=5000)
df['salinity'] = xdata.filled(fill_value=5000)
df['volume'] = vdata.filled(fill_value=5000)
df['basin'] = bdata.filled(fill_value=5000)
df['latitude'] = latdata.filled(fill_value=5000)
df['longitude'] = londata.filled(fill_value=5000)
df['depth'] = depthdata

In [16]:
df = df[df.temperature != 5000]
df = df[df.temperature != -273.15]

In [17]:
ax = df.plot.hexbin(x='salinity',
                    y='temperature',
                    C='volume',
                    reduce_C_function=numpy.sum,
                    gridsize=1000,
                    bins='log',
                    cmap="viridis",
                    figsize=(9,9),
                    xlim=(32, 38),
                    ylim=(-2, 30))
ax.set_facecolor(cm.viridis(0))
plt.show()



In [49]:
ax = sns.scatterplot(x="salinity",
                     y="temperature", 
                     size="volume",
                     data=df)
#hue="basin",


The seaborn relplot is a nice option (see example):


In [ ]:
sns.relplot(x="salinity",
            y="temperature",
            hue="basin",
            size="volume",
            sizes=(40, 400),
            alpha=.5,
            palette="muted",
            height=6,
            data=data_df)

Or a bokeh scatterplot that can be interrogated might be better?

Indivudal gyres


In [18]:
basin_names = {0: 'land', 1: 'southern ocean', 2: 'atlantic', 
               3: 'pacific', 4: 'arctic', 5: 'indian', 
               6: 'mediterranean', 7: 'black sea', 8: 'hudson bay',
               9: 'baltic sea', 10: 'red sea'}

def get_basin_name(basin_num):
    return basin_names[basin_num]

In [19]:
df['basin'] = df['basin'].apply(get_basin_name)

In [25]:
fig, ax = plt.subplots(figsize=(9, 9))

df_north_atlantic = df[(df.basin == 'atlantic') & (df.latitude > 0)]
df_south_atlantic = df[(df.basin == 'atlantic') & (df.latitude < 0)]
df_north_pacific = df[(df.basin == 'pacific') & (df.latitude > 0)]
df_south_pacific = df[(df.basin == 'pacific') & (df.latitude < 0)]
df_indian = df[(df.basin == 'indian')]
#df_southern = df[(df.basin == 'southern ocean')]

h_i = plt.hexbin(df_indian['salinity'].values, df_indian['temperature'].values,
                 C=df_indian['volume'].values, reduce_C_function=numpy.sum,
                 gridsize=400, bins='log', extent=(32, 37.5, -2, 30),
                 cmap="Greys", alpha=0.2, vmin=9.8, vmax=16.6)

cb = plt.colorbar()

h_na = plt.hexbin(df_north_atlantic['salinity'].values, df_north_atlantic['temperature'].values,
                  C=df_north_atlantic['volume'].values, reduce_C_function=numpy.sum,
                  gridsize=400, bins='log', extent=(32, 37.5, -2, 30),
                  cmap="Reds", alpha=0.2, vmin=9.8, vmax=16.6)

h_sa = plt.hexbin(df_south_atlantic['salinity'].values, df_south_atlantic['temperature'].values,
                  C=df_south_atlantic['volume'].values, reduce_C_function=numpy.sum,
                  gridsize=400, bins='log', extent=(32, 37.5, -2, 30),
                  cmap="Oranges", alpha=0.2, vmin=9.8, vmax=16.6)

h_np = plt.hexbin(df_north_pacific['salinity'].values, df_north_pacific['temperature'].values,
                  C=df_north_pacific['volume'].values, reduce_C_function=numpy.sum,
                  gridsize=400, bins='log', extent=(32, 37.5, -2, 30),
                  cmap="Greens", alpha=0.2, vmin=9.8, vmax=16.6)

h_sp = plt.hexbin(df_south_pacific['salinity'].values, df_south_pacific['temperature'].values,
                  C=df_south_pacific['volume'].values, reduce_C_function=numpy.sum,
                  gridsize=400, bins='log', extent=(32, 37.5, -2, 30),
                  cmap="Blues", alpha=0.2, vmin=9.8, vmax=16.6)

#plt.hexbin(df_southern['salinity'].values, df_southern['temperature'].values,
#           C=df_southern['volume'].values, reduce_C_function=numpy.sum,
#           gridsize=200, bins='log', extent=(32, 38, -2, 30),
#           cmap="Greys", alpha=0.2)

legend_elements = [Line2D([0], [0], marker='o', color='w', markerfacecolor='grey', label='Indian', alpha=0.5),
                   Line2D([0], [0], marker='o', color='w', markerfacecolor='red', label='North Atlantic', alpha=0.5),
                   Line2D([0], [0], marker='o', color='w', markerfacecolor='orange', label='South Atlantic', alpha=0.5),
                   Line2D([0], [0], marker='o', color='w', markerfacecolor='green', label='North Pacific', alpha=0.5),
                   Line2D([0], [0], marker='o', color='w', markerfacecolor='blue', label='South Pacific', alpha=0.5)]
ax.legend(handles=legend_elements, loc=4)

plt.title('Global ocean volume distribution, CCSM4 model')
plt.ylabel('temperature ($C$)')
plt.xlabel('salinity ($g kg^{-1}$)')
cb.set_label('log(volume) ($m^3$)')
plt.savefig('/g/data/r87/dbi599/figures/basin_distribution.png', bbox_inches='tight', dpi=300)



In [95]:
print(h_i.get_array().min())
print(h_i.get_array().max())


10.423873039020588
16.10369549329049

In [96]:
print(h_np.get_array().min())
print(h_np.get_array().max())


10.076884052070467
16.524153667039766

In [97]:
print(h_sp.get_array().min())
print(h_sp.get_array().max())


10.300218685024282
16.01830951534105

In [98]:
print(h_na.get_array().min())
print(h_na.get_array().max())


9.81909793629173
15.937873562497153

In [99]:
print(h_sa.get_array().min())
print(h_sa.get_array().max())


10.423873039020588
15.680941702656904

In [ ]: