In [52]:
import warnings
warnings.filterwarnings('ignore')
import numpy
from numpy import inf
import pandas
import iris
import iris.iterate
import iris.pandas
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.lines import Line2D
import seaborn as sns
import xarray as xr
In [3]:
import os, sys
cwd = os.getcwd()
repo_dir = '/'
for directory in cwd.split('/')[1:]:
repo_dir = os.path.join(repo_dir, directory)
if directory == 'ocean-analysis':
break
modules_dir = os.path.join(repo_dir, 'modules')
sys.path.append(modules_dir)
import convenient_universal as uconv
In [4]:
%matplotlib inline
In [5]:
tfile = '/g/data/ua6/DRSv3/CMIP5/CCSM4/historical/mon/ocean/r1i1p1/thetao/latest/thetao_Omon_CCSM4_historical_r1i1p1_200001-200512.nc'
sfile = '/g/data/ua6/DRSv3/CMIP5/CCSM4/historical/mon/ocean/r1i1p1/so/latest/so_Omon_CCSM4_historical_r1i1p1_200001-200512.nc'
vfile = '/g/data/r87/dbi599/DRSv2/CMIP5/CCSM4/historical/fx/ocean/r0i0p0/volcello/latest/volcello-inferred_fx_CCSM4_historical_r0i0p0.nc'
bfile = '/g/data/r87/dbi599/DRSv2/CMIP5/CCSM4/historical/fx/ocean/r0i0p0/basin/latest/basin_fx_CCSM4_historical_r0i0p0.nc'
In [6]:
tcube = iris.load_cube(tfile)
scube = iris.load_cube(sfile)
vcube = iris.load_cube(vfile)
bcube = iris.load_cube(bfile)
In [7]:
tcube = tcube.collapsed('time', iris.analysis.MEAN)
scube = scube.collapsed('time', iris.analysis.MEAN)
In [8]:
tcube.data = tcube.data - 273.15
scube.data = scube.data * 1000
In [9]:
lats = uconv.broadcast_array(tcube.coord('latitude').points, [1, 2], tcube.shape)
lons = uconv.broadcast_array(tcube.coord('longitude').points, [1, 2], tcube.shape)
levs = uconv.broadcast_array(tcube.coord('depth').points, 0, tcube.shape)
In [10]:
xdata = scube.data.flatten()
ydata = tcube.data.flatten()
vdata = vcube.data.flatten()
bdata = bcube.data.flatten()
latdata = lats.flatten()
londata = lons.flatten()
depthdata = levs.flatten()
In [37]:
x_edges = numpy.arange(32, 38, 0.05)
y_edges = numpy.arange(-2, 30, 0.25)
In [38]:
hist, xedges, yedges = numpy.histogram2d(xdata, ydata, weights=vdata, bins=[x_edges, y_edges])
In [39]:
hist.shape
Out[39]:
In [54]:
fig = plt.figure(figsize=[9, 9])
log_hist = numpy.log(hist).T
#log_hist[log_hist == -inf] = 0
plt.imshow(log_hist, origin='lower', extent=[32.025, 37.975, -2.125, 29.875], aspect='auto')
cb = plt.colorbar() #extend='min')
plt.title('Global ocean volume distribution, CCSM4 model')
plt.ylabel('temperature ($C$)')
plt.xlabel('salinity ($g kg^{-1}$)')
cb.set_label('log(volume) ($m^3$)')
plt.show()
In [51]:
numpy.log(hist).min()
Out[51]:
One approach is to use plt.hexbin.
In [24]:
fig = plt.figure(figsize=[9, 9])
ax = fig.add_subplot(111, facecolor=cm.viridis(0))
plt.hexbin(xdata, ydata, C=vdata, reduce_C_function=numpy.sum,
gridsize=400, bins='log', extent=(32, 38, -2, 30)) #mincnt=16
cb = plt.colorbar() # extend='min'
plt.ylim(-2, 30)
plt.xlim(32, 38)
plt.title('Global ocean volume distribution, CCSM4 model')
plt.ylabel('temperature ($C$)')
plt.xlabel('salinity ($g kg^{-1}$)')
cb.set_label('log(volume) ($m^3$)')
plt.savefig('/g/data/r87/dbi599/figures/global_distribution.png', bbox_inches='tight', dpi=300)
In order to create a difference plot, it's possible to get the values from a h=hexbin() using h.get_values() and set the values using h.set_values(). Check out this example for details.
Alteratively, I could run scipy.stats.binned_statistic_2d, which might be useful to get square bins. I'd then have to use a heat mapping package to create the plot.
It looks like the holoviews version of this (HexTiles) only supports linearly spaced (as opposed to log spaced) bins.
In [14]:
sns.jointplot(x=xdata, y=ydata, C=vdata, kind="hex",
gridsize=200, reduce_C_function=numpy.sum,
xlim=(32, 38), ylim=(-2, 30), bins='log',
color='k')
cb = plt.colorbar()
plt.show()
In pandas you can use pandas.DataFrame.plot.hexbin instead.
In [15]:
df = pandas.DataFrame(index=range(xdata.shape[0]))
df['temperature'] = ydata.filled(fill_value=5000)
df['salinity'] = xdata.filled(fill_value=5000)
df['volume'] = vdata.filled(fill_value=5000)
df['basin'] = bdata.filled(fill_value=5000)
df['latitude'] = latdata.filled(fill_value=5000)
df['longitude'] = londata.filled(fill_value=5000)
df['depth'] = depthdata
In [16]:
df = df[df.temperature != 5000]
df = df[df.temperature != -273.15]
In [17]:
ax = df.plot.hexbin(x='salinity',
y='temperature',
C='volume',
reduce_C_function=numpy.sum,
gridsize=1000,
bins='log',
cmap="viridis",
figsize=(9,9),
xlim=(32, 38),
ylim=(-2, 30))
ax.set_facecolor(cm.viridis(0))
plt.show()
In [49]:
ax = sns.scatterplot(x="salinity",
y="temperature",
size="volume",
data=df)
#hue="basin",
The seaborn relplot is a nice option (see example):
In [ ]:
sns.relplot(x="salinity",
y="temperature",
hue="basin",
size="volume",
sizes=(40, 400),
alpha=.5,
palette="muted",
height=6,
data=data_df)
Or a bokeh scatterplot that can be interrogated might be better?
In [18]:
basin_names = {0: 'land', 1: 'southern ocean', 2: 'atlantic',
3: 'pacific', 4: 'arctic', 5: 'indian',
6: 'mediterranean', 7: 'black sea', 8: 'hudson bay',
9: 'baltic sea', 10: 'red sea'}
def get_basin_name(basin_num):
return basin_names[basin_num]
In [19]:
df['basin'] = df['basin'].apply(get_basin_name)
In [25]:
fig, ax = plt.subplots(figsize=(9, 9))
df_north_atlantic = df[(df.basin == 'atlantic') & (df.latitude > 0)]
df_south_atlantic = df[(df.basin == 'atlantic') & (df.latitude < 0)]
df_north_pacific = df[(df.basin == 'pacific') & (df.latitude > 0)]
df_south_pacific = df[(df.basin == 'pacific') & (df.latitude < 0)]
df_indian = df[(df.basin == 'indian')]
#df_southern = df[(df.basin == 'southern ocean')]
h_i = plt.hexbin(df_indian['salinity'].values, df_indian['temperature'].values,
C=df_indian['volume'].values, reduce_C_function=numpy.sum,
gridsize=400, bins='log', extent=(32, 37.5, -2, 30),
cmap="Greys", alpha=0.2, vmin=9.8, vmax=16.6)
cb = plt.colorbar()
h_na = plt.hexbin(df_north_atlantic['salinity'].values, df_north_atlantic['temperature'].values,
C=df_north_atlantic['volume'].values, reduce_C_function=numpy.sum,
gridsize=400, bins='log', extent=(32, 37.5, -2, 30),
cmap="Reds", alpha=0.2, vmin=9.8, vmax=16.6)
h_sa = plt.hexbin(df_south_atlantic['salinity'].values, df_south_atlantic['temperature'].values,
C=df_south_atlantic['volume'].values, reduce_C_function=numpy.sum,
gridsize=400, bins='log', extent=(32, 37.5, -2, 30),
cmap="Oranges", alpha=0.2, vmin=9.8, vmax=16.6)
h_np = plt.hexbin(df_north_pacific['salinity'].values, df_north_pacific['temperature'].values,
C=df_north_pacific['volume'].values, reduce_C_function=numpy.sum,
gridsize=400, bins='log', extent=(32, 37.5, -2, 30),
cmap="Greens", alpha=0.2, vmin=9.8, vmax=16.6)
h_sp = plt.hexbin(df_south_pacific['salinity'].values, df_south_pacific['temperature'].values,
C=df_south_pacific['volume'].values, reduce_C_function=numpy.sum,
gridsize=400, bins='log', extent=(32, 37.5, -2, 30),
cmap="Blues", alpha=0.2, vmin=9.8, vmax=16.6)
#plt.hexbin(df_southern['salinity'].values, df_southern['temperature'].values,
# C=df_southern['volume'].values, reduce_C_function=numpy.sum,
# gridsize=200, bins='log', extent=(32, 38, -2, 30),
# cmap="Greys", alpha=0.2)
legend_elements = [Line2D([0], [0], marker='o', color='w', markerfacecolor='grey', label='Indian', alpha=0.5),
Line2D([0], [0], marker='o', color='w', markerfacecolor='red', label='North Atlantic', alpha=0.5),
Line2D([0], [0], marker='o', color='w', markerfacecolor='orange', label='South Atlantic', alpha=0.5),
Line2D([0], [0], marker='o', color='w', markerfacecolor='green', label='North Pacific', alpha=0.5),
Line2D([0], [0], marker='o', color='w', markerfacecolor='blue', label='South Pacific', alpha=0.5)]
ax.legend(handles=legend_elements, loc=4)
plt.title('Global ocean volume distribution, CCSM4 model')
plt.ylabel('temperature ($C$)')
plt.xlabel('salinity ($g kg^{-1}$)')
cb.set_label('log(volume) ($m^3$)')
plt.savefig('/g/data/r87/dbi599/figures/basin_distribution.png', bbox_inches='tight', dpi=300)
In [95]:
print(h_i.get_array().min())
print(h_i.get_array().max())
In [96]:
print(h_np.get_array().min())
print(h_np.get_array().max())
In [97]:
print(h_sp.get_array().min())
print(h_sp.get_array().max())
In [98]:
print(h_na.get_array().min())
print(h_na.get_array().max())
In [99]:
print(h_sa.get_array().min())
print(h_sa.get_array().max())
In [ ]: