This script shows how the analysis is done and how plots are created


In [1]:
# import all required modules

%pylab inline

import netCDF4
from scipy import ndimage

import matplotlib.pyplot as plt
import matplotlib.colors as colors
import cartopy.crs as ccrs

from scipy import stats

from pathlib import Path

from hypercc.data.box import Box
from hypercc.data.data_set import DataSet
from hypercc.units import unit
from hypercc.plotting import (
    plot_mollweide, plot_orthographic_np, plot_plate_carree, earth_plot,
    plot_signal_histogram)
from hypercc.filters import (taper_masked_area, gaussian_filter, sobel_filter)
from hypercc.calibration import (calibrate_sobel)


Populating the interactive namespace from numpy and matplotlib

Enter your settings here


In [2]:
data_folder = Path("/home/bathiany/Sebastian/datamining/edges/testdata")

variable = "tas"          # variable from CMIP5
model = "MPI-ESM-LR"      # CMIP5 model

# calibration of the aspect ratio is based on which quartile of the gradients 
# for climate models, use 3, for idealised test cases, use 4
quartile_calibration=3

# which month should be selected for the yearly time series (1-12; 13 is annual mean)
month = 4

## smoothing scales
sigma_d = unit('100 km')     # space
sigma_t = unit('10 year')    # time

select data based on settings above

No editing below this point required


In [3]:
data_set = DataSet.cmip5(
    path=data_folder,
    model=model, 
    variable=variable,
    scenario='rcp85',      # climate change scenario, here: rcp85
    realization='r1i1p1',  # realisation 
)[month-1::12]

data_set.load()

Load and inspect the data

Next we define a box. The box contains all information on the geometry of the data. It loads the lattitudes and longitudes of the grid points from the NetCDF file and computes quantities like resolution.


In [4]:
from datetime import date, timedelta

box = data_set.box

print("({:.6~P}, {:.6~P}, {:.6~P}) per pixel".format(*box.resolution))
for t in box.time[:3]:
    print(box.date(t), end=', ')
print(" ...")

dt = box.time[1:] - box.time[:-1]
print("time steps: max", dt.max(), "min", dt.min())


(0.999981 year, 207.379 km, 208.49 km) per pixel
2006-04-16, 2007-04-16, 2008-04-16,  ...
time steps: max 366.0 min 365.0

The Box class has the nice feature that you can select slices from it, and the information will adapt. The data we loaded contains monthly snapshots. If we select one snapshot in twelve, we obtain yearly data for that month. The Box class stores the axis grid points internally and slices the time axis accordingly.

Calibration


In [5]:
control_set = DataSet.cmip5(
    path=data_folder, model=model, variable=variable,
    scenario='piControl', realization='r1i1p1')[month-1::12]

control_data = control_set.data
control_box = control_set.box

In [6]:
# smooth over continental boundaries to avoid detecting edges at the coastlines
taper_masked_area(control_data, [0, 5, 5], 50)


Land-sea mask is empty. No smoothing at coasts is needed.

In [7]:
smooth_control_data = gaussian_filter(control_box, control_data, [sigma_t, sigma_d, sigma_d])

In [8]:
# scaling_factor is the aspect ratio between space and time
# Here it is initialised as 1, but will be calibrated automatically later
scaling_factor = unit('1 km/year')
sobel_delta_t = unit('1 year')                    # time scale
sobel_delta_d = sobel_delta_t * scaling_factor    # length scale
sobel_weights = [sobel_delta_t, sobel_delta_d, sobel_delta_d]

In [9]:
calibration = calibrate_sobel(quartile_calibration, control_box, smooth_control_data, sobel_delta_t, sobel_delta_d)

for k, v in calibration.items():
    print("{:10}: {}".format(k, v))
print("recommended setting for gamma: ", calibration['gamma'][quartile_calibration])


time      : [0.         0.0051938  0.01169225 0.02328531 0.5235724 ]
magnitude : [2.6918345e-10 1.2084913e-04 3.0507491e-04 9.4191078e-04 2.7414030e-01]
distance  : [3.5434670e-07 5.7468936e-03 1.0206162e-02 1.5839750e-02 1.8779895e-01]
gamma     : [0.        0.9037577 1.1456068 1.4700557 2.787941 ]
recommended setting for gamma:  1.4700557

In [10]:
sb_control = sobel_filter(control_box, smooth_control_data, weight=sobel_weights)
pixel_sb_control = sobel_filter(control_box, smooth_control_data, physical=False)
pixel_sb_control[3] = sb_control[3]

signal_control = (1.0 / sb_control[3])

In [11]:
gamma_cal = calibration['gamma'][quartile_calibration]   #default in hypercc: 3

scaling_factor = gamma_cal * unit('1 km/year')
sobel_delta_d = sobel_delta_t * scaling_factor
sobel_weights = [sobel_delta_t, sobel_delta_d, sobel_delta_d]

In [12]:
## gradients in physical units

# space gradient in K / km
sgrad_phys = np.sqrt(sb_control[1]**2 + sb_control[2]**2) / sb_control[3]

# time gradient in K / year
tgrad = sb_control[0]/sb_control[3]

In [13]:
##### scatter diagram of gradients in piControl

#matplotlib.rcParams['figure.figsize'] = (20, 20)
#matplotlib.rcParams.update({'font.size': 40})
#matplotlib.rc('xtick', labelsize=30) 
#matplotlib.rc('ytick', labelsize=30) 


## scatter plot of gradients in space and time:
plt.scatter(sgrad_phys, tgrad, s=0.1, marker = '.');

plt.xlabel('K / km', fontsize=32)
plt.ylabel('K / yr', fontsize=32)

plt.tick_params(axis='both', which='major', labelsize=32)

#### set axis ranges
border=0.15
Smin=np.min(sgrad_phys)-(np.max(sgrad_phys)-np.min(sgrad_phys))*border
Smax=np.max(sgrad_phys)+(np.max(sgrad_phys)-np.min(sgrad_phys))*border
#Tmin=np.min(tgrad)-(np.max(tgrad)-np.min(tgrad))*border
#Tmax=np.max(tgrad)+(np.max(tgrad)-np.min(tgrad))*border

# for MPI-ESM temperature case
#Smin=-0.01
#Smax=0.6
Tmin=-0.6
Tmax=0.6

plt.xlim(Smin, Smax)
plt.ylim(Tmin, Tmax)

## max space gradient (4th quartile)
plt.axvline(x=np.max(sgrad_phys), ymin=0, ymax=1, color='r', linestyle="-")

## max time gradient
plt.axhline(xmin=0, xmax=1, y=np.max(np.abs(tgrad)), color='r', linestyle="-")
plt.axhline(xmin=0, xmax=1, y=-np.max(np.abs(tgrad)), color='r', linestyle="-")

# selected quartile
plt.axvline(x=calibration['distance'][quartile_calibration], ymin=0, ymax=1, color='g', linestyle="--")
plt.axhline(xmin=0, xmax=1, y=calibration['time'][quartile_calibration], color='g', linestyle="--")
plt.axhline(xmin=0, xmax=1, y=-calibration['time'][quartile_calibration], color='g', linestyle="--")


Out[13]:
<matplotlib.lines.Line2D at 0x7f916372f588>

In [14]:
## defining the threshold parameters for hysteresis thresholding:
# each pixel with a the gradient above the upper threshold is labeled as a strong edge.
# each pixel that is above the lower threshold is labeled as a weak edge.
# all strong edges are kept as edges.
# all weak edges that are connected to strong edges are kept as edges, the others are dropped.

# set upper threshold as the combination of the maxima of gradients in space and time
mag_quartiles=np.sqrt((calibration['distance'] * gamma_cal)**2 + calibration['time']**2)
upper_threshold = mag_quartiles[4]

# set lower threshold to be half the upper threshold
lower_threshold = upper_threshold/2

In [15]:
## equivalent space gradient in °C / yr (scaling_factor is in kilometer/year)

sgrad_scaled = sgrad_phys * gamma_cal                   # K/km * km/yr => K/yr

In [16]:
## now, the moment of sgrad_scaled that was chosen for calibration is identical to the moment of tgrad
# This means that the green lines in the figure below are at the same value on both axes.
(calibration['distance'][quartile_calibration] * gamma_cal, calibration['time'][quartile_calibration])


Out[16]:
(0.023285314, 0.023285313)

In [17]:
##### scatter diagram of gradients in piControl as calibrated units

#matplotlib.rcParams['figure.figsize'] = (20, 20)
#matplotlib.rcParams.update({'font.size': 40})
matplotlib.rc('xtick', labelsize=32) 
matplotlib.rc('ytick', labelsize=32) 
plt.tick_params(axis='both', which='major', labelsize=32)


## scatter plot of gradients in space and time:
plt.scatter(sgrad_scaled, tgrad, s=0.1, marker = '.');

plt.xlabel('K / yr')
plt.ylabel('K / yr')

## max space gradient (rescaled)
plt.axvline(x=np.max(sgrad_scaled), ymin=0, ymax=1, color='r', linestyle="-")

## max time gradient
plt.axhline(xmin=0, xmax=1, y=np.max(np.abs(tgrad)), color='r', linestyle="-")
plt.axhline(xmin=0, xmax=1, y=-np.max(np.abs(tgrad)), color='r', linestyle="-")

# quartiles
plt.axvline(x=calibration['distance'][quartile_calibration]*gamma_cal, ymin=0, ymax=1, color='g', linestyle="--")
plt.axhline(xmin=0, xmax=1, y=calibration['time'][quartile_calibration], color='g', linestyle="--")
plt.axhline(xmin=0, xmax=1, y=-calibration['time'][quartile_calibration], color='g', linestyle="--")


#### circle showing the threshold values of hysteresis thresholding
dp = np.linspace(-np.pi/2, np.pi/2, 100)

radius=upper_threshold
dt = radius * sin(dp)
dx = radius * cos(dp) 
plt.plot(dx, dt, c='k')

## circle showing the lower threshold:
radius=lower_threshold
dt = radius * sin(dp)
dx = radius * cos(dp)
plt.plot(dx, dt, c='k') 
                 

#### set axis ranges (adjusted to the specific example of MPI-ESM, temp, mon 4)
Smin=-0.01
Smax=0.6
Tmin=-0.6
Tmax=0.6

plt.xlim(Smin, Smax)
plt.ylim(Tmin, Tmax)


Out[17]:
(-0.6, 0.6)

In [18]:
# show signal of all grid cells over time in pre-industrial control simulation

plot_signal_histogram(control_box, signal_control, upper_threshold, lower_threshold)

matplotlib.rcParams.update({'font.size': 32})
matplotlib.rc('xtick', labelsize=32) 
matplotlib.rc('ytick', labelsize=32) 

#matplotlib.rcParams.update({'font.size': 40})


Canny edges

The Canny edge detector consists of four stages:

  • Smoothing
  • Differentiating (Sobel operator)
  • Non-maximum suppression
  • Hysteresis thresholding

The first two of these stages are sensitive to the geometry of the information. How data is smoothed or differentiated is influenced by the spherical projection of our data. In this method we assume a Plate-Carree projection, where lattitudes and longitudes are mapped to a 1:2 rectangle with equal number of degrees per pixel everywhere. We may test this assumption on our box object.


In [19]:
box.rectangular


Out[19]:
True

Smoothing

Smoothing in the lattitudinal direction (N-S) is not affected by the projection; distances do not vary since the circles of equal longitude are all greater circles on the sphere. We apply a Gaussian filter where we reflect data in the lattitudinal and time directions and wrap in the longitudinal direction. Next, to filter in the longitudinal direction, we need to use a different filter width for each lattitude.


In [20]:
data = data_set.data
yearly_box = box[month-1::12]

# smooth over continental boundaries
taper_masked_area(data, [0, 5, 5], 50)    
# smoothing is not applied in time, 5 grid boxes wide in space (lat and lon), iteration: 50 times

smooth_data = gaussian_filter(box, data, [sigma_t, sigma_d, sigma_d])


Land-sea mask is empty. No smoothing at coasts is needed.

Sobel filtering

The Sobel filter has the same problem as the Gaussian filter, but the solution is easier. We just correct for the magnitude of the Sobel response by multiplying the longitudinal component by the cosine of the latitude.


In [21]:
sb = sobel_filter(box, smooth_data, weight=sobel_weights)
pixel_sb = sobel_filter(box, smooth_data, physical=False)
pixel_sb[3] = sb[3]

Determine hysteresis settings


In [22]:
## The signal is the gradient in space and time 
# If gradients occurred only in time, the unit here could be interpreted as degrees C per year.
# The relation between space and time units is given by the factor "gamma" resulting from the calibration above.
# The two thresholds determined from the gradients in piControl are shown as two horizonal blue lines.
# The part of the signal above the upper threshold triggers the detection of edges.

signal = (1.0 / sb[3])

plot_signal_histogram(box, signal, upper_threshold, lower_threshold)

matplotlib.rcParams.update({'font.size': 22})



In [23]:
from hyper_canny import cp_edge_thinning, cp_double_threshold

In [24]:
# use directions of pixel based sobel transform and magnitudes from calibrated physical sobel.
dat = pixel_sb.transpose([3,2,1,0]).copy()
mask = cp_edge_thinning(dat)
thinned = mask.transpose([2, 1, 0])
dat = sb.transpose([3,2,1,0]).copy()

In [25]:
thinned *= ~data.mask
thinned[:10] = 0
thinned[-10:] = 0

In [26]:
## edge thinning
edges = cp_double_threshold(data=dat, mask=thinned.transpose([2,1,0]), a=1/upper_threshold, b=1/lower_threshold)
m = edges.transpose([2, 1, 0])

In [27]:
## a first look at the data (first time step)
plot_mollweide(box, data_set.data[0])


Out[27]:

In [28]:
# same for polar view

plot_orthographic_np(box, data_set.data[0])


Out[28]:

In [29]:
## define colour scale for plotting with white where variable is 0
my_cmap = matplotlib.cm.get_cmap('rainbow')
matplotlib.rcParams['figure.figsize'] = (25,10)
my_cmap.set_under('w')

In [30]:
## count how many separate edges can be distinguished
# Here, result is one large event in the Arctic Ocean 
# This occurs because it is the same sea ice edge that shifts in space over time.
labels, n_features = ndimage.label(m, ndimage.generate_binary_structure(3, 3))
print(n_features)
big_enough = [x for x in range(1, n_features+1) if (labels==x).sum() > 100]
print(big_enough)
labels = np.where(np.isin(labels, big_enough), labels, 0)

#plot_plate_carree(yearly_box, labels.max(axis=0), cmap=my_cmap, vmin=0.1)
plot_orthographic_np(yearly_box, labels.max(axis=0), cmap=my_cmap, vmin=0.1)


1
[1]
Out[30]:

In [31]:
## event count plot: how many years are part of the edge at each grid cell
#plot_plate_carree(yearly_box, np.sum(m, axis=0), cmap=my_cmap, vmin=0.1)
plot_orthographic_np(yearly_box, np.sum(m, axis=0), cmap=my_cmap, vmin=0.1)


Out[31]:

In [32]:
## calculate maximum excess time gradient at each grid cell (i.e. gradient after removing the mean trend)

tgrad=sb[0]/sb[3]
maxm=np.nanmax(m, axis=0)

tgrad_residual = tgrad - np.mean(tgrad, axis=0)   # remove time mean
maxTgrad = np.max(abs(tgrad_residual), axis=0)    # maximum of time gradient
maxTgrad = maxTgrad * maxm

#plot_plate_carree(box, maxTgrad, cmap=my_cmap, vmin=1e-30)
plot_orthographic_np(box, maxTgrad, cmap=my_cmap, vmin=1e-30)


Out[32]:

quantifying the abruptness in time:


In [33]:
cutoff_length=2       # how many years to either side of the abrupt shift are cut off (the index of the event itself is always cut off)
chunk_max_length=30   # maximum length of chunk of time series to either side of the event
chunk_min_length=15   # minimum length of these chunks

In [34]:
years = np.array([d.year for d in box.dates])
edges = cp_double_threshold(data=dat, mask=thinned.transpose([2,1,0]), a=1/upper_threshold, b=1/lower_threshold)
m = edges.transpose([2, 1, 0])
idx = np.where(m)
indices=np.asarray(idx)
abruptness3d=m*0.0

shapeidx=np.shape(idx)
nofresults=shapeidx[1]

for result in range(nofresults):
    [dim0,dim1,dim2]=indices[:,result]

    if m[dim0, dim1, dim2] == 1:
        index=dim0
        chunk1_data=data[0:index-cutoff_length,dim1,dim2]
        chunk2_data=data[index+cutoff_length+1:,dim1,dim2]
        chunk1_years=years[0:index-cutoff_length]
        chunk2_years=years[index+cutoff_length+1:]    
            
        if size(chunk1_data) > chunk_max_length:
            chunk1_start=size(chunk1_data)-chunk_max_length
        else:
            chunk1_start=0  
        if size(chunk2_data) > chunk_max_length:
            chunk2_end=chunk_max_length
        else:
            chunk2_end=size(chunk2_data)

        chunk1_data_short=chunk1_data[chunk1_start:]
        chunk2_data_short=chunk2_data[0:chunk2_end]                

        N1=size(chunk1_data_short)
        N2=size(chunk2_data_short)
                
        if not ((N1 < chunk_min_length) or (N2 < chunk_min_length)):
            chunk1_years_short=chunk1_years[chunk1_start:]-years[dim0]
            chunk2_years_short=chunk2_years[0:chunk2_end]-years[dim0]
        
            slope_chunk1, intercept_chunk1, r_value, p_value, std_err = stats.linregress(chunk1_years_short, chunk1_data_short)
            chunk1_regline=intercept_chunk1 + slope_chunk1*chunk1_years_short
            
            slope_chunk2, intercept_chunk2, r_value, p_value, std_err = stats.linregress(chunk2_years_short, chunk2_data_short)
            chunk2_regline=intercept_chunk2 + slope_chunk2*chunk2_years_short
            
            mean_std=(np.nanstd(chunk1_data_short)+np.nanstd(chunk2_data_short))/2
            abruptness3d[dim0,dim1,dim2]=abs(intercept_chunk1-intercept_chunk2)/mean_std
abruptness=np.max(abruptness3d,axis=0)

In [35]:
# map of the maximum abruptness at each point
#plot_plate_carree(box, abruptness, cmap=my_cmap, vmin=1e-30)
plot_orthographic_np(box, abruptness, cmap=my_cmap, vmin=1e-30)


Out[35]:

In [36]:
## year in which the maximum of abruptness occurs at each point

idx = np.where(m)
indices=np.asarray(idx)

mask_max=m*0

shapeidx=np.shape(idx)
nofresults=shapeidx[1]

# mask_max is like m but only shows the time points with the maximum abruptness at each grid cell 
for result in range(nofresults):
    [dim0,dim1,dim2]=indices[:,result]
    if ( abruptness3d[dim0, dim1, dim2] == abruptness[dim1,dim2]) and abruptness[dim1,dim2] > 0:
        mask_max[dim0, dim1, dim2] = 1
        
    
years_maxpeak=(years[:,None,None]*mask_max).sum(axis=0)

minval = np.min(years_maxpeak[np.nonzero(years_maxpeak)])
maxval= np.max(years_maxpeak)
#plot_plate_carree(yearly_box, years_maxpeak,  cmap=my_cmap, vmin=minval, vmax=maxval) #, vmin=2000, vmax=2200)
plot_orthographic_np(yearly_box, years_maxpeak,  cmap=my_cmap, vmin=minval, vmax=maxval)


Out[36]:

In [37]:
## Show (part of) the time series of the original data at the grid cell with the largest abruptness
# red: original data
# blue: smoothed data (in space and time)
# red vertical dashed line: identification of the position of the edge in time (based on smoothed data)

lonind=np.nanargmax(np.nanmax(abruptness, axis=0))
latind=np.nanargmax(np.nanmax(abruptness, axis=1))

tindex_ini=0
tindex_fin=2200-2006
years_window=years[tindex_ini:tindex_fin]

ts=data[tindex_ini:tindex_fin,latind,lonind]
abruptness_max=abruptness[latind,lonind]
ts_smooth=smooth_data[tindex_ini:tindex_fin,latind,lonind]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(years_window, ts, 'k', years_window, ts_smooth, 'b--')

## determine year of abrupt shift
index=np.where(abruptness3d[:,latind,lonind]==abruptness_max)

ax.axvline(x=years_window[index], ymin=0, ymax=1, color='r', linestyle="--")

plt.ylabel('Temperature [K]')
plt.xlabel('Time [year]')
matplotlib.rc('xtick', labelsize=20) 
matplotlib.rc('ytick', labelsize=20) 

ax.tick_params(axis='both', which='major', labelsize=26)

ymin=min(ts)
ymax=max(ts)
xmin=min(years_window)
xmax=max(years_window)
xrange=xmax-xmin
yrange=ymax-ymin
frac=0.025
ypos=ymax-0.025*yrange
xpos=xmin+0.01*xrange
ax.text(xpos,ypos,'abruptness: '+ '{:f}'.format(abruptness_max),color='r', size=30)


Out[37]:
Text(2007.93, 273.35465850830076, 'abruptness: 4.672750')

Illustration of how abruptness is computed


In [38]:
## pick the grid point with the largest abruptness identified above:

# location in space
dim1=latind
dim2=lonind

# moment of the abrupt event in time (year) at the location of largest abruptness
idt = np.where(mask_max[:, dim1, dim2])
index=int(np.asarray(idt))

In [39]:
# cut the time series in half and remove moment of the transition:

chunk1_data=data[0:index-cutoff_length,dim1,dim2]
chunk2_data=data[index+cutoff_length+1:,dim1,dim2]
chunk1_years=years[0:index-cutoff_length]
chunk2_years=years[index+cutoff_length+1:]            
        
fig = plt.figure()
ax = fig.add_subplot(111)

plt.ylabel('Temperature [K]')
plt.xlabel('Time [year]')
matplotlib.rc('xtick', labelsize=20) 
matplotlib.rc('ytick', labelsize=20) 

ax.tick_params(axis='both', which='major', labelsize=26)


ax.plot(chunk1_years, chunk1_data, 'k', chunk2_years, chunk2_data, 'r')


Out[39]:
[<matplotlib.lines.Line2D at 0x7f915f06d1d0>,
 <matplotlib.lines.Line2D at 0x7f9169a1a710>]

In [40]:
if size(chunk1_data) > chunk_max_length:
    chunk1_start=size(chunk1_data)-chunk_max_length
else:
    chunk1_start=0  # first element
if size(chunk2_data) > chunk_max_length:
    chunk2_end=chunk_max_length
else:
    chunk2_end=size(chunk2_data)

chunk1_data_short=chunk1_data[chunk1_start:]
chunk2_data_short=chunk2_data[0:chunk2_end]

chunk1_years_short=chunk1_years[chunk1_start:]-years_window[index]
chunk2_years_short=chunk2_years[0:chunk2_end]-years_window[index]

In [41]:
fig = plt.figure()
ax = fig.add_subplot(111)

plt.ylabel('Temperature [K]')
plt.xlabel('Relative time [year]')

matplotlib.rc('xtick', labelsize=20) 
matplotlib.rc('ytick', labelsize=20) 

ax.tick_params(axis='both', which='major', labelsize=26)
ax.plot(chunk1_years_short, chunk1_data_short, 'k', chunk2_years_short, chunk2_data_short, 'r')


Out[41]:
[<matplotlib.lines.Line2D at 0x7f91699b2160>,
 <matplotlib.lines.Line2D at 0x7f91699d3ac8>]

In [42]:
## determine the intercepts for regression lines

slope_chunk1, intercept_chunk1, r_value, p_value, std_err = stats.linregress(chunk1_years_short, chunk1_data_short)
chunk1_regline=intercept_chunk1 + slope_chunk1*chunk1_years_short

slope_chunk2, intercept_chunk2, r_value, p_value, std_err = stats.linregress(chunk2_years_short, chunk2_data_short)
chunk2_regline=intercept_chunk2 + slope_chunk2*chunk2_years_short

fig = plt.figure()
ax = fig.add_subplot(111)

plt.ylabel('Temperature [K]')
plt.xlabel('Relative time [year]')
matplotlib.rc('xtick', labelsize=20) 
matplotlib.rc('ytick', labelsize=20) 
ax.tick_params(axis='both', which='major', labelsize=26)
ax.plot(chunk1_years_short, chunk1_data_short, 'k', chunk2_years_short, chunk2_data_short, 'r')
ax.plot(chunk1_years_short, chunk1_regline, 'k', chunk2_years_short, chunk2_regline, 'r')


Out[42]:
[<matplotlib.lines.Line2D at 0x7f916998e358>,
 <matplotlib.lines.Line2D at 0x7f916999ae48>]

In [43]:
[intercept_chunk1, intercept_chunk2]


Out[43]:
[263.1771736866375, 269.8104813250074]

In [44]:
[np.nanstd(chunk1_data_short), np.nanstd(chunk2_data_short)]


Out[44]:
[2.1246834, 0.71446174]

In [45]:
## offset between the intercepts:
# here, temperature jumps up by > 6 degrees in a few years
offset=intercept_chunk2-intercept_chunk1

## mean of the standard deviation of the residuals on both sides 
# equally weighted, and taking the mean slope into account
# here: 1.44 °C
mean_std=(np.nanstd(chunk1_data_short)+np.nanstd(chunk2_data_short))/2

# abruptness is here defined as the ratio of the two:
abruptness_local=abs(offset)/mean_std

[offset, mean_std, abruptness_local]


Out[45]:
[6.6333076383698995, 1.4195725917816162, 4.672749866243087]

In [46]:
## Function obtaining the edges, their position in space and time, and the abruptness as measured above
## These outputs are now sorted according to abruptess to produce the scatter plots below.

def feature_stats_all(box, m, sb, abruptness3d, years):
    years3d=years[:,None,None]*m
    lats=box.lat
    lats3d=lats[None,:,None]*m
    lons=box.lon
    lons3d=lons[None,None,:]*m
    idx = np.where(m)
    abruptness2 =abruptness3d[idx[0], idx[1], idx[2]]
    years2=years3d[idx[0], idx[1], idx[2]]
    lats2  =lats3d[idx[0], idx[1], idx[2]]
    lons2  =lons3d[idx[0], idx[1], idx[2]]
    sobel  = sb[:, idx[0], idx[1], idx[2]]
    dx = np.sqrt(sobel[1]**2 + sobel[2]**2) * unit('1/year') / scaling_factor
    return {
        'dx': dx/sobel[3],              # space gradient
        'dt': sobel[0]/sobel[3],        # time gradient
        'idx': idx,
        'abruptness2': abruptness2,
        'years2': years2,
        'lats2': lats2,
        'lons2': lons2
    }

scatter plot of space versus time gradients


In [47]:
## obtain location of edges in space and time, the magnitude of the gradients and their abruptness, and sort them
stats = feature_stats_all(box, m, sb, abruptness3d, years)

sgrad=stats['dx'] *1000    # scale to 1000 km
sgrad = sgrad.magnitude
tgrad=stats['dt'] *10      # scale to 10 years
abruptness2=stats['abruptness2']
years2=stats['years2']
lats2=stats['lats2']
lons2=stats['lons2']

# sort data points in order to show most abrupt ones on top of the others in scatter plot
inds = np.argsort(abruptness2)
sgrad_sorted = sgrad[inds]
tgrad_sorted = tgrad[inds]
abruptness_sorted = abruptness2[inds]
years_sorted = years2[inds]
lats_sorted = lats2[inds]
lons_sorted = lons2[inds]

In [48]:
def scatter_gradients(sgrad,tgrad,size,colour, colourbarlabel):
    ## scatter plot of space versus time gradients and their abruptness

    matplotlib.rcParams['figure.figsize'] = (15, 10)

    #### ellipses showing the threshold values of hysteresis thresholding
    dp = np.linspace(-np.pi/2, np.pi/2, 100)

    radius=upper_threshold
    dt = radius * sin(dp) * 10
    dx = radius * cos(dp) / gamma_cal * 1000 ## convert back from time to space units

    # ellipse showing the aspect ratio. for scaling_factor=1 would be a circle
    # the radius of that circle is the upper threshold
    plt.plot(dx, dt, c='k')

    ## ellipse based on the lower threshold:
    radius=lower_threshold
    dt = radius * sin(dp) * 10
    dx = radius * cos(dp)  / gamma_cal  *1000
    plt.plot(dx, dt, c='k') 
                 
    ## scatter plot, weighted with abruptness:
    plt.scatter(sgrad, tgrad,s=size**2,c=colour, marker = 'o', cmap =my_cmap );
    cbar=plt.colorbar()
    cbar.set_label(colourbarlabel)

    matplotlib.rcParams.update({'font.size': 40})

    plt.xlabel('1 K / 1000 km')
    plt.ylabel('1 K / decade')
    matplotlib.rc('xtick', labelsize=32) 
    matplotlib.rc('ytick', labelsize=32) 

    plt.tick_params(axis='both', which='major', labelsize=32)

    
    #### set axis ranges
    border=0.05
    Smin=np.min(sgrad)-(np.max(sgrad)-np.min(sgrad))*border
    Smax=np.max(sgrad)+(np.max(sgrad)-np.min(sgrad))*border
    Tmin=np.min(tgrad)-(np.max(tgrad)-np.min(tgrad))*border
    Tmax=np.max(tgrad)+(np.max(tgrad)-np.min(tgrad))*border

    plt.xlim(Smin, Smax)
    plt.ylim(Tmin, Tmax)

In [49]:
## scatter plot of space versus time gradients and their abruptness
scatter_gradients(sgrad_sorted, tgrad_sorted,abruptness_sorted,abruptness_sorted,'abruptness')



In [50]:
## same but color coding with years
scatter_gradients(sgrad_sorted, tgrad_sorted,abruptness_sorted,years_sorted,'year')



In [51]:
## same but color coding with latitudes
scatter_gradients(sgrad_sorted, tgrad_sorted,abruptness_sorted,lats_sorted,'latitude')



In [52]:
## same but color coding with longitudes
scatter_gradients(sgrad_sorted, tgrad_sorted,abruptness_sorted,lons_sorted,'longitude')



In [ ]: