Analysis of signature data



In [ ]:

    
%matplotlib notebook
import numpy as np
from scipy.stats import describe
from scipy.stats import norm as norm_dist
from scipy.stats.mstats import mquantiles
from math import log, sqrt
import matplotlib.pyplot as plt
from matplotlib import ticker, colors, gridspec
from copy import deepcopy
from utils import plot_hist, moving_average, hw, time_scale, hist_size_func, recompute_nonces
from binascii import unhexlify
from IPython.display import display, HTML
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import tabulate

Settings

Enter your input below.



In [ ]:

    
# File name with output from ECTesterReader or ECTesterStandalone signatures.
fname = "filename.csv"

# A hash algorithm used
hash_algo = "SHA1" # e.g. "SHA1" or None for no hash, raw data signatures

# A curve name or a path to curve file, used to recompute the random nonces used in signing, if they are not present
# in the file. (ECTester was not able to recompute them for some reason)
curve = None # e.g. "secg/secp256r1" or "secp256r1.csv" or None for no curve.

# The time unit used in displaying the plots. One of "milli", "micro", "nano".
# WARNING: Using nano might lead to very large plots/histograms and to the
#          notebook to freeze or run out of memory, as well as bad visualization
#          quality, due to noise and low density.
sign_unit = "milli"
verify_unit = "milli"
# A number which will be used to divide the time into sub-units, e.g. for 5, time will be in fifths of units
scaling_factor = 1

# The amount of entries skipped from the beginning of the file, as they are usually outliers.
skip_first = 10

# Whether to plot things in logarithmic scale or not.
log_scale = False

# Whether to trim the time data outside the 1 - 99 percentile range (adjust below). Quite useful.
trim = True

# How much to trim? Either a number in [0,1] signifying a quantile, or an absolute value signifying a threshold
trim_low = 0.01
trim_high = 0.99

# Graphical (matplotlib) style name
style = "ggplot"

# Color map to use, and what color to assign to "bad" values (necessary for log_scale)
color_map = plt.cm.viridis
color_map_bad = "black"

# What function to use to calculate number of histogram bins of time
# one of "sqrt", "sturges", "rice", "scott" and "fd" or a number specifying the number of bins
hist_size = "sturges"

Data processing



In [ ]:

    
# Setup plot style

plt.style.use(style)

cmap = deepcopy(color_map)
cmap.set_bad(color_map_bad)

# Normalization, linear or log.
if log_scale:
    norm = colors.LogNorm()
else:
    norm = colors.Normalize()

# Read the header line.

with open(fname, "r") as f:
    header = f.readline()
header_names = header.split(";")
if len(header_names) != 9:
    print("Bad data?")
    exit(1)

# Load the data

hx = lambda x: int(x, 16)
data = np.genfromtxt(fname, delimiter=";", skip_header=1, converters={3: unhexlify, 4: unhexlify,
                                                                      5: hx, 6: unhexlify, 7: hx,
                                                                      8: lambda b: bool(int(b))},
                     dtype=np.dtype([("index", "u4"), ("sign_time", "u4"), ("verify_time", "u4"),
                                     ("data", "O"), ("pub", "O"), ("priv", "O"), ("signature", "O"),
                                     ("nonce", "O"), ("valid", "b")]))
# Skip first (outliers?)

data = data[skip_first:]

# Setup the data

# Convert time data
orig_sign_unit = header_names[1].split("[")[1][:-1]
orig_verify_unit = header_names[2].split("[")[1][:-1]
sign_disp_unit = time_scale(data["sign_time"], orig_sign_unit, sign_unit, scaling_factor)
verify_disp_unit = time_scale(data["verify_time"], orig_verify_unit, verify_unit, scaling_factor)

if np.any(data["nonce"] == None):
    recompute_nonces(data, curve, hash_algo)

# Trim times
quant_low_bound = trim_low if 0 <= trim_low <= 1 else 0.01
quant_high_bound = trim_high if 0 <= trim_high <= 1 else 0.95
quantiles_sign = mquantiles(data["sign_time"], prob=(quant_low_bound, 0.25, 0.5, 0.75, quant_high_bound))
if trim:
    low_bound = quantiles_sign[0] if 0 <= trim_low <= 1 else trim_low
    high_bound = quantiles_sign[4] if 0 <= trim_high <= 1 else trim_high
    data_trimmed = data[np.logical_and(data["sign_time"] >= low_bound,
                                       data["sign_time"] <= high_bound)]
    quantiles_sign_trim = mquantiles(data_trimmed["sign_time"], prob=(quant_low_bound, 0.25, 0.5, 0.75, quant_high_bound))
else:
    low_bound = None
    high_bound = None
    data_trimmed = data
    quantiles_sign_trim = quantiles_sign

description_sign = describe(data["sign_time"])
description_sign_trim = describe(data_trimmed["sign_time"])

max_sign_time = description_sign.minmax[1]
min_sign_time = description_sign.minmax[0]
bit_size = len(bin(max(data["priv"]))) - 2
byte_size = (bit_size + 7) // 8
bit_size = byte_size * 8

hist_size_sign_time = hist_size_func(hist_size)(description_sign.nobs, min_sign_time, max_sign_time, description_sign.variance, quantiles_sign[1], quantiles_sign[3])
hist_size_sign_time_trim = hist_size_func(hist_size)(description_sign_trim.nobs, description_sign_trim.minmax[0], description_sign_trim.minmax[1], description_sign_trim.variance, quantiles_sign_trim[1], quantiles_sign_trim[3])

if hist_size_sign_time < 30:
    hist_size_sign_time = max_sign_time - min_sign_time
if hist_size_sign_time_trim < 30:
    hist_size_sign_time_trim = description_sign_trim.minmax[1] - description_sign_trim.minmax[0]

Analysis

Summary



In [ ]:

    
display("Raw")
desc = [("N", "min, max", "mean", "variance", "skewness", "kurtosis"),
        description_sign]
display(HTML(tabulate.tabulate(desc, tablefmt="html")))
display("Trimmed")
desc = [("N", "min, max", "mean", "variance", "skewness", "kurtosis"),
        description_sign_trim]
display(HTML(tabulate.tabulate(desc, tablefmt="html")))

Selected quantiles



In [ ]:

    
tbl = [(quant_low_bound, "0.25", "0.5", "0.75", quant_high_bound),
       list(map(lambda x: "{} {}".format(x, sign_disp_unit), quantiles_sign))]
display(HTML(tabulate.tabulate(tbl, tablefmt="html")))

Info



In [ ]:

    
display("Bitsize:", bit_size)
display("Histogram time bins: {}".format(hist_size_sign_time))
display("Histogram time bins(trimmed): {}".format(hist_size_sign_time_trim))

Plots

Nonce MSB vs signature time heatmap

The heatmap should show uncorrelated variables.



In [ ]:

    
fig_nonce = plt.figure(figsize=(10.5, 8), dpi=90)
axe_nonce = fig_nonce.add_subplot(1, 1, 1, title="Nonce MSB vs signature time")
nonce_msb = np.array(list(map(lambda x: x >> (bit_size - 8), data_trimmed["nonce"])), dtype=np.dtype("u1"))
max_msb = max(nonce_msb)
min_msb = min(nonce_msb)
heatmap, xedges, yedges = np.histogram2d(nonce_msb, data_trimmed["sign_time"],
                                         bins=[max_msb - min_msb + 1, hist_size_sign_time_trim])
extent = [min_msb, max_msb, yedges[0], yedges[-1]]
im = axe_nonce.imshow(heatmap.T, extent=extent, aspect="auto", cmap=cmap, origin="low",
                   interpolation="nearest", norm=norm)
axe_nonce.set_xlabel("nonce MSB value")
axe_nonce.set_ylabel("signature time ({})".format(sign_disp_unit))
fig_nonce.colorbar(im, ax=axe_nonce)

fig_nonce.tight_layout()
del nonce_msb

Nonce Hamming Weight vs signature time heatmap

The heatmap should show uncorrelated variables.

Also contains a nonce Hamming Weight histogram, which should be binomially distributed.



In [ ]:

    
fig_nonce_hist = plt.figure(figsize=(10.5, 12), dpi=90)
gs = gridspec.GridSpec(2, 1, height_ratios=[2.5, 1])
axe_nonce_hist = fig_nonce_hist.add_subplot(gs[0], title="Nonce Hamming weight vs signature time")
axe_nonce_hist_hw = fig_nonce_hist.add_subplot(gs[1], sharex=axe_nonce_hist, title="Nonce Hamming weight")
nonce_hw = np.array(list(map(hw, data_trimmed["nonce"])), dtype=np.dtype("u2"))
h, xe, ye = np.histogram2d(nonce_hw, data_trimmed["sign_time"], bins=[max(nonce_hw) - min(nonce_hw), hist_size_sign_time_trim])
im = axe_nonce_hist.imshow(h.T, origin="low", cmap=cmap, aspect="auto", extent=[xe[0], xe[-1], ye[0], ye[-1]], norm=norm)
axe_nonce_hist.axvline(x=bit_size//2, alpha=0.7, linestyle="dotted", color="white", label=str(bit_size//2) + " bits")
axe_nonce_hist.set_xlabel("nonce Hamming weight")
axe_nonce_hist.set_ylabel("signature time ({})".format(sign_disp_unit))
axe_nonce_hist.legend(loc="best")

plot_hist(axe_nonce_hist_hw, nonce_hw, "nonce Hamming weight", log_scale, True, True)

param = norm_dist.fit(nonce_hw)
pdf_range = np.arange(min(nonce_hw), max(nonce_hw))
norm_pdf = norm_dist.pdf(pdf_range, *param[:-2], loc=param[-2], scale=param[-1]) * description_sign_trim.nobs
axe_nonce_hist_hw.plot(pdf_range, norm_pdf, label="fitted normal distribution")
axe_nonce_hist_hw.legend(loc="best")


display(HTML("<b>Nonce Hamming weight fitted with normal distribution:</b>"))
display(HTML(tabulate.tabulate([("Mean", "Variance"), param], tablefmt="html")))

fig_nonce_hist.tight_layout()
fig_nonce_hist.colorbar(im, ax=[axe_nonce_hist, axe_nonce_hist_hw])
del nonce_hw

Signature time histogram



In [ ]:

    
fig_sig_hist = plt.figure(figsize=(10.5, 8), dpi=90)
axe_hist_full = fig_sig_hist.add_subplot(2, 1, 1, title="Signature time")
axe_hist_trim = fig_sig_hist.add_subplot(2, 1, 2, title="Signature time (trimmed)")
plot_hist(axe_hist_full, data["sign_time"], "signature time ({})".format(sign_disp_unit), log_scale, hist_size_sign_time);
plot_hist(axe_hist_trim, data_trimmed["sign_time"], "signature time ({})".format(sign_disp_unit), log_scale, hist_size_sign_time_trim);
fig_sig_hist.tight_layout()

Verification time histogram



In [ ]:

    
fig_vrfy_hist = plt.figure(figsize=(10.5, 5), dpi=90)
axe_hist_full = fig_vrfy_hist.add_subplot(1, 1, 1, title="Verification time")
plot_hist(axe_hist_full, data["verify_time"], "verification time ({})".format(verify_disp_unit), log_scale, hist_size_sign_time);
fig_vrfy_hist.tight_layout()

Moving averages of signature and verification times



In [ ]:

    
fig_avg = plt.figure(figsize=(10.5, 8), dpi=90)
axe_sign_avg = fig_avg.add_subplot(2, 1, 1, title="Moving average of signature time")
axe_vrfy_avg = fig_avg.add_subplot(2, 1, 2, sharex=axe_sign_avg, title="Moving average of verification time")
avg_sign_100 = moving_average(data["sign_time"], 100)
avg_sign_1000 = moving_average(data["sign_time"], 1000)
axe_sign_avg.plot(avg_sign_100, label="window = 100")
axe_sign_avg.plot(avg_sign_1000, label="window = 1000")
if low_bound is not None:
    axe_sign_avg.axhline(y=low_bound, alpha=0.7, linestyle="dotted", color="green", label="Low trim bound = {}".format(low_bound))
if high_bound is not None:
    axe_sign_avg.axhline(y=high_bound, alpha=0.7, linestyle="dotted", color="orange", label="Hight trim bound = {}".format(high_bound))
axe_sign_avg.set_ylabel("signature time ({})".format(sign_disp_unit))
axe_sign_avg.set_xlabel("index")
axe_sign_avg.legend(loc="best")

avg_vrfy_100 = moving_average(data["verify_time"], 100)
avg_vrfy_1000 = moving_average(data["verify_time"], 1000)
axe_vrfy_avg.plot(avg_vrfy_100, label="window = 100")
axe_vrfy_avg.plot(avg_vrfy_1000, label="window = 1000")
axe_vrfy_avg.set_ylabel("verification time ({})".format(verify_disp_unit))
axe_vrfy_avg.set_xlabel("index")
axe_vrfy_avg.legend(loc="best")

fig_avg.tight_layout()

del avg_sign_100, avg_sign_1000, avg_vrfy_100, avg_vrfy_1000

Nonce MSB and LSB histograms

Expected to be uniform over [0, 255].



In [ ]:

    
fig_nonce_hists = plt.figure(figsize=(10.5, 8), dpi=90)
nonce_msb = np.array(list(map(lambda x: x >> (bit_size - 8), data["nonce"])), dtype=np.dtype("u1"))
nonce_lsb = np.array(list(map(lambda x: x & 0xff, data["nonce"])), dtype=np.dtype("u1"))
axe_msb_n_hist = fig_nonce_hists.add_subplot(2, 1, 1, title="Nonce MSB")
axe_lsb_n_hist = fig_nonce_hists.add_subplot(2, 1, 2, title="Nonce LSB")
plot_hist(axe_msb_n_hist, nonce_msb, "nonce MSB", log_scale, False, False)
plot_hist(axe_lsb_n_hist, nonce_lsb, "nonce LSB", log_scale, False, False)

fig_nonce_hists.tight_layout()
del nonce_msb, nonce_lsb

Nonce bit length vs signature time heatmap

Also contains nonce bit length histogram, which is expected to be axis flipped geometric distribution with $p = \frac{1}{2}$ peaking at the bit size of the order of the curve.



In [ ]:

    
fig_bl = plt.figure(figsize=(10.5, 12), dpi=90)
gs = gridspec.GridSpec(2, 1, height_ratios=[2.5, 1])
axe_bl_heat = fig_bl.add_subplot(gs[0], title="Nonce bit length vs signature time")
axe_bl_hist = fig_bl.add_subplot(gs[1], sharex=axe_bl_heat, title="Nonce bit length")
bl_data = np.array(list(map(lambda x: x.bit_length(), data_trimmed["nonce"])), dtype=np.dtype("u2"))

h, xe, ye = np.histogram2d(bl_data, data_trimmed["sign_time"], bins=[max(bl_data) - min(bl_data), hist_size_sign_time_trim])
im = axe_bl_heat.imshow(h.T, origin="low", cmap=cmap, aspect="auto", extent=[xe[0], xe[-1], ye[0], ye[-1]], norm=norm)
axe_bl_heat.set_xlabel("nonce bit length")
axe_bl_heat.set_ylabel("signature time ({})".format(sign_disp_unit))

plot_hist(axe_bl_hist, bl_data, "nonce bit length", log_scale, align="right")

fig_bl.tight_layout()
fig_bl.colorbar(im, ax=[axe_bl_heat, axe_bl_hist])

del bl_data

Nonce bit length histogram given time

Interactively shows the histogram of nonce bit length given a selected time range centered around center of width width. Ideally, the means of these conditional distributions are equal, while the variances can vary.



In [ ]:

    
fig_bl_time = plt.figure(figsize=(10.5, 5), dpi=90)
axe_bl_time = fig_bl_time.add_subplot(111)
axe_bl_time.set_autoscalex_on(False)
def f(center, width):
    lower_bnd = center - width/2
    upper_bnd = center + width/2
    values = data_trimmed[np.logical_and(data_trimmed["sign_time"] <= upper_bnd,
                                         data_trimmed["sign_time"] >= lower_bnd)]
    axe_bl_time.clear()
    axe_bl_time.set_title("Nonce bit length, given signature time $\in ({}, {})$ {}".format(int(lower_bnd), int(upper_bnd), sign_disp_unit))
    bl_data = np.array(list(map(lambda x: x.bit_length(), values["nonce"])), dtype=np.dtype("u2"))
    plot_hist(axe_bl_time, bl_data, "nonce bit length", bins=11, range=(bit_size-10, bit_size+1), align="left")
    axe_bl_time.set_xlim((bit_size-10, bit_size))
    fig_bl_time.tight_layout()

center_w = widgets.IntSlider(min=min(data_trimmed["sign_time"]),
                             max=max(data_trimmed["sign_time"]),
                             step=1,
                             value=description_sign_trim.mean,
                             continuous_update=False,
                             description="center {}".format(sign_disp_unit))
width_w = widgets.IntSlider(min=1, max=100, continuous_update=False,
                             description="width {}".format(sign_disp_unit))
w = interactive(f, center=center_w,
                width=width_w)
display(w)

Validation

Perform some tests on the produced data and compare to expected results.

This requires some information about the used curve, enter it below.



In [ ]:

    
p_str = input("The prime specifying the finite field:")
p = int(p_str, 16) if p_str.startswith("0x") else int(p_str)



In [ ]:

    
r_str = input("The order of the curve:")
r = int(r_str, 16) if r_str.startswith("0x") else int(r_str)

All of the following tests should pass (e.g. be true), given a large enough sample.



In [ ]:

    
max_priv = max(data["priv"])
max_nonce = max(data["nonce"])
un = len(np.unique(data["priv"])) != 1
if un:
    print("Private keys are smaller than order:\t\t\t" + str(max_priv < r))
    print("Private keys are larger than prime(if order > prime):\t" + str(r <= p or max_priv > p))
print("Nonces are smaller than order:\t\t\t\t" + str(max_nonce < r))
print("Nonces are larger than prime(if order > prime):\t\t" + str(r <= p or max_nonce > p))
if un:
    print("Private keys reach full bit length of order:\t\t" + str(max_priv.bit_length() == r.bit_length()))
print("Nonces reach full bit length of order:\t\t\t" + str(max_nonce.bit_length() == r.bit_length()))



In [ ]:

    
if un:
    print("Private key bit length (min, max):" + str(min(data["priv"]).bit_length()) + ", " + str(max(data["priv"]).bit_length()))
print("Nonce bit length (min, max):" + str(min(data["nonce"]).bit_length()) + ", " + str(max(data["nonce"]).bit_length()))



In [ ]:

    
print("Nonce uniqueness (no duplicates):" + str(len(np.unique(data["nonce"])) == len(data["nonce"])))