Mains frequency measurement for one day


In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter

In [2]:
# Load one day of mains-frequency measurements.
# Assumes a whitespace-delimited text file with at least two columns
# (frequency, hour) — TODO confirm the exact file format against the source.
data = np.genfromtxt('frequency_data.txt')

In [3]:
# Column 0: mains frequency in Hz (plotted and analysed below).
# Column 1: presumably a time-of-day / hour stamp — NOTE(review): `hour` is
# never used again in this notebook; verify the column meaning against the file.
frequency_data = data[:, 0]
hour = data[:, 1]

In [4]:
%pylab inline
pylab.rcParams['figure.figsize'] = (15, 10)
fig, ax = plt.subplots()


plt.title("Frequency characteristic 16/06/2017 in Rostock",fontsize=22)
plt.xlabel("Time of day",fontsize=22)
plt.ylabel("f / Hz",fontsize=22)

x_ticks_labels = ["00:00", "01:00", "02:00", "03:00", "04:00", "05:00", "06:00",
                  "07:00", "08:00", "09:00", "10:00", "11:00", "12:00",
                  "13:00", "14:00", "15:00", "16:00", "17:00", "18:00",
                  "19:00", "20:00", "21:00", "22:00", "23:00", "00:00"]

ax.set_xticklabels(x_ticks_labels)
start, end = ax.get_xlim()
ax.xaxis.set_ticks([0, 240, 479, 718, 958, 1197, 1436, 1675, 1914, 2153, 2393,
                    2632, 2871, 3110, 3349, 3588, 3828, 4067, 4306, 4545,
                    4784, 5023, 5263, 5502, 5741])

ax.yaxis.set_major_formatter(FormatStrFormatter('%.3f'))
fig.autofmt_xdate()
plt.grid()
plt.plot(frequency_data)

plt.show()


Populating the interactive namespace from numpy and matplotlib

Averaging mains frequency per hour


In [5]:
# Hour boundaries (sample indices) inside frequency_data. The day is not an
# exact multiple of 24 samples, so the boundaries are listed explicitly
# (spacing alternates between 239 and 240 samples) rather than computed.
HOUR_BOUNDS = [0, 240, 479, 718, 958, 1197, 1436, 1675, 1914, 2153, 2393,
               2632, 2871, 3110, 3349, 3588, 3828, 4067, 4306, 4545,
               4784, 5023, 5263, 5502, 5741]

# Mean frequency for each of the 24 hourly slices — replaces 24 copy-pasted
# np.mean calls with one loop over consecutive boundary pairs.
_hourly_means = [np.mean(frequency_data[lo:hi])
                 for lo, hi in zip(HOUR_BOUNDS[:-1], HOUR_BOUNDS[1:])]

# Keep the original per-hour names so the downstream cells still work
# (hour01 covers 00:00-01:00, ..., hour00 covers 23:00-24:00).
(hour01, hour02, hour03, hour04, hour05, hour06, hour07, hour08,
 hour09, hour10, hour11, hour12, hour13, hour14, hour15, hour16,
 hour17, hour18, hour19, hour20, hour21, hour22, hour23, hour00) = _hourly_means

In [6]:
# Collect the 24 hourly averages (01:00 ... 23:00, then midnight) into one
# array for plotting.
_hours_in_order = [hour01, hour02, hour03, hour04, hour05, hour06,
                   hour07, hour08, hour09, hour10, hour11, hour12,
                   hour13, hour14, hour15, hour16, hour17, hour18,
                   hour19, hour20, hour21, hour22, hour23, hour00]
hour_array = np.array(_hours_in_order)

In [7]:
fig, ax2 = plt.subplots()


plt.title("Frequency characteristic 16/06/2017 in Rostock",fontsize=22)
plt.xlabel("Time of day",fontsize=22)
plt.ylabel("f / Hz",fontsize=22)

# 24 labels — one per hourly mean plotted at x = 0..23.
x_ticks_labels = ["01:00", "02:00", "03:00", "04:00", "05:00", "06:00",
                  "07:00", "08:00", "09:00", "10:00", "11:00", "12:00",
                  "13:00", "14:00", "15:00", "16:00", "17:00", "18:00",
                  "19:00", "20:00", "21:00", "22:00", "23:00", "00:00"]

# Fixes vs. the original cell:
#  * ticks are set before labels (set_ticks resets the labels),
#  * 24 tick positions to match the 24 labels (the original set 25 ticks),
#  * get_xlim was queried on `ax` (the previous figure) and never used — removed,
#  * the unused list `x = [0, ..., 23]` — removed,
#  * axhspan was called bare (it only resolved via %pylab's star import);
#    it is now called explicitly on ax2.
ax2.xaxis.set_ticks(list(range(24)))
ax2.set_xticklabels(x_ticks_labels)

ax2.yaxis.set_major_formatter(FormatStrFormatter('%.3f'))
fig.autofmt_xdate()
plt.grid()
plt.ylim([49.97,50.04])
plt.plot(hour_array, linewidth=2, label='temporal average of mains frequency')
# Nominal 50 Hz reference line plus the control bands around it.
ax2.axhline(y=50.00, color="k",linewidth=1)
ax2.axhspan(49.99, 50.01 , facecolor='g', alpha=0.5, label='dead band')
ax2.axhspan(50.01, 50.04 , facecolor='r', alpha=0.5, label='adjustment')
ax2.axhspan(49.99, 49.97 , facecolor='r', alpha=0.5)
ax2.legend(loc='lower right')
plt.show()



In [8]:
# Display the 24 hourly mean frequencies (rich output of the last expression).
hour_array


Out[8]:
array([ 50.01078333,  49.99338075,  49.99929707,  49.992975  ,
        50.00352301,  50.00957741,  50.00379498,  50.00119665,
        50.01045607,  50.0035    ,  49.99538075,  49.99648954,
        49.99737238,  49.99843096,  49.99539749,  49.99571667,
        49.99582008,  50.02070711,  50.02456067,  50.02361506,
        50.03486192,  50.02264167,  50.01413389,  50.01377406])

In [9]:
# 50 equally spaced histogram bin edges from 49.934 Hz to 50.08 Hz (49 bins).
# The original cell pasted the 50 values as a hard-coded literal; they are
# exactly np.linspace(49.934, 50.08, 50), so compute them instead — shorter
# and immune to transcription errors.
bins = np.linspace(49.934, 50.08, 50)

In [10]:
# Sample mean and standard deviation of the full day's frequency trace
# (used in the histogram title below).
my = frequency_data.mean()
sig = frequency_data.std()

In [11]:
plt.hist(frequency_data, bins=bins)
# Raw string for the mathtext title: '\m' and '\s' are invalid escape
# sequences in a normal string literal (DeprecationWarning today, a
# SyntaxError in future Python); the r-prefix keeps the runtime value
# identical while making the literal well-formed.
plt.title(r" Histogram $\mu$ =  %.3f, $\sigma$ = %.3f" % (my, sig),fontsize=22)
plt.xlabel("f / Hz",fontsize=22)
plt.ylabel("Absolute Frequency",fontsize=22)
plt.grid()
plt.show()



In [12]:
# Empirical CDF: histogram with uniform weights 1/N, then a running sum of
# the per-bin probabilities. `weights` is reused by a later cell.
n_samples = len(frequency_data)
weights = np.ones_like(frequency_data) / float(n_samples)
counts, bin_edges = np.histogram(frequency_data, bins=bins, weights=weights)
cdf = counts.cumsum()

plt.title("Cumulative Density Function" ,fontsize=22)
plt.xlabel("f / Hz",fontsize=22)
plt.ylabel("CDF",fontsize=22)
plt.grid()
# Plot against the right bin edges so each point is P(f <= edge).
plt.plot(bin_edges[1:], cdf, linewidth=1.6)

plt.show()



In [13]:
# NOTE(review): mid-notebook import — conventionally this belongs in the
# import cell at the top.
from scipy import stats
# Anderson-Darling normality test on the raw data: the statistic (~1.17,
# see Out[13]) exceeds even the 1% critical value (~1.09), so normality
# is rejected at every listed significance level.
stats.anderson(frequency_data)


Out[13]:
AndersonResult(statistic=1.1749407422948934, critical_values=array([ 0.576,  0.656,  0.786,  0.917,  1.091]), significance_level=array([ 15. ,  10. ,   5. ,   2.5,   1. ]))

In [14]:
# KS test of the RAW data against the STANDARD normal N(0, 1): the data sit
# near 50 Hz, so the two distributions barely overlap and the result is
# statistic = 1.0, p = 0 (Out[14]). The data must be standardized first
# (done below) for this comparison to be meaningful.
stats.kstest(frequency_data, 'norm')


Out[14]:
KstestResult(statistic=1.0, pvalue=0.0)

In [15]:
# Sanity check: draw a genuinely standard-normal sample and confirm the
# normality tests behave as expected. Seed the generator so the notebook is
# reproducible under Restart & Run All (the original cell drew unseeded
# random numbers, so its outputs changed on every run).
np.random.seed(0)
x = np.random.normal(0, 1, 1000)

test_stat = stats.kstest(x, 'norm')

In [16]:
# Anderson-Darling on the known-normal sample: the statistic (~0.48, see
# Out[16]) is below all critical values, so normality is not rejected —
# the test behaves as expected on a true normal input.
stats.anderson(x)


Out[16]:
AndersonResult(statistic=0.47824099056447267, critical_values=array([ 0.574,  0.653,  0.784,  0.914,  1.088]), significance_level=array([ 15. ,  10. ,   5. ,   2.5,   1. ]))

In [ ]:


In [ ]:


In [17]:
# Poisson(lambda=5) sample as a deliberately non-normal reference for the
# KS test below. Seeded for reproducibility (the original cell was unseeded).
np.random.seed(1)
poisson = np.random.poisson(5, 10000)

In [18]:
# KS test of the discrete, mean-5 Poisson sample against N(0, 1): the
# statistic ~0.94 with p = 0 (Out[18]) rejects the match, as expected for
# an unstandardized, non-normal sample.
stats.kstest(poisson, 'norm')


Out[18]:
KstestResult(statistic=0.93804986805182078, pvalue=0.0)

"If the K-S statistic is small or the p-value is high, then we cannot reject the hypothesis that the distributions of the two samples are the same."

Normalizing the data (z-score standardization) so it can be compared against the standard normal distribution


In [19]:
# Z-score standardization: shift to zero mean and scale to unit standard
# deviation so the sample is comparable with the standard normal N(0, 1).
_center = np.mean(frequency_data)
_scale = np.std(frequency_data)
frequency_data_normalized = (frequency_data - _center) / _scale

In [20]:
# NOTE: `weights` is reused from the earlier CDF cell (one entry per sample).
from scipy.stats import norm

# Empirical CDF of the standardized data over 139 automatic bins.
counts, bin_edges = np.histogram(frequency_data_normalized, weights=weights, bins = 139)
cdf = counts.cumsum()

plt.title("Visual comparison between CDF of data and standard normal" ,fontsize=22)
plt.xlabel("f / Hz",fontsize=22)
plt.ylabel("CDF",fontsize=22)
plt.grid()

plt.plot(bin_edges[1:], cdf, linewidth=3, color='b', label='Data CDF')
# Analytic standard-normal CDF evaluated at the same bin edges.
plt.plot(bin_edges, norm.cdf(bin_edges), linewidth=3, color='r',
         label='Standard Normal CDF')
plt.ylim([0,1])
plt.legend(loc='lower right')

plt.show()


Kolmogorov-Smirnov-Test


In [21]:
# KS test on the standardized data: statistic ~0.015, p ~ 0.14 (Out[21]) —
# at the usual 5% level we cannot reject that the mains frequency is
# normally distributed.
stats.kstest(frequency_data_normalized, 'norm')


Out[21]:
KstestResult(statistic=0.015139033623445597, pvalue=0.14381220725923327)

In [22]:
# NOTE(review): this cell is an exact duplicate of the previous KS-test
# cell (In [21], identical output) and can be deleted.
stats.kstest(frequency_data_normalized, 'norm')


Out[22]:
KstestResult(statistic=0.015139033623445597, pvalue=0.14381220725923327)