In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
In [2]:
data = np.genfromtxt('frequency_data.txt')
In [3]:
frequency_data = data[:, 0]
hour = data[:, 1]
In [4]:
%pylab inline
pylab.rcParams['figure.figsize'] = (15, 10)
fig, ax = plt.subplots()
plt.title("Frequency characteristic 16/06/2017 in Rostock",fontsize=22)
plt.xlabel("Time of day",fontsize=22)
plt.ylabel("f / Hz",fontsize=22)
x_ticks_labels = ["00:00", "01:00", "02:00", "03:00", "04:00", "05:00", "06:00",
"07:00", "08:00", "09:00", "10:00", "11:00", "12:00",
"13:00", "14:00", "15:00", "16:00", "17:00", "18:00",
"19:00", "20:00", "21:00", "22:00", "23:00", "00:00"]
ax.set_xticklabels(x_ticks_labels)
start, end = ax.get_xlim()
ax.xaxis.set_ticks([0, 240, 479, 718, 958, 1197, 1436, 1675, 1914, 2153, 2393,
2632, 2871, 3110, 3349, 3588, 3828, 4067, 4306, 4545,
4784, 5023, 5263, 5502, 5741])
ax.yaxis.set_major_formatter(FormatStrFormatter('%.3f'))
fig.autofmt_xdate()
plt.grid()
plt.plot(frequency_data)
plt.show()
In [5]:
hour01 = np.mean(frequency_data[:240])
hour02 = np.mean(frequency_data[240:479])
hour03 = np.mean(frequency_data[479:718])
hour04 = np.mean(frequency_data[718:958])
hour05 = np.mean(frequency_data[958:1197])
hour06 = np.mean(frequency_data[1197:1436])
hour07 = np.mean(frequency_data[1436:1675])
hour08 = np.mean(frequency_data[1675:1914])
hour09 = np.mean(frequency_data[1914:2153])
hour10 = np.mean(frequency_data[2153:2393])
hour11 = np.mean(frequency_data[2393:2632])
hour12 = np.mean(frequency_data[2632:2871])
hour13 = np.mean(frequency_data[2871:3110])
hour14 = np.mean(frequency_data[3110:3349])
hour15 = np.mean(frequency_data[3349:3588])
hour16 = np.mean(frequency_data[3588:3828])
hour17 = np.mean(frequency_data[3828:4067])
hour18 = np.mean(frequency_data[4067:4306])
hour19 = np.mean(frequency_data[4306:4545])
hour20 = np.mean(frequency_data[4545:4784])
hour21 = np.mean(frequency_data[4784:5023])
hour22 = np.mean(frequency_data[5023:5263])
hour23 = np.mean(frequency_data[5263:5502])
hour00 = np.mean(frequency_data[5502:5741])
In [6]:
hour_array = np.array([hour01, hour02, hour03, hour04, hour05, hour06, hour07, hour08, hour09, hour10,
hour11, hour12, hour13, hour14, hour15, hour16, hour17, hour18, hour19, hour20,
hour21, hour22, hour23, hour00])
In [7]:
fig, ax2 = plt.subplots()
plt.title("Frequency characteristic 16/06/2017 in Rostock",fontsize=22)
plt.xlabel("Time of day",fontsize=22)
plt.ylabel("f / Hz",fontsize=22)
x_ticks_labels = ["01:00", "02:00", "03:00", "04:00", "05:00", "06:00",
"07:00", "08:00", "09:00", "10:00", "11:00", "12:00",
"13:00", "14:00", "15:00", "16:00", "17:00", "18:00",
"19:00", "20:00", "21:00", "22:00", "23:00", "00:00"]
ax2.set_xticklabels(x_ticks_labels)
start, end = ax.get_xlim()
ax2.xaxis.set_ticks([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24])
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
ax2.yaxis.set_major_formatter(FormatStrFormatter('%.3f'))
fig.autofmt_xdate()
plt.grid()
plt.ylim([49.97,50.04])
plt.plot(hour_array, linewidth=2, label='temporal average of mains frequency')
ax2.axhline(y=50.00, color="k",linewidth=1)
axhspan(49.99, 50.01 , facecolor='g', alpha=0.5, label='dead band')
axhspan(50.01, 50.04 , facecolor='r', alpha=0.5, label='adjustment')
axhspan(49.99, 49.97 , facecolor='r', alpha=0.5)
ax2.legend(loc='lower right')
plt.show()
In [8]:
hour_array
Out[8]:
In [9]:
bins = np.array([ 49.934 , 49.93697959, 49.93995918, 49.94293878,
49.94591837, 49.94889796, 49.95187755, 49.95485714,
49.95783673, 49.96081633, 49.96379592, 49.96677551,
49.9697551 , 49.97273469, 49.97571429, 49.97869388,
49.98167347, 49.98465306, 49.98763265, 49.99061224,
49.99359184, 49.99657143, 49.99955102, 50.00253061,
50.0055102 , 50.0084898 , 50.01146939, 50.01444898,
50.01742857, 50.02040816, 50.02338776, 50.02636735,
50.02934694, 50.03232653, 50.03530612, 50.03828571,
50.04126531, 50.0442449 , 50.04722449, 50.05020408,
50.05318367, 50.05616327, 50.05914286, 50.06212245,
50.06510204, 50.06808163, 50.07106122, 50.07404082,
50.07702041, 50.08 ])
In [10]:
my = np.mean(frequency_data)
sig = np.std(frequency_data)
In [11]:
plt.hist(frequency_data, bins=bins)
plt.title(" Histogram $\mu$ = %.3f, $\sigma$ = %.3f" % (my, sig),fontsize=22)
plt.xlabel("f / Hz",fontsize=22)
plt.ylabel("Absolute Frequency",fontsize=22)
plt.grid()
plt.show()
In [12]:
weights = np.ones_like(frequency_data)/float(len(frequency_data))
counts, bin_edges = np.histogram(frequency_data, bins=bins, weights=weights)
cdf = np.cumsum(counts)
plt.title("Cumulative Density Function" ,fontsize=22)
plt.xlabel("f / Hz",fontsize=22)
plt.ylabel("CDF",fontsize=22)
plt.grid()
plt.plot(bin_edges[1:], cdf, linewidth=1.6)
plt.show()
In [13]:
from scipy import stats
stats.anderson(frequency_data)
Out[13]:
In [14]:
stats.kstest(frequency_data, 'norm')
Out[14]:
In [15]:
x = np.random.normal(0,1,1000)
test_stat = stats.kstest(x, 'norm')
In [16]:
stats.anderson(x)
Out[16]:
In [ ]:
In [ ]:
In [17]:
poisson = np.random.poisson(5, 10000)
In [18]:
stats.kstest(poisson, 'norm')
Out[18]:
"If the K-S statistic is small or the p-value is high, then we cannot reject the hypothesis that the distributions of the two samples are the same."
In [19]:
frequency_data_normalized = (frequency_data - np.mean(frequency_data)) / np.std(frequency_data)
In [20]:
from scipy.stats import norm
counts, bin_edges = np.histogram(frequency_data_normalized, weights=weights, bins = 139)
cdf = np.cumsum(counts)
plt.title("Visual comparison between CDF of data and standard normal" ,fontsize=22)
plt.xlabel("f / Hz",fontsize=22)
plt.ylabel("CDF",fontsize=22)
plt.grid()
plt.plot(bin_edges[1:], cdf, linewidth=3, color='b', label='Data CDF')
standard = norm.cdf(bin_edges)
plt.plot(bin_edges, standard, linewidth=3, color='r', label='Standard Normal CDF')
plt.ylim([0,1])
plt.legend(loc='lower right')
plt.show()
In [21]:
stats.kstest(frequency_data_normalized, 'norm')
Out[21]:
In [22]:
stats.kstest(frequency_data_normalized, 'norm')
Out[22]: