In [55]:

    
import csv
import itertools

import matplotlib.pyplot
import numpy
import scipy
import scipy.stats
%matplotlib inline

12.1



In [9]:

    
# Task 12.1 data: two equal-length samples (14 values each) that the next cell
# compares with Kendall's tau.
Russia = numpy.array([
    108.2, 117.4, 123.5, 113.9, 119.0, 124.1, 128.4,
    108.5, 118.0, 124.2, 114.5, 119.6, 124.6, 128.7,
])
Ukraine = numpy.array([
    107, 116, 118, 101, 105, 111, 112,
    109, 117, 121, 103, 108, 114, 115,
])



In [15]:

    
# Kendall rank-correlation test applied to the two country samples; the
# result (tau and p-value) is the cell's displayed output.
scipy.stats.kendalltau(x=Russia, y=Ukraine)









    Out[15]:





KendalltauResult(correlation=0.40659340659340659, pvalue=0.042809803653435889)

Гипотеза "Выборки независимы" отвергается.

12.2



In [16]:

    
# Task 12.2 data: two integer samples of 14 values each, compared with
# Kendall's tau in the next cell.
Eat = numpy.array([
    9, 1, 3, 7, 3, 5, 1,
    0, 60, 103, 2, 9, 0, 0,
])
Gpa = numpy.array([
    4, 3, 8, 7, 3, 8, 7,
    5, 8, 10, 6, 5, 7, 8,
])



In [19]:

    
# Kendall rank-correlation test between the Eat and Gpa samples; the result
# is the cell's displayed output.
scipy.stats.kendalltau(x=Eat, y=Gpa)









    Out[19]:





KendalltauResult(correlation=0.16977493752543307, pvalue=0.39767424864640122)

Гипотеза "Выборки независимы" не отвергается.

12.3



In [199]:

    
# Task 12.3: Spearman rank correlation between two five-element samples.
trend_x = [1.05, 1.12, 1.37, 1.50, 1.51]
trend_y = [1.73, 1.85, 1.98, 2.03, 2.17]
spearman_result = scipy.stats.spearmanr(trend_x, trend_y)

# The p-value returned by spearmanr is an asymptotic approximation and is not
# accurate for only five observations (it reports ~1e-24 here).  The exact
# two-sided p-value is obtained by enumerating every ordering of the second
# sample and counting how often |rho| is at least as large as the observed one.
observed_abs_rho = abs(spearman_result[0])
permutation_rhos = numpy.array([
    scipy.stats.spearmanr(trend_x, shuffled)[0]
    for shuffled in itertools.permutations(trend_y)
])
# Small tolerance so that rounding in rho (e.g. 0.9999999999999999 vs 1.0)
# does not exclude the observed arrangement itself.
exact_pvalue = numpy.mean(numpy.abs(permutation_rhos) >= observed_abs_rho - 1e-12)
print('Exact permutation p-value:', exact_pvalue)

# Keep the original SpearmanrResult as the cell's displayed value.
spearman_result









    Out[199]:





SpearmanrResult(correlation=0.99999999999999989, pvalue=1.4042654220543672e-24)

Следовательно, коэффициенты возрастают, и это явно не случайный вектор с одинаково распределёнными независимыми случайными величинами.

12.4



In [27]:

    
# Task 12.4: read the space-delimited table and take its first two columns as
# the two samples.  The file is opened in a `with` block so the handle is
# closed as soon as the rows have been read instead of being left open.
with open('task4.txt', 'r') as task_4_file:
    task_4 = numpy.array(list(csv.reader(task_4_file, delimiter=' '))).astype(float)
sample_1 = task_4[:, 0]
sample_2 = task_4[:, 1]



In [30]:

    
# Kendall rank-correlation test between the two columns loaded from task4.txt.
scipy.stats.kendalltau(x=sample_1, y=sample_2)









    Out[30]:





KendalltauResult(correlation=0.00054316535965837792, pvalue=0.98873083336118517)



In [36]:

    
# Spearman rank-correlation test on the same two columns, for comparison with
# the Kendall result.
scipy.stats.spearmanr(a=sample_1, b=sample_2)









    Out[36]:





SpearmanrResult(correlation=-0.00065352302302241333, pvalue=0.99094610119301385)

Гипотеза "Выборки независимы" не отвергается.

12.5



In [33]:

    
def _split_counts(first_count, group_size):
    """Return the row [first_count, group_size - first_count] as an array."""
    return numpy.array([first_count, group_size - first_count])


# Task 12.5: one row of counts per group; each row sums to the group size.
Treat = _split_counts(853, 1719)
Placebo = _split_counts(369, 781)



In [37]:

    
# Stack the two rows of counts into a 2x2 table and run the chi-square test of
# independence on it; the result tuple is the cell's displayed output.
contingency_table = numpy.vstack((Treat, Placebo))
scipy.stats.chi2_contingency(observed=contingency_table)









    Out[37]:





(1.1188239257192292, 0.29017183413389314, 1, array([[ 840.2472,  878.7528],
        [ 381.7528,  399.2472]]))

Гипотеза "Выборки независимы" не отвергается.

12.6



In [61]:

    
# Task 12.6: load electricity.csv.  The file is opened in a `with` block so
# the handle is closed once the rows are read instead of being left open.
with open('electricity.csv', 'r') as electricity_file:
    source_6 = numpy.array(list(csv.reader(electricity_file)))
# The first row is skipped before converting to float (presumably a header
# row -- confirm against the file).
task_6 = source_6[1:, :].astype(float)
Consumption = task_6[:, 1]
Temperature = task_6[:, 2]



In [62]:

    
# Kendall rank-correlation test between the consumption and temperature columns.
scipy.stats.kendalltau(x=Consumption, y=Temperature)









    Out[62]:





KendalltauResult(correlation=0.41337814259124339, pvalue=0.0)

Гипотеза "Выборки независимы" отвергается.



In [63]:

    
# Column 3 of the numeric electricity table; the plotting cells below pass it
# as the x-values of the plots whose x-axis is labelled 'Время'.
grid = task_6[:, 3]



In [65]:

    
# Consumption column drawn against the `grid` column on a 15x8 inch figure.
demand_fig, demand_ax = matplotlib.pyplot.subplots(figsize=(15, 8))
demand_ax.plot(grid, Consumption, label='Спрос на электричество', color='red', alpha=0.5)
demand_ax.legend()
demand_ax.set_xlabel('Время')
demand_ax.grid()
matplotlib.pyplot.show()



In [66]:

    
# Temperature column drawn against the `grid` column on a 15x8 inch figure.
temperature_fig, temperature_ax = matplotlib.pyplot.subplots(figsize=(15, 8))
temperature_ax.plot(grid, Temperature, label='Температура', color='red', alpha=0.5)
temperature_ax.legend()
temperature_ax.set_xlabel('Время')
temperature_ax.grid()
matplotlib.pyplot.show()



In [70]:

    
# Pearson autocorrelation of the consumption series for lags 0..100.
# The lagged pair is built by trimming both ends rather than with numpy.roll:
# roll is a circular shift, so it would pair the first `lag` observations with
# the last `lag` ones even though those are not `lag` steps apart.
# Assumes the series is much longer than the largest lag (100).
series_length = len(Consumption)
rho_6 = [
    scipy.stats.pearsonr(Consumption[:series_length - lag], Consumption[lag:])[0]
    for lag in range(101)
]



In [72]:

    
# Correlation coefficients from rho_6 plotted against the shift 0..100.
shift_fig, shift_ax = matplotlib.pyplot.subplots(figsize=(15, 8))
shift_ax.plot(numpy.arange(101), rho_6, label='Пирсон', color='red', alpha=0.5)
shift_ax.legend()
shift_ax.set_xlabel('Сдвиг')
shift_ax.grid()
matplotlib.pyplot.show()

Максимумы приходятся на сдвиги 0, 48 и 96 получасов, так как в сутках 48 получасов.

12.7



In [198]:

    
# Task 12.7, first experiment: two identical increasing sequences 0..999 in
# which the entries at indices 200, 400, 600 and 800 are replaced by opposite
# outliers (-4500 in the first sample, +4500 in the second).
sample_1_b = numpy.arange(1000)
sample_2_b = numpy.arange(1000)
sample_1_b[200:801:200] = -4500
sample_2_b[200:801:200] = 4500

# Print the three correlation tests in the same order: Spearman, Kendall, Pearson.
for correlation_test in (scipy.stats.spearmanr, scipy.stats.kendalltau, scipy.stats.pearsonr):
    print(correlation_test(sample_1_b, sample_2_b))









    



SpearmanrResult(correlation=0.97609597466173315, pvalue=0.0)
KendalltauResult(correlation=0.98404785643070802, pvalue=0.0)
(0.02106640739030801, 0.50578257705636576)



In [191]:

    
# Task 12.7, second experiment: the same construction as before but with much
# larger opposite outliers (-10000000 / +10000000) at indices 200, 400, 600, 800.
sample_1_b = numpy.arange(1000)
sample_2_b = numpy.arange(1000)
sample_1_b[200:801:200] = -10000000
sample_2_b[200:801:200] = 10000000

# Print the three correlation tests in the same order: Spearman, Kendall, Pearson.
for rank_or_linear_test in (scipy.stats.spearmanr, scipy.stats.kendalltau, scipy.stats.pearsonr):
    print(rank_or_linear_test(sample_1_b, sample_2_b))









    



SpearmanrResult(correlation=0.97609597466173315, pvalue=0.0)
KendalltauResult(correlation=0.98404785643070802, pvalue=0.0)
(-0.99999958266449773, 0.0)

12.8

12.9

Пирсон: $c_{ij}(X) = X_i - X_j$.

Спирмэн: $c_{ij}(X) = R_i - R_j$.

Кэндалл: $c_{ij}(X) = \operatorname{sgn}(R_i - R_j)$.