In [1]:
require 'mikon'


Out[1]:
true

In [2]:
require 'statsample'


Out[2]:
true

In [3]:
# Load the CSV file into a Mikon::DataFrame bound to df1.
# The path is absolute and specific to the author's machine.
df1 = Mikon::DataFrame.from_csv("/home/domitry/test.csv")


Out[3]:
country_or_areayearvaluevalue_footnotes
0Afghanistan20123.55234032198702
1Afghanistan20114.78662767565472
2Afghanistan20103.66803325962668
3Afghanistan20092.04265078269294
4Afghanistan20082.21057537126712
5Afghanistan20072.17127326265904
6Afghanistan20061.76635442957503
7Afghanistan20051.77275777834904
8Afghanistan20042.15363123609057
9Afghanistan20032.08503748890737
10Albania20121.45505861992816
11Albania20111.51924820573228
12Albania20101.60235862337985
13Albania20092.05325803649001
14Albania20081.97150735294118
15Albania20071.82014462809917
16Albania20061.54374966770579
17Albania20051.31487718388222
18Albania20041.35350745492471
19Albania20031.35922390513751
20Albania20021.31579371172281
21Albania20011.29960736181529
22Albania20001.2278986601211
23Albania19991.24206452644451
24Albania19981.2288819931238
25Albania19971.37870662927084
26Albania19961.51709550445667
27Albania19952.09971303737436
28Albania19942.50038576236814
29Albania19933.19929790315127
30Albania19924.64945888260734
31Albania19905.88830083175259
32Albania19895.16567637335278
33Albania19885.61500470366886
34Algeria20124.56352306707531
35Algeria20114.3457876421571
36Algeria20103.5191406518711
37Algeria20093.85231767018087
38Algeria20083.02434776522206
39Algeria20072.91888501928132
40Algeria20062.64654950286156
41Algeria20052.82994500755648
42Algeria20043.28502466053376
43Algeria20033.25570346413132
44Algeria20023.69242473417803
45Algeria20013.83240278099018
46Algeria20003.44367652668923
47Algeria19993.76752507239682
48Algeria19983.95689793175291
49Algeria19973.63283228608796
50Algeria19963.09338520538001
...............
4127Zimbabwe19884.95503147380905

In [4]:
# Add a :new_value column: the row's value plus a random Float drawn from -1.0..1.0.
# No seed is set in the visible cells, so the generated numbers presumably differ between runs.
df1.insert_column(:new_value){value + rand(-1.0..1.0)}


Out[4]:
country_or_areayearvaluevalue_footnotesnew_value
0Afghanistan20123.552340321987023.365237084162124
1Afghanistan20114.786627675654723.9451668538722586
2Afghanistan20103.668033259626682.856013622682205
3Afghanistan20092.042650782692942.933876167842473
4Afghanistan20082.210575371267121.7195780051190266
5Afghanistan20072.171273262659041.7595650544783588
6Afghanistan20061.766354429575030.9368855452634632
7Afghanistan20051.772757778349041.0581659627086681
8Afghanistan20042.153631236090571.8110235544530238
9Afghanistan20032.085037488907371.4186650724444383
10Albania20121.455058619928162.0775776362578857
11Albania20111.519248205732282.0358106972116943
12Albania20101.602358623379851.9739389479710783
13Albania20092.053258036490012.1288913914078527
14Albania20081.971507352941182.0583376681409318
15Albania20071.820144628099171.2548082598809185
16Albania20061.543749667705791.9971020485807411
17Albania20051.314877183882221.7511857562740036
18Albania20041.353507454924711.8483476512943486
19Albania20031.359223905137510.8840770979413088
20Albania20021.315793711722810.8313917588040536
21Albania20011.299607361815290.7445493102644001
22Albania20001.22789866012110.8642067486120852
23Albania19991.242064526444511.7914599836432876
24Albania19981.22888199312380.9489923780923286
25Albania19971.378706629270841.6357986769720738
26Albania19961.517095504456671.9087146554575822
27Albania19952.099713037374361.4576482430477247
28Albania19942.500385762368142.4956480482781735
29Albania19933.199297903151272.383331232822835
30Albania19924.649458882607345.190866837887674
31Albania19905.888300831752595.067854757737055
32Albania19895.165676373352785.906669993635726
33Albania19885.615004703668866.2765755885019185
34Algeria20124.563523067075313.923312230807956
35Algeria20114.34578764215714.275598054489994
36Algeria20103.51914065187112.920843064888935
37Algeria20093.852317670180874.50929651696243
38Algeria20083.024347765222063.621977821392032
39Algeria20072.918885019281322.964503545919998
40Algeria20062.646549502861562.6232410170050064
41Algeria20052.829945007556483.607501387344014
42Algeria20043.285024660533762.626784383005957
43Algeria20033.255703464131322.9442090216259125
44Algeria20023.692424734178034.507199324692448
45Algeria20013.832402780990183.758973671236258
46Algeria20003.443676526689233.037096965272122
47Algeria19993.767525072396823.6477611654852105
48Algeria19983.956897931752914.626305378224886
49Algeria19973.632832286087963.026504560801546
50Algeria19963.093385205380012.465815441783512
..................
4127Zimbabwe19884.955031473809054.840316350757924

In [7]:
# Show the rows whose value is greater than 100; df1 itself is not reassigned here.
df1.select{value > 100}


Out[7]:
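|   | country_or_area | year | value            | value_footnotes | new_value          |
|---|-----------------|------|------------------|-----------------|--------------------|
| 0 | Kuwait          | 1991 | 117.387694904726 |                 | 118.28002431223584 |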

In [8]:
# Drop the outlier rows (value > 100) by keeping every row that is NOT an outlier.
# `<=` is the exact complement of the `value > 100` probe, so a row whose value
# is exactly 100 is kept instead of being silently discarded.
df1 = df1.select{value <= 100}


Out[8]:
country_or_areayearvaluevalue_footnotesnew_value
0Afghanistan20123.552340321987023.365237084162124
1Afghanistan20114.786627675654723.9451668538722586
2Afghanistan20103.668033259626682.856013622682205
3Afghanistan20092.042650782692942.933876167842473
4Afghanistan20082.210575371267121.7195780051190266
5Afghanistan20072.171273262659041.7595650544783588
6Afghanistan20061.766354429575030.9368855452634632
7Afghanistan20051.772757778349041.0581659627086681
8Afghanistan20042.153631236090571.8110235544530238
9Afghanistan20032.085037488907371.4186650724444383
10Albania20121.455058619928162.0775776362578857
11Albania20111.519248205732282.0358106972116943
12Albania20101.602358623379851.9739389479710783
13Albania20092.053258036490012.1288913914078527
14Albania20081.971507352941182.0583376681409318
15Albania20071.820144628099171.2548082598809185
16Albania20061.543749667705791.9971020485807411
17Albania20051.314877183882221.7511857562740036
18Albania20041.353507454924711.8483476512943486
19Albania20031.359223905137510.8840770979413088
20Albania20021.315793711722810.8313917588040536
21Albania20011.299607361815290.7445493102644001
22Albania20001.22789866012110.8642067486120852
23Albania19991.242064526444511.7914599836432876
24Albania19981.22888199312380.9489923780923286
25Albania19971.378706629270841.6357986769720738
26Albania19961.517095504456671.9087146554575822
27Albania19952.099713037374361.4576482430477247
28Albania19942.500385762368142.4956480482781735
29Albania19933.199297903151272.383331232822835
30Albania19924.649458882607345.190866837887674
31Albania19905.888300831752595.067854757737055
32Albania19895.165676373352785.906669993635726
33Albania19885.615004703668866.2765755885019185
34Algeria20124.563523067075313.923312230807956
35Algeria20114.34578764215714.275598054489994
36Algeria20103.51914065187112.920843064888935
37Algeria20093.852317670180874.50929651696243
38Algeria20083.024347765222063.621977821392032
39Algeria20072.918885019281322.964503545919998
40Algeria20062.646549502861562.6232410170050064
41Algeria20052.829945007556483.607501387344014
42Algeria20043.285024660533762.626784383005957
43Algeria20033.255703464131322.9442090216259125
44Algeria20023.692424734178034.507199324692448
45Algeria20013.832402780990183.758973671236258
46Algeria20003.443676526689233.037096965272122
47Algeria19993.767525072396823.6477611654852105
48Algeria19983.956897931752914.626305378224886
49Algeria19973.632832286087963.026504560801546
50Algeria19963.093385205380012.465815441783512
..................
4126Zimbabwe19884.955031473809054.840316350757924

In [9]:
# Plot the :value column of df1 (the rendered chart is not preserved in this export).
df1[:value].plot


Out[9]:

In [10]:
# Plot the :new_value column of df1 (the rendered chart is not preserved in this export).
df1[:new_value].plot


Out[10]:

In [5]:
# Register an analysis suite under the name Statsample::Test::T. The block is
# only stored here; its report is produced when Statsample::Analysis.run_batch runs.
# NOTE(review): new_value is derived row-by-row from value, so the two samples
# are not independent -- confirm an independent-samples t test is intended.
Statsample::Analysis.store(Statsample::Test::T) do
  summary(Statsample::Test.t_two_samples_independent(df1[:value], df1[:new_value]))
end


Out[5]:
#<Statsample::Analysis::Suite:0xb8893b50 @block=#<Proc:0xb8893bc8@<main>:0>, @name=Statsample::Test::T, @attached=[], @output=#<IO:<STDOUT>>>

In [6]:
# Execute the stored analysis suite(s); the text report that follows is the printed result.
Statsample::Analysis.run_batch


Analysis 2014-10-26 22:43:32 +0900
= Statsample::Test::T
  == Two Sample T Test
    Mean and standard deviation
+-----------+--------+--------+------+
| Variable  |  mean  |   sd   |  n   |
+-----------+--------+--------+------+
| value     | 2.6196 | 3.1559 | 4128 |
| new_value | 2.6143 | 3.2142 | 4128 |
+-----------+--------+--------+------+

    Levene test for equality of variances : F(1, 8254) = 1.4994 , p = 0.2208
    T statistics
+--------------------+--------+-----------+----------------+
|        Type        |   t    |    df     | p (both tails) |
+--------------------+--------+-----------+----------------+
| Equal variance     | 0.0747 | 8254      | 0.9404         |
| Non equal variance | 0.0747 | 8251.2407 | 0.9404         |
+--------------------+--------+-----------+----------------+

    Effect size
+-------+--------+
| x1-x2 | 0.0052 |
| d     | 0.0029 |
+-------+--------+



In [21]:
# One-sample t test of the :value column against a population mean of 50 (the :u option).
t_1=Statsample::Test::T::OneSample.new(df1[:value], {:u=>50})
# summary returns the report as a String, which the notebook echoes as the cell result.
t_1.summary


Out[21]:
"= One Sample T Test\n  Sample mean: 2.6196 | Sample sd: 3.1559 | se : 0.0491\n  Population mean: 50.0000\n  t(4127) = -964.5896, p=0.0000 (both tails)\n  CI(95%): -47.4767 - -47.2841\n"