In [1]:
require 'mikon'
Out[1]:
true
In [2]:
require 'statsample'
Out[2]:
true
In [3]:
# Load the CSV into a Mikon DataFrame. The path is an absolute location on the
# original author's machine, so it must be adjusted to run this cell elsewhere.
# The rendered output shows the columns country_or_area, year, value and
# value_footnotes.
df1 = Mikon::DataFrame.from_csv("/home/domitry/test.csv")
Out[3]:
country_or_area year value value_footnotes 0 Afghanistan 2012 3.55234032198702 1 Afghanistan 2011 4.78662767565472 2 Afghanistan 2010 3.66803325962668 3 Afghanistan 2009 2.04265078269294 4 Afghanistan 2008 2.21057537126712 5 Afghanistan 2007 2.17127326265904 6 Afghanistan 2006 1.76635442957503 7 Afghanistan 2005 1.77275777834904 8 Afghanistan 2004 2.15363123609057 9 Afghanistan 2003 2.08503748890737 10 Albania 2012 1.45505861992816 11 Albania 2011 1.51924820573228 12 Albania 2010 1.60235862337985 13 Albania 2009 2.05325803649001 14 Albania 2008 1.97150735294118 15 Albania 2007 1.82014462809917 16 Albania 2006 1.54374966770579 17 Albania 2005 1.31487718388222 18 Albania 2004 1.35350745492471 19 Albania 2003 1.35922390513751 20 Albania 2002 1.31579371172281 21 Albania 2001 1.29960736181529 22 Albania 2000 1.2278986601211 23 Albania 1999 1.24206452644451 24 Albania 1998 1.2288819931238 25 Albania 1997 1.37870662927084 26 Albania 1996 1.51709550445667 27 Albania 1995 2.09971303737436 28 Albania 1994 2.50038576236814 29 Albania 1993 3.19929790315127 30 Albania 1992 4.64945888260734 31 Albania 1990 5.88830083175259 32 Albania 1989 5.16567637335278 33 Albania 1988 5.61500470366886 34 Algeria 2012 4.56352306707531 35 Algeria 2011 4.3457876421571 36 Algeria 2010 3.5191406518711 37 Algeria 2009 3.85231767018087 38 Algeria 2008 3.02434776522206 39 Algeria 2007 2.91888501928132 40 Algeria 2006 2.64654950286156 41 Algeria 2005 2.82994500755648 42 Algeria 2004 3.28502466053376 43 Algeria 2003 3.25570346413132 44 Algeria 2002 3.69242473417803 45 Algeria 2001 3.83240278099018 46 Algeria 2000 3.44367652668923 47 Algeria 1999 3.76752507239682 48 Algeria 1998 3.95689793175291 49 Algeria 1997 3.63283228608796 50 Algeria 1996 3.09338520538001 ... ... ... ... ... 4127 Zimbabwe 1988 4.95503147380905
In [4]:
# Append a :new_value column: each row's `value` plus a random Float offset
# drawn from -1.0..1.0 (Kernel#rand with a Float range).
# NOTE(review): the block presumably runs once per row with column names
# exposed as methods; confirm against the Mikon documentation.
df1.insert_column(:new_value) do
  value + rand(-1.0..1.0)
end
Out[4]:
country_or_area year value value_footnotes new_value 0 Afghanistan 2012 3.55234032198702 3.365237084162124 1 Afghanistan 2011 4.78662767565472 3.9451668538722586 2 Afghanistan 2010 3.66803325962668 2.856013622682205 3 Afghanistan 2009 2.04265078269294 2.933876167842473 4 Afghanistan 2008 2.21057537126712 1.7195780051190266 5 Afghanistan 2007 2.17127326265904 1.7595650544783588 6 Afghanistan 2006 1.76635442957503 0.9368855452634632 7 Afghanistan 2005 1.77275777834904 1.0581659627086681 8 Afghanistan 2004 2.15363123609057 1.8110235544530238 9 Afghanistan 2003 2.08503748890737 1.4186650724444383 10 Albania 2012 1.45505861992816 2.0775776362578857 11 Albania 2011 1.51924820573228 2.0358106972116943 12 Albania 2010 1.60235862337985 1.9739389479710783 13 Albania 2009 2.05325803649001 2.1288913914078527 14 Albania 2008 1.97150735294118 2.0583376681409318 15 Albania 2007 1.82014462809917 1.2548082598809185 16 Albania 2006 1.54374966770579 1.9971020485807411 17 Albania 2005 1.31487718388222 1.7511857562740036 18 Albania 2004 1.35350745492471 1.8483476512943486 19 Albania 2003 1.35922390513751 0.8840770979413088 20 Albania 2002 1.31579371172281 0.8313917588040536 21 Albania 2001 1.29960736181529 0.7445493102644001 22 Albania 2000 1.2278986601211 0.8642067486120852 23 Albania 1999 1.24206452644451 1.7914599836432876 24 Albania 1998 1.2288819931238 0.9489923780923286 25 Albania 1997 1.37870662927084 1.6357986769720738 26 Albania 1996 1.51709550445667 1.9087146554575822 27 Albania 1995 2.09971303737436 1.4576482430477247 28 Albania 1994 2.50038576236814 2.4956480482781735 29 Albania 1993 3.19929790315127 2.383331232822835 30 Albania 1992 4.64945888260734 5.190866837887674 31 Albania 1990 5.88830083175259 5.067854757737055 32 Albania 1989 5.16567637335278 5.906669993635726 33 Albania 1988 5.61500470366886 6.2765755885019185 34 Algeria 2012 4.56352306707531 3.923312230807956 35 Algeria 2011 4.3457876421571 4.275598054489994 36 Algeria 2010 3.5191406518711 2.920843064888935 37 
Algeria 2009 3.85231767018087 4.50929651696243 38 Algeria 2008 3.02434776522206 3.621977821392032 39 Algeria 2007 2.91888501928132 2.964503545919998 40 Algeria 2006 2.64654950286156 2.6232410170050064 41 Algeria 2005 2.82994500755648 3.607501387344014 42 Algeria 2004 3.28502466053376 2.626784383005957 43 Algeria 2003 3.25570346413132 2.9442090216259125 44 Algeria 2002 3.69242473417803 4.507199324692448 45 Algeria 2001 3.83240278099018 3.758973671236258 46 Algeria 2000 3.44367652668923 3.037096965272122 47 Algeria 1999 3.76752507239682 3.6477611654852105 48 Algeria 1998 3.95689793175291 4.626305378224886 49 Algeria 1997 3.63283228608796 3.026504560801546 50 Algeria 1996 3.09338520538001 2.465815441783512 ... ... ... ... ... ... 4127 Zimbabwe 1988 4.95503147380905 4.840316350757924
In [7]:
# Show the rows whose `value` exceeds 100 (a single Kuwait 1991 row in the
# recorded output).
df1.select do
  value > 100
end
Out[7]:
country_or_area year value value_footnotes new_value 0 Kuwait 1991 117.387694904726 118.28002431223584
In [8]:
# Drop the outlier rows (value > 100) and keep everything else.
# `<=` makes this the exact complement of the `value > 100` filter shown in the
# previous cell; with `<`, a row whose value is exactly 100 would be removed
# without ever being reported as an outlier.
df1 = df1.select { value <= 100 }
Out[8]:
country_or_area year value value_footnotes new_value 0 Afghanistan 2012 3.55234032198702 3.365237084162124 1 Afghanistan 2011 4.78662767565472 3.9451668538722586 2 Afghanistan 2010 3.66803325962668 2.856013622682205 3 Afghanistan 2009 2.04265078269294 2.933876167842473 4 Afghanistan 2008 2.21057537126712 1.7195780051190266 5 Afghanistan 2007 2.17127326265904 1.7595650544783588 6 Afghanistan 2006 1.76635442957503 0.9368855452634632 7 Afghanistan 2005 1.77275777834904 1.0581659627086681 8 Afghanistan 2004 2.15363123609057 1.8110235544530238 9 Afghanistan 2003 2.08503748890737 1.4186650724444383 10 Albania 2012 1.45505861992816 2.0775776362578857 11 Albania 2011 1.51924820573228 2.0358106972116943 12 Albania 2010 1.60235862337985 1.9739389479710783 13 Albania 2009 2.05325803649001 2.1288913914078527 14 Albania 2008 1.97150735294118 2.0583376681409318 15 Albania 2007 1.82014462809917 1.2548082598809185 16 Albania 2006 1.54374966770579 1.9971020485807411 17 Albania 2005 1.31487718388222 1.7511857562740036 18 Albania 2004 1.35350745492471 1.8483476512943486 19 Albania 2003 1.35922390513751 0.8840770979413088 20 Albania 2002 1.31579371172281 0.8313917588040536 21 Albania 2001 1.29960736181529 0.7445493102644001 22 Albania 2000 1.2278986601211 0.8642067486120852 23 Albania 1999 1.24206452644451 1.7914599836432876 24 Albania 1998 1.2288819931238 0.9489923780923286 25 Albania 1997 1.37870662927084 1.6357986769720738 26 Albania 1996 1.51709550445667 1.9087146554575822 27 Albania 1995 2.09971303737436 1.4576482430477247 28 Albania 1994 2.50038576236814 2.4956480482781735 29 Albania 1993 3.19929790315127 2.383331232822835 30 Albania 1992 4.64945888260734 5.190866837887674 31 Albania 1990 5.88830083175259 5.067854757737055 32 Albania 1989 5.16567637335278 5.906669993635726 33 Albania 1988 5.61500470366886 6.2765755885019185 34 Algeria 2012 4.56352306707531 3.923312230807956 35 Algeria 2011 4.3457876421571 4.275598054489994 36 Algeria 2010 3.5191406518711 2.920843064888935 37 
Algeria 2009 3.85231767018087 4.50929651696243 38 Algeria 2008 3.02434776522206 3.621977821392032 39 Algeria 2007 2.91888501928132 2.964503545919998 40 Algeria 2006 2.64654950286156 2.6232410170050064 41 Algeria 2005 2.82994500755648 3.607501387344014 42 Algeria 2004 3.28502466053376 2.626784383005957 43 Algeria 2003 3.25570346413132 2.9442090216259125 44 Algeria 2002 3.69242473417803 4.507199324692448 45 Algeria 2001 3.83240278099018 3.758973671236258 46 Algeria 2000 3.44367652668923 3.037096965272122 47 Algeria 1999 3.76752507239682 3.6477611654852105 48 Algeria 1998 3.95689793175291 4.626305378224886 49 Algeria 1997 3.63283228608796 3.026504560801546 50 Algeria 1996 3.09338520538001 2.465815441783512 ... ... ... ... ... ... 4126 Zimbabwe 1988 4.95503147380905 4.840316350757924
In [9]:
# Plot the :value column. The rendered chart is not preserved in this text
# export, so the Out cell below is empty.
df1[:value].plot
Out[9]:
In [10]:
# Plot the :new_value column (value plus random noise) for comparison with the
# :value plot. The rendered chart is not preserved in this text export.
df1[:new_value].plot
Out[10]:
In [5]:
# Register an analysis suite named after Statsample::Test::T. This call returns
# a Suite object holding the block; the report itself is printed by the later
# Statsample::Analysis.run_batch cell.
Statsample::Analysis.store(Statsample::Test::T) do
  # Independent two-sample t-test: original values vs. the noise-perturbed copy.
  two_sample_test = Statsample::Test.t_two_samples_independent(df1[:value], df1[:new_value])
  summary(two_sample_test)
end
Out[5]:
#<Statsample::Analysis::Suite:0xb8893b50 @block=#<Proc:0xb8893bc8@<main>:0>, @name=Statsample::Test::T, @attached=[], @output=#<IO:<STDOUT>>>
In [6]:
# Run the stored analysis suite(s) and print the report to STDOUT (the text
# that follows this cell).
Statsample::Analysis.run_batch
Analysis 2014-10-26 22:43:32 +0900
= Statsample::Test::T
== Two Sample T Test
Mean and standard deviation
+-----------+--------+--------+------+
| Variable | mean | sd | n |
+-----------+--------+--------+------+
| value | 2.6196 | 3.1559 | 4128 |
| new_value | 2.6143 | 3.2142 | 4128 |
+-----------+--------+--------+------+
Levene test for equality of variances : F(1, 8254) = 1.4994 , p = 0.2208
T statistics
+--------------------+--------+-----------+----------------+
| Type | t | df | p (both tails) |
+--------------------+--------+-----------+----------------+
| Equal variance | 0.0747 | 8254 | 0.9404 |
| Non equal variance | 0.0747 | 8251.2407 | 0.9404 |
+--------------------+--------+-----------+----------------+
Effect size
+-------+--------+
| x1-x2 | 0.0052 |
| d | 0.0029 |
+-------+--------+
In [21]:
# One-sample t-test of the :value column against a hypothesized population
# mean of 50 (the :u option; the summary reports "Population mean: 50.0000").
t_1 = Statsample::Test::T::OneSample.new(df1[:value], { u: 50 })
# Returns the textual report as a String.
t_1.summary
Out[21]:
"= One Sample T Test\n Sample mean: 2.6196 | Sample sd: 3.1559 | se : 0.0491\n Population mean: 50.0000\n t(4127) = -964.5896, p=0.0000 (both tails)\n CI(95%): -47.4767 - -47.2841\n"
Content source: domitry/mikon
Similar notebooks: