In [10]:
import pandas as pd
data = pd.read_csv('/Users/markhoughton/Documents/code_projects/py_projects/euroscipy_2015/stats/examples/brain_size.csv', sep=';', na_values=".")
data


Out[10]:
Unnamed: 0 Gender FSIQ VIQ PIQ Weight Height MRI_Count
0 1 Female 133 132 124 118 64.5 816932
1 2 Male 140 150 124 NaN 72.5 1001121
2 3 Male 139 123 150 143 73.3 1038437
3 4 Male 133 129 128 172 68.8 965353
4 5 Female 137 132 134 147 65.0 951545
5 6 Female 99 90 110 146 69.0 928799
6 7 Female 138 136 131 138 64.5 991305
7 8 Female 92 90 98 175 66.0 854258
8 9 Male 89 93 84 134 66.3 904858
9 10 Male 133 114 147 172 68.8 955466
10 11 Female 132 129 124 118 64.5 833868
11 12 Male 141 150 128 151 70.0 1079549
12 13 Male 135 129 124 155 69.0 924059
13 14 Female 140 120 147 155 70.5 856472
14 15 Female 96 100 90 146 66.0 878897
15 16 Female 83 71 96 135 68.0 865363
16 17 Female 132 132 120 127 68.5 852244
17 18 Male 100 96 102 178 73.5 945088
18 19 Female 101 112 84 136 66.3 808020
19 20 Male 80 77 86 180 70.0 889083
20 21 Male 83 83 86 NaN NaN 892420
21 22 Male 97 107 84 186 76.5 905940
22 23 Female 135 129 134 122 62.0 790619
23 24 Male 139 145 128 132 68.0 955003
24 25 Female 91 86 102 114 63.0 831772
25 26 Male 141 145 131 171 72.0 935494
26 27 Female 85 90 84 140 68.0 798612
27 28 Male 103 96 110 187 77.0 1062462
28 29 Female 77 83 72 106 63.0 793549
29 30 Female 130 126 124 159 66.5 866662
30 31 Female 133 126 132 127 62.5 857782
31 32 Male 144 145 137 191 67.0 949589
32 33 Male 103 96 110 192 75.5 997925
33 34 Male 90 96 86 181 69.0 879987
34 35 Female 83 90 81 143 66.5 834344
35 36 Female 133 129 128 153 66.5 948066
36 37 Male 140 150 124 144 70.5 949395
37 38 Female 88 86 94 139 64.5 893983
38 39 Male 81 90 74 148 74.0 930016
39 40 Male 89 91 89 179 75.5 935863

In [11]:
import numpy as np
t = np.linspace(-6, 6, 20)
sin_t = np.sin(t)
cos_t = np.cos(t)

In [12]:
pd.DataFrame({'t':t, 'sin':sin_t, 'cos':cos_t})


Out[12]:
cos sin t
0 0.960170 0.279415 -6.000000
1 0.609977 0.792419 -5.368421
2 0.024451 0.999701 -4.736842
3 -0.570509 0.821291 -4.105263
4 -0.945363 0.326021 -3.473684
5 -0.955488 -0.295030 -2.842105
6 -0.596979 -0.802257 -2.210526
7 -0.008151 -0.999967 -1.578947
8 0.583822 -0.811882 -0.947368
9 0.950551 -0.310567 -0.315789
10 0.950551 0.310567 0.315789
11 0.583822 0.811882 0.947368
12 -0.008151 0.999967 1.578947
13 -0.596979 0.802257 2.210526
14 -0.955488 0.295030 2.842105
15 -0.945363 -0.326021 3.473684
16 -0.570509 -0.821291 4.105263
17 0.024451 -0.999701 4.736842
18 0.609977 -0.792419 5.368421
19 0.960170 -0.279415 6.000000

In [13]:
data.shape


Out[13]:
(40, 8)

In [14]:
data.columns


Out[14]:
Index([u'Unnamed: 0', u'Gender', u'FSIQ', u'VIQ', u'PIQ', u'Weight', u'Height',
       u'MRI_Count'],
      dtype='object')

In [15]:
data['Gender']


Out[15]:
0     Female
1       Male
2       Male
3       Male
4     Female
5     Female
6     Female
7     Female
8       Male
9       Male
10    Female
11      Male
12      Male
13    Female
14    Female
15    Female
16    Female
17      Male
18    Female
19      Male
20      Male
21      Male
22    Female
23      Male
24    Female
25      Male
26    Female
27      Male
28    Female
29    Female
30    Female
31      Male
32      Male
33      Male
34    Female
35    Female
36      Male
37    Female
38      Male
39      Male
Name: Gender, dtype: object

In [16]:
# Simpler selector
data[data['Gender'] == 'Female']['VIQ'].mean()


Out[16]:
109.45

In [21]:
groupby_gender = data.groupby('Gender')
for gender, value in groupby_gender['VIQ']:
    print gender,value.mean()


Female 109.45
Male 115.25

In [22]:
groupby_gender.mean()


Out[22]:
Unnamed: 0 FSIQ VIQ PIQ Weight Height MRI_Count
Gender
Female 19.65 111.9 109.45 110.45 137.200000 65.765000 862654.6
Male 21.35 115.0 115.25 111.60 166.444444 71.431579 954855.4

In [25]:
from pandas.tools import plotting
plotting.scatter_matrix(data[['Weight','Height','MRI_Count']])
show()



In [28]:
plotting.scatter_matrix(data[['PIQ','VIQ','FSIQ']])
show()



In [32]:
from scipy import stats
stats.ttest_1samp(data['VIQ'], 0)


Out[32]:
(30.088099970849328, 1.3289196468728067e-28)

In [ ]: