In [162]:

    
import scipy
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from pandas.plotting import scatter_matrix
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from sklearn.utils import resample
%matplotlib inline



In [2]:

    
# Read and import data
raw_data = pd.read_csv('ESSdata_Thinkful.csv', encoding='latin-1')
raw_data.head()



In [3]:

    
#Identify all the columns in the file
raw_data.columns









    Out[3]:





Index(['cntry', 'idno', 'year', 'tvtot', 'ppltrst', 'pplfair', 'pplhlp',
       'happy', 'sclmeet', 'sclact', 'gndr', 'agea', 'partner'],
      dtype='object')



In [4]:

    
#Add a column that joins country and year
raw_data['cntry_year'] = raw_data.apply(lambda row: str(row.cntry) + '_' + str(row.year), axis=1)



In [5]:

    
#Analyse types of variables
raw_data.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8594 entries, 0 to 8593
Data columns (total 14 columns):
cntry         8594 non-null object
idno          8594 non-null float64
year          8594 non-null int64
tvtot         8586 non-null float64
ppltrst       8580 non-null float64
pplfair       8555 non-null float64
pplhlp        8569 non-null float64
happy         8563 non-null float64
sclmeet       8579 non-null float64
sclact        8500 non-null float64
gndr          8584 non-null float64
agea          8355 non-null float64
partner       8577 non-null float64
cntry_year    8594 non-null object
dtypes: float64(11), int64(1), object(2)
memory usage: 940.0+ KB



In [6]:

    
#Number of different countries in the dataset
raw_data.cntry.unique()









    Out[6]:





array(['CH', 'CZ', 'DE', 'ES', 'NO', 'SE'], dtype=object)

1. Did people become less trusting from 2012 to 2014? Compute results for each country in the sample.



In [8]:

    
#Get info of the dataset once reduced to the trust variable
#Slice the dataframe to the people trust variable
ppltrust = raw_data[['cntry','cntry_year','ppltrst']]

#Info
ppltrust.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8594 entries, 0 to 8593
Data columns (total 3 columns):
cntry         8594 non-null object
cntry_year    8594 non-null object
ppltrst       8580 non-null float64
dtypes: float64(1), object(2)
memory usage: 201.5+ KB



In [9]:

    
#Clean the values in the dataframe that are null
ppltrust_clean = ppltrust[ppltrust.ppltrst.notnull()]

#Check the remaining values
ppltrust_clean.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 8580 entries, 0 to 8593
Data columns (total 3 columns):
cntry         8580 non-null object
cntry_year    8580 non-null object
ppltrst       8580 non-null float64
dtypes: float64(1), object(2)
memory usage: 268.1+ KB



In [10]:

    
#Check the sizes of data by country
ppltrust_clean.groupby('cntry_year').size()









    Out[10]:





cntry_year
CH_6     773
CH_7     773
CZ_6     651
CZ_7     657
DE_6      14
DE_7      14
ES_6    1213
ES_7    1211
NO_6     730
NO_7     730
SE_6     906
SE_7     908
dtype: int64



In [11]:

    
#Print the dataset for visual inspection
ppltrust_clean.groupby('cntry_year').hist(sharey=True,sharex=True)









    Out[11]:





cntry_year
CH_6    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
CH_7    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
CZ_6    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
CZ_7    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
DE_6    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
DE_7    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
ES_6    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
ES_7    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
NO_6    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
NO_7    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
SE_6    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
SE_7    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
dtype: object

All countries except for Denmark seem to follow a normal distribution



In [58]:

    
#Build the lists for each country each year
T_CH_6 = ppltrust_clean[ppltrust_clean.cntry_year == 'CH_6']['ppltrst'].tolist()
T_CH_7 = ppltrust_clean[ppltrust_clean.cntry_year == 'CH_7']['ppltrst'].tolist()
T_CZ_6 = ppltrust_clean[ppltrust_clean.cntry_year == 'CZ_6']['ppltrst'].tolist()
T_CZ_7 = ppltrust_clean[ppltrust_clean.cntry_year == 'CZ_7']['ppltrst'].tolist()
T_DE_6 = ppltrust_clean[ppltrust_clean.cntry_year == 'DE_6']['ppltrst'].tolist()
T_DE_7 = ppltrust_clean[ppltrust_clean.cntry_year == 'DE_7']['ppltrst'].tolist()
T_ES_6 = ppltrust_clean[ppltrust_clean.cntry_year == 'ES_6']['ppltrst'].tolist()
T_ES_7 = ppltrust_clean[ppltrust_clean.cntry_year == 'ES_7']['ppltrst'].tolist()
T_NO_6 = ppltrust_clean[ppltrust_clean.cntry_year == 'NO_6']['ppltrst'].tolist()
T_NO_7 = ppltrust_clean[ppltrust_clean.cntry_year == 'NO_7']['ppltrst'].tolist()
T_SE_6 = ppltrust_clean[ppltrust_clean.cntry_year == 'SE_6']['ppltrst'].tolist()
T_SE_7 = ppltrust_clean[ppltrust_clean.cntry_year == 'SE_7']['ppltrst'].tolist()

#Build the arrays for comparison
trust = np.asarray(T_CH_6 + T_CH_7+
                   T_CZ_6 + T_CZ_7+
                   T_DE_6 + T_DE_7+
                   T_ES_6 + T_ES_7+
                   T_NO_6 + T_NO_7+
                   T_SE_6 + T_SE_7)

group = np.array(['T_CH_6','T_CH_7', 
                  'T_CZ_6','T_CZ_7',
                  'T_DE_6','T_DE_7',
                  'T_ES_6','T_ES_7',
                  'T_NO_6','T_NO_7',
                  'T_SE_6','T_SE_7'])

group = np.repeat(group, len(trust)/12)

#Run the pairwise t-test using tukey hsd
tukey = pairwise_tukeyhsd(endog=trust,    # Data
                          groups=group,   # Groups
                          alpha=0.05)     # Significance level

#Summarize the data
tukey.summary()









    Out[58]:





Multiple Comparison of Means - Tukey HSD,FWER=0.05

  group1  group2  meandiff   lower    upper   reject


  T_CH_6  T_CH_7   -0.0266  -0.3948  0.3417    False


  T_CH_6  T_CZ_6   -1.0713  -1.4396  -0.7031   True 


  T_CH_6  T_CZ_7   -1.1888  -1.557   -0.8206   True 


  T_CH_6  T_DE_6   -0.5958  -0.964   -0.2276   True 


  T_CH_6  T_DE_7   -0.5958  -0.964   -0.2276   True 


  T_CH_6  T_ES_6   -0.7804  -1.1487  -0.4122   True 


  T_CH_6  T_ES_7   0.2783   -0.0899  0.6466    False


  T_CH_6  T_NO_6   0.9538   0.5856   1.3221    True 


  T_CH_6  T_NO_7   0.5245   0.1562   0.8927    True 


  T_CH_6  T_SE_6   0.4308   0.0625    0.799    True 


  T_CH_6  T_SE_7     0.6    0.2318   0.9682    True 


  T_CH_7  T_CZ_6   -1.0448  -1.413   -0.6765   True 


  T_CH_7  T_CZ_7   -1.1622  -1.5305  -0.794    True 


  T_CH_7  T_DE_6   -0.5692  -0.9375  -0.201    True 


  T_CH_7  T_DE_7   -0.5692  -0.9375  -0.201    True 


  T_CH_7  T_ES_6   -0.7538  -1.1221  -0.3856   True 


  T_CH_7  T_ES_7   0.3049   -0.0633  0.6731    False


  T_CH_7  T_NO_6   0.9804   0.6122   1.3487    True 


  T_CH_7  T_NO_7    0.551   0.1828   0.9193    True 


  T_CH_7  T_SE_6   0.4573   0.0891   0.8256    True 


  T_CH_7  T_SE_7   0.6266   0.2583   0.9948    True 


  T_CZ_6  T_CZ_7   -0.1175  -0.4857  0.2507    False


  T_CZ_6  T_DE_6   0.4755   0.1073   0.8438    True 


  T_CZ_6  T_DE_7   0.4755   0.1073   0.8438    True 


  T_CZ_6  T_ES_6   0.2909   -0.0773  0.6591    False


  T_CZ_6  T_ES_7   1.3497   0.9814   1.7179    True 


  T_CZ_6  T_NO_6   2.0252   1.6569   2.3934    True 


  T_CZ_6  T_NO_7   1.5958   1.2276    1.964    True 


  T_CZ_6  T_SE_6   1.5021   1.1339   1.8703    True 


  T_CZ_6  T_SE_7   1.6713   1.3031   2.0396    True 


  T_CZ_7  T_DE_6    0.593   0.2248   0.9612    True 


  T_CZ_7  T_DE_7    0.593   0.2248   0.9612    True 


  T_CZ_7  T_ES_6   0.4084   0.0402   0.7766    True 


  T_CZ_7  T_ES_7   1.4671   1.0989   1.8354    True 


  T_CZ_7  T_NO_6   2.1427   1.7744   2.5109    True 


  T_CZ_7  T_NO_7   1.7133   1.3451   2.0815    True 


  T_CZ_7  T_SE_6   1.6196   1.2513   1.9878    True 


  T_CZ_7  T_SE_7   1.7888   1.4206    2.157    True 


  T_DE_6  T_DE_7     0.0    -0.3682  0.3682    False


  T_DE_6  T_ES_6   -0.1846  -0.5528  0.1836    False


  T_DE_6  T_ES_7   0.8741   0.5059   1.2424    True 


  T_DE_6  T_NO_6   1.5497   1.1814   1.9179    True 


  T_DE_6  T_NO_7   1.1203    0.752   1.4885    True 


  T_DE_6  T_SE_6   1.0266   0.6583   1.3948    True 


  T_DE_6  T_SE_7   1.1958   0.8276    1.564    True 


  T_DE_7  T_ES_6   -0.1846  -0.5528  0.1836    False


  T_DE_7  T_ES_7   0.8741   0.5059   1.2424    True 


  T_DE_7  T_NO_6   1.5497   1.1814   1.9179    True 


  T_DE_7  T_NO_7   1.1203    0.752   1.4885    True 


  T_DE_7  T_SE_6   1.0266   0.6583   1.3948    True 


  T_DE_7  T_SE_7   1.1958   0.8276    1.564    True 


  T_ES_6  T_ES_7   1.0587   0.6905    1.427    True 


  T_ES_6  T_NO_6   1.7343    1.366   2.1025    True 


  T_ES_6  T_NO_7   1.3049   0.9367   1.6731    True 


  T_ES_6  T_SE_6   1.2112    0.843   1.5794    True 


  T_ES_6  T_SE_7   1.3804   1.0122   1.7487    True 


  T_ES_7  T_NO_6   0.6755   0.3073   1.0438    True 


  T_ES_7  T_NO_7   0.2462   -0.1221  0.6144    False


  T_ES_7  T_SE_6   0.1524   -0.2158  0.5207    False


  T_ES_7  T_SE_7   0.3217   -0.0466  0.6899    False


  T_NO_6  T_NO_7   -0.4294  -0.7976  -0.0611   True 


  T_NO_6  T_SE_6   -0.5231  -0.8913  -0.1548   True 


  T_NO_6  T_SE_7   -0.3538  -0.7221  0.0144    False


  T_NO_7  T_SE_6   -0.0937  -0.4619  0.2745    False


  T_NO_7  T_SE_7   0.0755   -0.2927  0.4438    False


  T_SE_6  T_SE_7   0.1692   -0.199   0.5375    False

In this case we have run all the comparisons between countries. We are assuming that all of them follow a normal distribution.

From all the comparisons we are only focusing on those that are within countries between both years.

In the Tukey HSD test the null hypothesis is that all means being compared are from the same population.

Only in the cases of Spain and Norway the rejection of the null hypothesis is true being the means significantly different and therefore having having populations that are more trusting the case of Spain and less in the case of Norway. The rest of the countries under analysis remain the same.

2. Did people become happier from 2012 to 2014? Compute results for each country in the sample



In [212]:

    
#Get info of the dataset once reduced to the trust variable

#Slice the dataframe to the people happiness variable
pplhappy = raw_data[['cntry','cntry_year','happy']]

#Get info about the data
pplhappy.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8594 entries, 0 to 8593
Data columns (total 3 columns):
cntry         8594 non-null object
cntry_year    8594 non-null object
happy         8563 non-null float64
dtypes: float64(1), object(2)
memory usage: 201.5+ KB



In [92]:

    
#Clean the values in the dataframe that are null
pplhappy_clean = pplhappy[pplhappy.happy.notnull()]

remove_n = 7
drop_indices = np.random.choice(pplhappy_clean.index, remove_n, replace=False)
pplhappy_clean = pplhappy_clean.drop(drop_indices)
#Check the remaining values
pplhappy_clean.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 8556 entries, 0 to 8593
Data columns (total 3 columns):
cntry         8556 non-null object
cntry_year    8556 non-null object
happy         8556 non-null float64
dtypes: float64(1), object(2)
memory usage: 267.4+ KB



In [93]:

    
#Check the sizes of data by country
pplhappy_clean.groupby('cntry_year').size()









    Out[93]:





cntry_year
CH_6     769
CH_7     772
CZ_6     646
CZ_7     652
DE_6      14
DE_7      14
ES_6    1209
ES_7    1212
NO_6     727
NO_7     730
SE_6     907
SE_7     904
dtype: int64



In [94]:

    
#Plot happy variables for visual inspection per country and year
pplhappy_clean.groupby('cntry_year').hist(sharey=True,sharex=True)









    Out[94]:





cntry_year
CH_6    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
CH_7    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
CZ_6    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
CZ_7    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
DE_6    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
DE_7    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
ES_6    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
ES_7    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
NO_6    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
NO_7    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
SE_6    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
SE_7    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
dtype: object



In [97]:

    
#Build the lists for each country each year
H_CH_6 = pplhappy_clean[pplhappy_clean.cntry_year == 'CH_6']['happy'].tolist()
H_CH_7 = pplhappy_clean[pplhappy_clean.cntry_year == 'CH_7']['happy'].tolist()
H_CZ_6 = pplhappy_clean[pplhappy_clean.cntry_year == 'CZ_6']['happy'].tolist()
H_CZ_7 = pplhappy_clean[pplhappy_clean.cntry_year == 'CZ_7']['happy'].tolist()
H_DE_6 = pplhappy_clean[pplhappy_clean.cntry_year == 'DE_6']['happy'].tolist()
H_DE_7 = pplhappy_clean[pplhappy_clean.cntry_year == 'DE_7']['happy'].tolist()
H_ES_6 = pplhappy_clean[pplhappy_clean.cntry_year == 'ES_6']['happy'].tolist()
H_ES_7 = pplhappy_clean[pplhappy_clean.cntry_year == 'ES_7']['happy'].tolist()
H_NO_6 = pplhappy_clean[pplhappy_clean.cntry_year == 'NO_6']['happy'].tolist()
H_NO_7 = pplhappy_clean[pplhappy_clean.cntry_year == 'NO_7']['happy'].tolist()
H_SE_6 = pplhappy_clean[pplhappy_clean.cntry_year == 'SE_6']['happy'].tolist()
H_SE_7 = pplhappy_clean[pplhappy_clean.cntry_year == 'SE_7']['happy'].tolist()

#Build the arrays for comparison
happy = np.asarray(H_CH_6 + H_CH_7 +
                   H_CZ_6 + H_CZ_7 + 
                   H_DE_6 + H_DE_7 + 
                   H_ES_6 + H_ES_7 + 
                   H_NO_6 + H_NO_7 + 
                   H_SE_6 + H_SE_7)

group_happy = np.array(['H_CH_6', 'H_CH_7',
                     'H_CZ_6', 'H_CZ_7',
                     'H_DE_6', 'H_DE_7',
                     'H_ES_6', 'H_ES_7',
                     'H_NO_6', 'H_NO_7',
                     'H_SE_6', 'H_SE_7'])

group_happy = np.repeat(group_happy, len(happy)/12)

#Run the pairwise t-test using tukey hsd
tukey = pairwise_tukeyhsd(endog=happy,    # Data
                          groups=group_happy,   # Groups
                          alpha=0.05)     # Significance level

#Summarize the data
tukey.summary()









    Out[97]:





Multiple Comparison of Means - Tukey HSD,FWER=0.05

  group1  group2  meandiff   lower    upper   reject


  H_CH_6  H_CH_7   0.0042   -0.288   0.2964    False


  H_CH_6  H_CZ_6   -1.1262  -1.4184  -0.8341   True 


  H_CH_6  H_CZ_7   -1.1992  -1.4913  -0.907    True 


  H_CH_6  H_DE_6   -0.5947  -0.8868  -0.3025   True 


  H_CH_6  H_DE_7   -0.554   -0.8462  -0.2618   True 


  H_CH_6  H_ES_6   -0.7349  -1.0271  -0.4428   True 


  H_CH_6  H_ES_7   -0.1893  -0.4815  0.1028    False


  H_CH_6  H_NO_6   0.0168   -0.2753   0.309    False


  H_CH_6  H_NO_7   -0.1318  -0.424   0.1603    False


  H_CH_6  H_SE_6   -0.2581  -0.5502  0.0341    False


  H_CH_6  H_SE_7   -0.1571  -0.4492  0.1351    False


  H_CH_7  H_CZ_6   -1.1304  -1.4226  -0.8383   True 


  H_CH_7  H_CZ_7   -1.2034  -1.4955  -0.9112   True 


  H_CH_7  H_DE_6   -0.5989  -0.891   -0.3067   True 


  H_CH_7  H_DE_7   -0.5582  -0.8504  -0.266    True 


  H_CH_7  H_ES_6   -0.7391  -1.0313  -0.447    True 


  H_CH_7  H_ES_7   -0.1935  -0.4857  0.0986    False


  H_CH_7  H_NO_6   0.0126   -0.2795  0.3048    False


  H_CH_7  H_NO_7   -0.136   -0.4282  0.1561    False


  H_CH_7  H_SE_6   -0.2623  -0.5544  0.0299    False


  H_CH_7  H_SE_7   -0.1613  -0.4535  0.1309    False


  H_CZ_6  H_CZ_7   -0.0729  -0.3651  0.2192    False


  H_CZ_6  H_DE_6   0.5316   0.2394   0.8237    True 


  H_CZ_6  H_DE_7   0.5722   0.2801   0.8644    True 


  H_CZ_6  H_ES_6   0.3913   0.0991   0.6835    True 


  H_CZ_6  H_ES_7   0.9369   0.6447   1.2291    True 


  H_CZ_6  H_NO_6   1.1431   0.8509   1.4352    True 


  H_CZ_6  H_NO_7   0.9944   0.7022   1.2866    True 


  H_CZ_6  H_SE_6   0.8682    0.576   1.1603    True 


  H_CZ_6  H_SE_7   0.9691    0.677   1.2613    True 


  H_CZ_7  H_DE_6   0.6045   0.3123   0.8967    True 


  H_CZ_7  H_DE_7   0.6452    0.353   0.9373    True 


  H_CZ_7  H_ES_6   0.4642   0.1721   0.7564    True 


  H_CZ_7  H_ES_7   1.0098   0.7177    1.302    True 


  H_CZ_7  H_NO_6    1.216   0.9238   1.5082    True 


  H_CZ_7  H_NO_7   1.0673   0.7752   1.3595    True 


  H_CZ_7  H_SE_6   0.9411   0.6489   1.2333    True 


  H_CZ_7  H_SE_7   1.0421   0.7499   1.3342    True 


  H_DE_6  H_DE_7   0.0407   -0.2515  0.3328    False


  H_DE_6  H_ES_6   -0.1403  -0.4324  0.1519    False


  H_DE_6  H_ES_7   0.4053   0.1132   0.6975    True 


  H_DE_6  H_NO_6   0.6115   0.3193   0.9037    True 


  H_DE_6  H_NO_7   0.4628   0.1707    0.755    True 


  H_DE_6  H_SE_6   0.3366   0.0444   0.6288    True 


  H_DE_6  H_SE_7   0.4376   0.1454   0.7298    True 


  H_DE_7  H_ES_6   -0.1809  -0.4731  0.1112    False


  H_DE_7  H_ES_7   0.3647   0.0725   0.6568    True 


  H_DE_7  H_NO_6   0.5708   0.2787    0.863    True 


  H_DE_7  H_NO_7   0.4222    0.13    0.7143    True 


  H_DE_7  H_SE_6   0.2959   0.0038   0.5881    True 


  H_DE_7  H_SE_7   0.3969   0.1048   0.6891    True 


  H_ES_6  H_ES_7   0.5456   0.2534   0.8377    True 


  H_ES_6  H_NO_6   0.7518   0.4596   1.0439    True 


  H_ES_6  H_NO_7   0.6031   0.3109   0.8952    True 


  H_ES_6  H_SE_6   0.4769   0.1847    0.769    True 


  H_ES_6  H_SE_7   0.5778   0.2857    0.87     True 


  H_ES_7  H_NO_6   0.2062   -0.086   0.4983    False


  H_ES_7  H_NO_7   0.0575   -0.2347  0.3497    False


  H_ES_7  H_SE_6   -0.0687  -0.3609  0.2234    False


  H_ES_7  H_SE_7   0.0323   -0.2599  0.3244    False


  H_NO_6  H_NO_7   -0.1487  -0.4408  0.1435    False


  H_NO_6  H_SE_6   -0.2749  -0.5671  0.0173    False


  H_NO_6  H_SE_7   -0.1739  -0.4661  0.1183    False


  H_NO_7  H_SE_6   -0.1262  -0.4184  0.1659    False


  H_NO_7  H_SE_7   -0.0252  -0.3174  0.2669    False


  H_SE_6  H_SE_7    0.101   -0.1912  0.3931    False

In this case we have run all the comparisons between countries. We are assuming that all of them follow a normal distribution.

From all the comparisons we are only focusing on those that are within countries between both years.

In the Tukey HSD test the null hypothesis is that all means being compared are from the same population.

Only in the cases of Spain the rejection of the null hypothesis is true being the means significantly different and therefore having populations that are happier in recent years. The rest of the countries under analysis remain the same.

3. Who reported watching more TV in 2012, men or women?



In [177]:

    
#Get info of the dataset once reduced to the trust variable
#Slice the dataframe to the people trust variable
tv_total = raw_data[['gndr','year','tvtot']]

#Info
tv_total.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8594 entries, 0 to 8593
Data columns (total 3 columns):
gndr     8584 non-null float64
year     8594 non-null int64
tvtot    8586 non-null float64
dtypes: float64(2), int64(1)
memory usage: 201.5 KB



In [184]:

    
#Slice by the year 2012
tv_total_2012 = tv_total[tv_total.year == 6]

#Drop year
tv_total_2012bis = tv_total_2012.drop('year',axis = 1)

#Get information of the new dataframe
tv_total_2012bis.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 4297 entries, 0 to 7685
Data columns (total 2 columns):
gndr     4297 non-null float64
tvtot    4291 non-null float64
dtypes: float64(2)
memory usage: 100.7 KB



In [186]:

    
#Clean the values in the dataframe that are null (gender)
tv_total_gndr_cl = tv_total_2012bis[tv_total_2012bis.gndr.notnull()]

#Clean the values in the dataframe that are null (total hours)
tv_total_cle = tv_total_gndr_cl[tv_total_gndr_cl.tvtot.notnull()]

#Reset index
tv_total_clea = tv_total_cle.reset_index()

#Drop old index
tv_total_clean = tv_total_clea.drop('index',axis = 1)

#Check Values
tv_total_clean.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4291 entries, 0 to 4290
Data columns (total 2 columns):
gndr     4291 non-null float64
tvtot    4291 non-null float64
dtypes: float64(2)
memory usage: 67.1 KB



In [195]:

    
#Downsample majority class (due to computational restrictions we downsample the majority instead of upsampling the minority)

# Separate majority and minority classes
tv_total_clean_majority = tv_total_clean[tv_total_clean.gndr == 1.0]
tv_total_clean_minority = tv_total_clean[tv_total_clean.gndr == 2.0]
 
# Downsample the majority
tv_total_clean_majority_downsampled = resample(tv_total_clean_majority, replace=False, n_samples=2140, random_state=123) 
 
# Combine minority class with downsampled majority class
tv_total_complete = pd.concat([tv_total_clean_majority_downsampled, tv_total_clean_minority])

# Display new class counts
tv_total_complete.gndr.value_counts()









    Out[195]:





2.0    2140
1.0    2140
Name: gndr, dtype: int64



In [196]:

    
#Plot happy variables for visual inspection per country and year
tv_total_complete.groupby('gndr').hist(sharey=True,sharex=True)









    Out[196]:





gndr
1.0    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
2.0    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
dtype: object

The distribution of total tv watching hours is non parametric as it can be appreciated when plotting the hours by gender for 2012.



In [197]:

    
#Build a new dataframe with the total tv hours per gender
tv_total_reordered = pd.DataFrame()

#Create lists splitting between men and women the total tv watching hours
men = list(tv_total_complete[tv_total_complete.gndr == 1.0]['tvtot'])
women = list(tv_total_complete[tv_total_complete.gndr == 2.0]['tvtot'])

#Build columns and add to the dataframe
tv_total_reordered['men'] = men
tv_total_reordered['women'] = women



In [198]:

    
# Kruskal-Wallace Test

# Join all ratings together into a list, then ranking them.
ranks = stats.rankdata(
    tv_total_reordered['men'].tolist() +
    tv_total_reordered['women'].tolist())

# Add the new ranked variables to the data frame.
tv_total_reordered['Ranks_men'] = ranks[0:2140]
tv_total_reordered['Ranks_women'] = ranks[2140:4280]


# Average rank per group.
groups = ['Ranks_men', 'Ranks_women']
print(tv_total_reordered[groups].apply(np.mean))

# Overall average rank.
print(np.mean(ranks))

# Print the test statistic followed by the probability of getting this result
# if the groups were not different.
stats.kruskal(
    tv_total_reordered['men'],
    tv_total_reordered['women'])









    



Ranks_men      2126.00514
Ranks_women    2154.99486
dtype: float64
2140.5






    Out[198]:





KruskalResult(statistic=0.6014571876999385, pvalue=0.43802258247065096)

It appears that the groups do not differ significantly. The test says there is a 43.8% chance of getting our data if there were no systematic difference between in the total tv watching hours per gender in the population. Based on our data, tv watching hours doesn't seem to differ systematically across gender.

4. Who was more likely to believe people were fair in 2012, people living with a partner or people living alone?



In [199]:

    
#Get info of the dataset once reduced to the trust variable
#Slice the dataframe to the people trust variable
pplfairness = raw_data[['partner','year','pplfair']]

#Info
pplfairness.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8594 entries, 0 to 8593
Data columns (total 3 columns):
partner    8577 non-null float64
year       8594 non-null int64
pplfair    8555 non-null float64
dtypes: float64(2), int64(1)
memory usage: 201.5 KB



In [201]:

    
#Slice by the year 2012
pplfairness_2012 = pplfairness[pplfairness.year == 6]

#Drop year
pplfairness_2012bis = pplfairness_2012.drop('year',axis = 1)

#Get information of the new dataframe
pplfairness_2012bis.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 4297 entries, 0 to 7685
Data columns (total 2 columns):
partner    4290 non-null float64
pplfair    4277 non-null float64
dtypes: float64(2)
memory usage: 100.7 KB



In [202]:

    
#Clean the values in the dataframe that are null (partner)
pplfairness_2012bis_partner_cl = pplfairness_2012bis[pplfairness_2012bis.partner.notnull()]

#Clean the values in the dataframe that are null (total hours)
pplfairness_cle = pplfairness_2012bis_partner_cl[pplfairness_2012bis_partner_cl.pplfair.notnull()]

#Reset index
pplfairness_clea = pplfairness_cle.reset_index()

#Drop old index
pplfairness_clean = pplfairness_clea.drop('index',axis = 1)

#Check Values
pplfairness_clean.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4271 entries, 0 to 4270
Data columns (total 2 columns):
partner    4271 non-null float64
pplfair    4271 non-null float64
dtypes: float64(2)
memory usage: 66.8 KB



In [204]:

    
#Downsample majority class (due to computational restrictions we downsample the majority instead of upsampling the minority)

# Separate majority and minority classes
pplfairness_clean_majority = pplfairness_clean[pplfairness_clean.partner == 1.0]
pplfairness_clean_minority = pplfairness_clean[pplfairness_clean.partner == 2.0]
 
# Downsample the majority
pplfairness_clean_majority_downsampled = resample(pplfairness_clean_majority, replace=False, n_samples=1608, random_state=123) 
 
# Combine minority class with downsampled majority class
pplfairness_complete = pd.concat([pplfairness_clean_majority_downsampled, pplfairness_clean_minority])

# Display new class counts
pplfairness_complete.partner.value_counts()









    Out[204]:





2.0    1608
1.0    1608
Name: partner, dtype: int64



In [205]:

    
#Plot happy variables for visual inspection per country and year
pplfairness_complete.groupby('partner').hist(sharey=True,sharex=True)









    Out[205]:





partner
1.0    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
2.0    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
dtype: object

Distributions are considered to be normal although skewed to the left



In [207]:

    
#Build a new dataframe with the total tv hours per gender
pplfairness_reordered = pd.DataFrame()

#Create lists splitting between men and women the total tv watching hours
wpartner = list(pplfairness_complete[pplfairness_complete.partner == 1.0]['pplfair'])
alone = list(pplfairness_complete[pplfairness_complete.partner == 2.0]['pplfair'])

#Build columns and add to the dataframe
pplfairness_reordered['wpartner'] = wpartner
pplfairness_reordered['alone'] = alone



In [208]:

    
# Printing the means for each group.
print(pplfairness_reordered.mean())

pplfairness_reordered.boxplot()
plt.show()


F, p = stats.f_oneway(
    pplfairness_reordered['wpartner'],
    pplfairness_reordered['alone'])

# The F statistic.
print(F)

# The probability. A p < .05 would lead us to believe the group means were
# not all similar in the population.
print(p)









    



wpartner    6.085821
alone       5.856965
dtype: float64






    












    



9.126537190662138
0.002539122042052086



In [210]:

    
#Run a one way ANOVA test on the groups
print(stats.ttest_ind(pplfairness_reordered['wpartner'], pplfairness_reordered['alone']))









    



Ttest_indResult(statistic=3.021015920292726, pvalue=0.0025391220420555288)

It appears that the two groups of people (in 2012) living with a partner or alone are likely to differ in the population (p < .005). Visual inspection suggests that the people living alone believe that people are not fair compared to the ones living with their partner. To test whether the people living alone is the odd one out, we compare each group to each other.

The results of this test are interesting! Contrary to what we might have expected from a visual inspection of the histograms, the means of all three groups seem to be significantly different from one another. Wood is highest, then Steel, then Plastic.



In [211]:

    
#Tukey HSD test

#Build the arrays for comparison
fair = np.asarray(wpartner + alone)

group_fair = np.array(['wpartner', 'alone'])

group_fair = np.repeat(group_fair, len(fair)/2)

#Run the pairwise t-test using tukey hsd
tukey = pairwise_tukeyhsd(endog=fair,    # Data
                          groups=group_fair,   # Groups
                          alpha=0.05)     # Significance level

#Summarize the data
tukey.summary()









    Out[211]:





Multiple Comparison of Means - Tukey HSD,FWER=0.05

  group1   group2   meandiff   lower   upper  reject


   alone  wpartner   0.2289   0.0803  0.3774   True

In the Tukey HSD test the null hypothesis is that all means being compared are from the same population.

This test confirms the result obtained by the one way ANOVA test. In this case the rejection of the null hypothesis is true being the means significantly different and therefore having populations that are belive that others are more unfair if they live alone than when they live with their partners.

5. Pick three or four of the countries in the sample and compare how often people met socially in 2014. Are there differences, and if so, which countries stand out?



In [213]:

    
#Get info of the dataset once reduced to the trust variable
#Slice the dataframe to the people trust variable
meet = raw_data[['cntry','year','sclmeet']]

#Info
meet.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8594 entries, 0 to 8593
Data columns (total 3 columns):
cntry      8594 non-null object
year       8594 non-null int64
sclmeet    8579 non-null float64
dtypes: float64(1), int64(1), object(1)
memory usage: 201.5+ KB



In [214]:

    
#Slice by the year 2014
meet_2014 = meet[meet.year == 7]

#Drop year
meet_2014bis = meet_2014.drop('year',axis = 1)

#Get information of the new dataframe
meet_2014bis.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 4297 entries, 773 to 8593
Data columns (total 2 columns):
cntry      4297 non-null object
sclmeet    4291 non-null float64
dtypes: float64(1), object(1)
memory usage: 100.7+ KB



In [215]:

    
#Clean the values in the dataframe that are null (country)
meet_2014bis_cntr_cl = meet_2014bis[meet_2014bis.cntry.notnull()]

#Clean the values in the dataframe that are null (social meet)
meet_cle = meet_2014bis_cntr_cl[meet_2014bis_cntr_cl.sclmeet.notnull()]

#Reset index
meet_clea = meet_cle.reset_index()

#Drop old index
meet_clean = meet_clea.drop('index',axis = 1)

#Check Values
meet_clean.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4291 entries, 0 to 4290
Data columns (total 2 columns):
cntry      4291 non-null object
sclmeet    4291 non-null float64
dtypes: float64(1), object(1)
memory usage: 67.1+ KB



In [222]:

    
#SLice the dataframe for three countries
meet_final = meet_clean.loc[meet_clean['cntry'].isin(['ES','SE','CH'])]









    Out[222]:





ES    1211
SE     908
CH     772
Name: cntry, dtype: int64



In [223]:

    
#Plot happy variables for visual inspection per country and year
meet_final.groupby('cntry').hist(sharey=True,sharex=True)









    Out[223]:





cntry
CH    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
ES    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
SE    [[AxesSubplot(0.125,0.125;0.775x0.755)]]
dtype: object



In [247]:

    
#Downsample majority class (due to computational restrictions we downsample the majority instead of upsampling the minority)

# Separate majority and minority classes
meet_final_majority_1 = meet_final[meet_final.cntry == 'ES']
meet_final_majority_2 = meet_final[meet_final.cntry == 'SE']
meet_final_minority = meet_final[meet_final.cntry == 'CH']
 
# Downsample the majority
meet_final_majority_1_downsampled = resample(meet_final_majority_1, replace=False, n_samples=772, random_state=123) 
meet_final_majority_2_downsampled = resample(meet_final_majority_2, replace=False, n_samples=772, random_state=123)  

# Combine minority class with downsampled majority class
meet_complete = pd.concat([meet_final_majority_1_downsampled, meet_final_majority_2_downsampled, meet_final_minority])

# Display new class counts
meet_complete.cntry.value_counts()









    Out[247]:





SE    772
ES    772
CH    772
Name: cntry, dtype: int64



In [248]:

    
#Build a new dataframe with the total tv hours per gender
meet_reordered = pd.DataFrame()

#Create lists splitting between countries
ES = list(meet_complete[meet_complete.cntry == 'ES']['sclmeet'])
SE = list(meet_complete[meet_complete.cntry == 'SE']['sclmeet'])
CH = list(meet_complete[meet_complete.cntry == 'CH']['sclmeet'])

#Build columns and add to the dataframe
meet_reordered['SE'] = SE
meet_reordered['ES'] = ES
meet_reordered['CH'] = CH



In [249]:

    
# Kruskal-Wallace Test

# Join all ratings together into a list, then ranking them.
ranks = stats.rankdata(
    meet_reordered['ES'].tolist() +
    meet_reordered['SE'].tolist() + 
    meet_reordered['CH'].tolist())

# Add the new ranked variables to the data frame.
meet_reordered['Ranks_ES'] = ranks[0:772]
meet_reordered['Ranks_SE'] = ranks[772:1544]
meet_reordered['Ranks_CH'] = ranks[1544:2316]


# Average rank per group.
groups = ['Ranks_ES', 'Ranks_SE', 'Ranks_CH']
print(meet_reordered[groups].apply(np.mean))

# Overall average rank.
print(np.mean(ranks))

# Print the test statistic followed by the probability of getting this result
# if the groups were not different.
stats.kruskal(
    meet_reordered['ES'],
    meet_reordered['SE'],
    meet_reordered['CH'])









    



Ranks_ES    1160.752591
Ranks_SE    1234.441710
Ranks_CH    1080.305699
dtype: float64
1158.5






    Out[249]:





KruskalResult(statistic=21.838360207302635, pvalue=1.8107576725725307e-05)

It appears that the groups do differ significantly. The test says there is a less than 0.05% chance of getting our data if there were no systematic difference between in the social meetings between countries. Based on our data, social meeting seem to differ systematically across countries.

Based on the differences it seems that Sweden stands out having more socal hours than the rest, followed by Spain and Switzerland in the third place



In [231]:

    
#Get info of the dataset once reduced to the trust variable
#Slice the dataframe to the people trust variable
social = raw_data[['cntry','year', 'agea', 'sclact']]

#Info
social.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8594 entries, 0 to 8593
Data columns (total 4 columns):
cntry     8594 non-null object
year      8594 non-null int64
agea      8355 non-null float64
sclact    8500 non-null float64
dtypes: float64(2), int64(1), object(1)
memory usage: 268.6+ KB



In [232]:

    
#Slice by the year 2014
social_2014 = social[social.year == 7]

#Drop year
social_2014bis = social_2014.drop('year',axis = 1)

#Get information of the new dataframe
social_2014bis.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 4297 entries, 773 to 8593
Data columns (total 3 columns):
cntry     4297 non-null object
agea      4184 non-null float64
sclact    4249 non-null float64
dtypes: float64(2), object(1)
memory usage: 134.3+ KB



In [237]:

    
#Clean the values in the dataframe that are null (country)
social_2014bis_cntr_cl = social_2014bis[meet_2014bis.cntry.notnull()]

#Clean the values in the dataframe that are null (social activities)
social_cle = social_2014bis_cntr_cl[social_2014bis_cntr_cl.sclact.notnull()]

#Reset index
social_clea = social_cle.reset_index()

#Drop old index
social_clean = social_clea.drop('index',axis = 1)

#Check Values
social_clean.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4249 entries, 0 to 4248
Data columns (total 3 columns):
cntry     4249 non-null object
agea      4137 non-null float64
sclact    4249 non-null float64
dtypes: float64(2), object(1)
memory usage: 99.7+ KB



In [264]:

    
#SLice the dataframe for three countries
social_complete = social_clean.loc[social_clean['cntry'].isin(['ES','SE','CH'])]

#Count values per country
social_complete.cntry.value_counts()









    Out[264]:





ES    1185
SE     904
CH     765
Name: cntry, dtype: int64



In [265]:

    
#Plot happy variables for visual inspection per country and year
social_complete.groupby('cntry').hist(sharey=False,sharex=False)









    Out[265]:





cntry
CH    [[AxesSubplot(0.125,0.125;0.336957x0.755), Axe...
ES    [[AxesSubplot(0.125,0.125;0.336957x0.755), Axe...
SE    [[AxesSubplot(0.125,0.125;0.336957x0.755), Axe...
dtype: object



In [266]:

    
#Create lists gourping by age interval and country Spain: <20, between 20 and 40, between 40 and 60, more than 60
ES_1 = list(social_complete[(social_complete.cntry == 'ES') & (social_complete.agea <= 20)]['sclact'])
ES_2 = list(social_complete[(social_complete.cntry == 'ES') & (social_complete.agea > 20)& (social_complete.agea <= 40)]['sclact'])
ES_3 = list(social_complete[(social_complete.cntry == 'ES') & (social_complete.agea > 40)& (social_complete.agea <= 60)]['sclact'])
ES_4 = list(social_complete[(social_complete.cntry == 'ES') & (social_complete.agea > 60) ]['sclact'])

#Create lists gourping by age interval and country Sweden: <20, between 20 and 40, between 40 and 60, more than 60
SE_1 = list(social_complete[(social_complete.cntry == 'SE') & (social_complete.agea <= 20)]['sclact'])
SE_2 = list(social_complete[(social_complete.cntry == 'SE') & (social_complete.agea > 20)& (social_complete.agea <= 40)]['sclact'])
SE_3 = list(social_complete[(social_complete.cntry == 'SE') & (social_complete.agea > 40)& (social_complete.agea <= 60)]['sclact'])
SE_4 = list(social_complete[(social_complete.cntry == 'SE') & (social_complete.agea > 60) ]['sclact'])

#Create lists gourping by age interval and country Switzerland: <20, between 20 and 40, between 40 and 60, more than 60
CH_1 = list(social_complete[(social_complete.cntry == 'CH') & (social_complete.agea <= 20)]['sclact'])
CH_2 = list(social_complete[(social_complete.cntry == 'CH') & (social_complete.agea > 20)& (social_complete.agea <= 40)]['sclact'])
CH_3 = list(social_complete[(social_complete.cntry == 'CH') & (social_complete.agea > 40)& (social_complete.agea <= 60)]['sclact'])
CH_4 = list(social_complete[(social_complete.cntry == 'CH') & (social_complete.agea > 60) ]['sclact'])



In [267]:

    
#Tukey hsd
#Build the arrays for comparison
social= np.asarray(ES_1 + ES_2 + ES_3 + ES_4 +
                   SE_1 + SE_2 + SE_3 + SE_4 +
                   CH_1 + CH_2 + CH_3 + CH_4)

group_social = np.array(['ES_1', 'ES_2', 'ES_3', 'ES_4',
                   'SE_1', 'SE_2', 'SE_3', 'SE_4',
                   'CH_1', 'CH_2', 'CH_3', 'CH_4'])

group_social = np.repeat(group_social, len(social)/12)

#Run the pairwise t-test using tukey hsd
tukey = pairwise_tukeyhsd(endog=social,    # Data
                          groups=group_social,   # Groups
                          alpha=0.05)     # Significance level

#Summarize the data
tukey.summary()









    Out[267]:





Multiple Comparison of Means - Tukey HSD,FWER=0.05

  group1  group2  meandiff   lower    upper   reject


   CH_1    CH_2    -0.1472  -0.4237  0.1294    False


   CH_1    CH_3    -0.1385  -0.4151   0.138    False


   CH_1    CH_4    -0.1732  -0.4497  0.1034    False


   CH_1    ES_1    -0.2554  -0.532   0.0211    False


   CH_1    ES_2    -0.2511  -0.5276  0.0255    False


   CH_1    ES_3    -0.2771  -0.5536  -0.0005   True 


   CH_1    ES_4    -0.2078  -0.4844  0.0688    False


   CH_1    SE_1    -0.5844  -0.861   -0.3079   True 


   CH_1    SE_2    -0.0043  -0.2809  0.2722    False


   CH_1    SE_3    -0.1558  -0.4324  0.1207    False


   CH_1    SE_4    -0.0866  -0.3631   0.19     False


   CH_2    CH_3    0.0087   -0.2679  0.2852    False


   CH_2    CH_4    -0.026   -0.3025  0.2506    False


   CH_2    ES_1    -0.1082  -0.3848  0.1683    False


   CH_2    ES_2    -0.1039  -0.3805  0.1727    False


   CH_2    ES_3    -0.1299  -0.4064  0.1467    False


   CH_2    ES_4    -0.0606  -0.3372   0.216    False


   CH_2    SE_1    -0.4372  -0.7138  -0.1607   True 


   CH_2    SE_2    0.1429   -0.1337  0.4194    False


   CH_2    SE_3    -0.0087  -0.2852  0.2679    False


   CH_2    SE_4    0.0606   -0.216   0.3372    False


   CH_3    CH_4    -0.0346  -0.3112  0.2419    False


   CH_3    ES_1    -0.1169  -0.3934  0.1597    False


   CH_3    ES_2    -0.1126  -0.3891   0.164    False


   CH_3    ES_3    -0.1385  -0.4151   0.138    False


   CH_3    ES_4    -0.0693  -0.3458  0.2073    False


   CH_3    SE_1    -0.4459  -0.7224  -0.1693   True 


   CH_3    SE_2    0.1342   -0.1424  0.4108    False


   CH_3    SE_3    -0.0173  -0.2939  0.2592    False


   CH_3    SE_4    0.0519   -0.2246  0.3285    False


   CH_4    ES_1    -0.0823  -0.3588  0.1943    False


   CH_4    ES_2    -0.0779  -0.3545  0.1986    False


   CH_4    ES_3    -0.1039  -0.3805  0.1727    False


   CH_4    ES_4    -0.0346  -0.3112  0.2419    False


   CH_4    SE_1    -0.4113  -0.6878  -0.1347   True 


   CH_4    SE_2    0.1688   -0.1077  0.4454    False


   CH_4    SE_3    0.0173   -0.2592  0.2939    False


   CH_4    SE_4    0.0866    -0.19   0.3631    False


   ES_1    ES_2    0.0043   -0.2722  0.2809    False


   ES_1    ES_3    -0.0216  -0.2982  0.2549    False


   ES_1    ES_4    0.0476   -0.2289  0.3242    False


   ES_1    SE_1    -0.329   -0.6056  -0.0524   True 


   ES_1    SE_2    0.2511   -0.0255  0.5276    False


   ES_1    SE_3    0.0996   -0.177   0.3761    False


   ES_1    SE_4    0.1688   -0.1077  0.4454    False


   ES_2    ES_3    -0.026   -0.3025  0.2506    False


   ES_2    ES_4    0.0433   -0.2333  0.3199    False


   ES_2    SE_1    -0.3333  -0.6099  -0.0568   True 


   ES_2    SE_2    0.2468   -0.0298  0.5233    False


   ES_2    SE_3    0.0952   -0.1813  0.3718    False


   ES_2    SE_4    0.1645   -0.1121  0.4411    False


   ES_3    ES_4    0.0693   -0.2073  0.3458    False


   ES_3    SE_1    -0.3074  -0.5839  -0.0308   True 


   ES_3    SE_2    0.2727   -0.0038  0.5493    False


   ES_3    SE_3    0.1212   -0.1553  0.3978    False


   ES_3    SE_4    0.1905   -0.0861   0.467    False


   ES_4    SE_1    -0.3766  -0.6532  -0.1001   True 


   ES_4    SE_2    0.2035   -0.0731   0.48     False


   ES_4    SE_3    0.0519   -0.2246  0.3285    False


   ES_4    SE_4    0.1212   -0.1553  0.3978    False


   SE_1    SE_2    0.5801   0.3035   0.8566    True 


   SE_1    SE_3    0.4286    0.152   0.7051    True 


   SE_1    SE_4    0.4978   0.2213   0.7744    True 


   SE_2    SE_3    -0.1515  -0.4281   0.125    False


   SE_2    SE_4    -0.0823  -0.3588  0.1943    False


   SE_3    SE_4    0.0693   -0.2073  0.3458    False

The population has been grouped in four age intervals:

lower than 20,
Between 20 and 40,
Between 40 and 60,
More than 60

In this case we have run all the comparisons between age intervales and countries. In the Tukey HSD test the null hypothesis is that all means being compared are from the same population.

Only in the cases of Sweden the rejection of the null hypothesis is true being the means between different age intervals significantly different and therefore having populations that have different participation in social activities in 2014. In this case, the participation between SE_1 and SE_2, SE_3 and SE_4 are different. In all cases is SE_1 (population under 20) the one that has a higher participation in social activities.

The rest of the differences appear between countries and different ages (as the case of Sweden and Spain) or between population in the same interval as in the case of Switzerland and Sweden (population under 20) being the participation of CH_1 lower.

	cntry	idno	year	tvtot	ppltrst	pplfair	pplhlp	happy	sclmeet	sclact	gndr	agea	partner
0	CH	5.0	6	3.0	3.0	10.0	5.0	8.0	5.0	4.0	2.0	60.0	1.0
1	CH	25.0	6	6.0	5.0	7.0	5.0	9.0	3.0	2.0	2.0	59.0	1.0
2	CH	26.0	6	1.0	8.0	8.0	8.0	7.0	6.0	3.0	1.0	24.0	2.0
3	CH	28.0	6	4.0	6.0	6.0	7.0	10.0	6.0	2.0	2.0	64.0	1.0
4	CH	29.0	6	5.0	6.0	7.0	5.0	8.0	7.0	2.0	2.0	55.0	1.0

group1	group2	meandiff	lower	upper	reject
T_CH_6	T_CH_7	-0.0266	-0.3948	0.3417	False
T_CH_6	T_CZ_6	-1.0713	-1.4396	-0.7031	True
T_CH_6	T_CZ_7	-1.1888	-1.557	-0.8206	True
T_CH_6	T_DE_6	-0.5958	-0.964	-0.2276	True
T_CH_6	T_DE_7	-0.5958	-0.964	-0.2276	True
T_CH_6	T_ES_6	-0.7804	-1.1487	-0.4122	True
T_CH_6	T_ES_7	0.2783	-0.0899	0.6466	False
T_CH_6	T_NO_6	0.9538	0.5856	1.3221	True
T_CH_6	T_NO_7	0.5245	0.1562	0.8927	True
T_CH_6	T_SE_6	0.4308	0.0625	0.799	True
T_CH_6	T_SE_7	0.6	0.2318	0.9682	True
T_CH_7	T_CZ_6	-1.0448	-1.413	-0.6765	True
T_CH_7	T_CZ_7	-1.1622	-1.5305	-0.794	True
T_CH_7	T_DE_6	-0.5692	-0.9375	-0.201	True
T_CH_7	T_DE_7	-0.5692	-0.9375	-0.201	True
T_CH_7	T_ES_6	-0.7538	-1.1221	-0.3856	True
T_CH_7	T_ES_7	0.3049	-0.0633	0.6731	False
T_CH_7	T_NO_6	0.9804	0.6122	1.3487	True
T_CH_7	T_NO_7	0.551	0.1828	0.9193	True
T_CH_7	T_SE_6	0.4573	0.0891	0.8256	True
T_CH_7	T_SE_7	0.6266	0.2583	0.9948	True
T_CZ_6	T_CZ_7	-0.1175	-0.4857	0.2507	False
T_CZ_6	T_DE_6	0.4755	0.1073	0.8438	True
T_CZ_6	T_DE_7	0.4755	0.1073	0.8438	True
T_CZ_6	T_ES_6	0.2909	-0.0773	0.6591	False
T_CZ_6	T_ES_7	1.3497	0.9814	1.7179	True
T_CZ_6	T_NO_6	2.0252	1.6569	2.3934	True
T_CZ_6	T_NO_7	1.5958	1.2276	1.964	True
T_CZ_6	T_SE_6	1.5021	1.1339	1.8703	True
T_CZ_6	T_SE_7	1.6713	1.3031	2.0396	True
T_CZ_7	T_DE_6	0.593	0.2248	0.9612	True
T_CZ_7	T_DE_7	0.593	0.2248	0.9612	True
T_CZ_7	T_ES_6	0.4084	0.0402	0.7766	True
T_CZ_7	T_ES_7	1.4671	1.0989	1.8354	True
T_CZ_7	T_NO_6	2.1427	1.7744	2.5109	True
T_CZ_7	T_NO_7	1.7133	1.3451	2.0815	True
T_CZ_7	T_SE_6	1.6196	1.2513	1.9878	True
T_CZ_7	T_SE_7	1.7888	1.4206	2.157	True
T_DE_6	T_DE_7	0.0	-0.3682	0.3682	False
T_DE_6	T_ES_6	-0.1846	-0.5528	0.1836	False
T_DE_6	T_ES_7	0.8741	0.5059	1.2424	True
T_DE_6	T_NO_6	1.5497	1.1814	1.9179	True
T_DE_6	T_NO_7	1.1203	0.752	1.4885	True
T_DE_6	T_SE_6	1.0266	0.6583	1.3948	True
T_DE_6	T_SE_7	1.1958	0.8276	1.564	True
T_DE_7	T_ES_6	-0.1846	-0.5528	0.1836	False
T_DE_7	T_ES_7	0.8741	0.5059	1.2424	True
T_DE_7	T_NO_6	1.5497	1.1814	1.9179	True
T_DE_7	T_NO_7	1.1203	0.752	1.4885	True
T_DE_7	T_SE_6	1.0266	0.6583	1.3948	True
T_DE_7	T_SE_7	1.1958	0.8276	1.564	True
T_ES_6	T_ES_7	1.0587	0.6905	1.427	True
T_ES_6	T_NO_6	1.7343	1.366	2.1025	True
T_ES_6	T_NO_7	1.3049	0.9367	1.6731	True
T_ES_6	T_SE_6	1.2112	0.843	1.5794	True
T_ES_6	T_SE_7	1.3804	1.0122	1.7487	True
T_ES_7	T_NO_6	0.6755	0.3073	1.0438	True
T_ES_7	T_NO_7	0.2462	-0.1221	0.6144	False
T_ES_7	T_SE_6	0.1524	-0.2158	0.5207	False
T_ES_7	T_SE_7	0.3217	-0.0466	0.6899	False
T_NO_6	T_NO_7	-0.4294	-0.7976	-0.0611	True
T_NO_6	T_SE_6	-0.5231	-0.8913	-0.1548	True
T_NO_6	T_SE_7	-0.3538	-0.7221	0.0144	False
T_NO_7	T_SE_6	-0.0937	-0.4619	0.2745	False
T_NO_7	T_SE_7	0.0755	-0.2927	0.4438	False
T_SE_6	T_SE_7	0.1692	-0.199	0.5375	False

group1	group2	meandiff	lower	upper	reject
H_CH_6	H_CH_7	0.0042	-0.288	0.2964	False
H_CH_6	H_CZ_6	-1.1262	-1.4184	-0.8341	True
H_CH_6	H_CZ_7	-1.1992	-1.4913	-0.907	True
H_CH_6	H_DE_6	-0.5947	-0.8868	-0.3025	True
H_CH_6	H_DE_7	-0.554	-0.8462	-0.2618	True
H_CH_6	H_ES_6	-0.7349	-1.0271	-0.4428	True
H_CH_6	H_ES_7	-0.1893	-0.4815	0.1028	False
H_CH_6	H_NO_6	0.0168	-0.2753	0.309	False
H_CH_6	H_NO_7	-0.1318	-0.424	0.1603	False
H_CH_6	H_SE_6	-0.2581	-0.5502	0.0341	False
H_CH_6	H_SE_7	-0.1571	-0.4492	0.1351	False
H_CH_7	H_CZ_6	-1.1304	-1.4226	-0.8383	True
H_CH_7	H_CZ_7	-1.2034	-1.4955	-0.9112	True
H_CH_7	H_DE_6	-0.5989	-0.891	-0.3067	True
H_CH_7	H_DE_7	-0.5582	-0.8504	-0.266	True
H_CH_7	H_ES_6	-0.7391	-1.0313	-0.447	True
H_CH_7	H_ES_7	-0.1935	-0.4857	0.0986	False
H_CH_7	H_NO_6	0.0126	-0.2795	0.3048	False
H_CH_7	H_NO_7	-0.136	-0.4282	0.1561	False
H_CH_7	H_SE_6	-0.2623	-0.5544	0.0299	False
H_CH_7	H_SE_7	-0.1613	-0.4535	0.1309	False
H_CZ_6	H_CZ_7	-0.0729	-0.3651	0.2192	False
H_CZ_6	H_DE_6	0.5316	0.2394	0.8237	True
H_CZ_6	H_DE_7	0.5722	0.2801	0.8644	True
H_CZ_6	H_ES_6	0.3913	0.0991	0.6835	True
H_CZ_6	H_ES_7	0.9369	0.6447	1.2291	True
H_CZ_6	H_NO_6	1.1431	0.8509	1.4352	True
H_CZ_6	H_NO_7	0.9944	0.7022	1.2866	True
H_CZ_6	H_SE_6	0.8682	0.576	1.1603	True
H_CZ_6	H_SE_7	0.9691	0.677	1.2613	True
H_CZ_7	H_DE_6	0.6045	0.3123	0.8967	True
H_CZ_7	H_DE_7	0.6452	0.353	0.9373	True
H_CZ_7	H_ES_6	0.4642	0.1721	0.7564	True
H_CZ_7	H_ES_7	1.0098	0.7177	1.302	True
H_CZ_7	H_NO_6	1.216	0.9238	1.5082	True
H_CZ_7	H_NO_7	1.0673	0.7752	1.3595	True
H_CZ_7	H_SE_6	0.9411	0.6489	1.2333	True
H_CZ_7	H_SE_7	1.0421	0.7499	1.3342	True
H_DE_6	H_DE_7	0.0407	-0.2515	0.3328	False
H_DE_6	H_ES_6	-0.1403	-0.4324	0.1519	False
H_DE_6	H_ES_7	0.4053	0.1132	0.6975	True
H_DE_6	H_NO_6	0.6115	0.3193	0.9037	True
H_DE_6	H_NO_7	0.4628	0.1707	0.755	True
H_DE_6	H_SE_6	0.3366	0.0444	0.6288	True
H_DE_6	H_SE_7	0.4376	0.1454	0.7298	True
H_DE_7	H_ES_6	-0.1809	-0.4731	0.1112	False
H_DE_7	H_ES_7	0.3647	0.0725	0.6568	True
H_DE_7	H_NO_6	0.5708	0.2787	0.863	True
H_DE_7	H_NO_7	0.4222	0.13	0.7143	True
H_DE_7	H_SE_6	0.2959	0.0038	0.5881	True
H_DE_7	H_SE_7	0.3969	0.1048	0.6891	True
H_ES_6	H_ES_7	0.5456	0.2534	0.8377	True
H_ES_6	H_NO_6	0.7518	0.4596	1.0439	True
H_ES_6	H_NO_7	0.6031	0.3109	0.8952	True
H_ES_6	H_SE_6	0.4769	0.1847	0.769	True
H_ES_6	H_SE_7	0.5778	0.2857	0.87	True
H_ES_7	H_NO_6	0.2062	-0.086	0.4983	False
H_ES_7	H_NO_7	0.0575	-0.2347	0.3497	False
H_ES_7	H_SE_6	-0.0687	-0.3609	0.2234	False
H_ES_7	H_SE_7	0.0323	-0.2599	0.3244	False
H_NO_6	H_NO_7	-0.1487	-0.4408	0.1435	False
H_NO_6	H_SE_6	-0.2749	-0.5671	0.0173	False
H_NO_6	H_SE_7	-0.1739	-0.4661	0.1183	False
H_NO_7	H_SE_6	-0.1262	-0.4184	0.1659	False
H_NO_7	H_SE_7	-0.0252	-0.3174	0.2669	False
H_SE_6	H_SE_7	0.101	-0.1912	0.3931	False

group1	group2	meandiff	lower	upper	reject
CH_1	CH_2	-0.1472	-0.4237	0.1294	False
CH_1	CH_3	-0.1385	-0.4151	0.138	False
CH_1	CH_4	-0.1732	-0.4497	0.1034	False
CH_1	ES_1	-0.2554	-0.532	0.0211	False
CH_1	ES_2	-0.2511	-0.5276	0.0255	False
CH_1	ES_3	-0.2771	-0.5536	-0.0005	True
CH_1	ES_4	-0.2078	-0.4844	0.0688	False
CH_1	SE_1	-0.5844	-0.861	-0.3079	True
CH_1	SE_2	-0.0043	-0.2809	0.2722	False
CH_1	SE_3	-0.1558	-0.4324	0.1207	False
CH_1	SE_4	-0.0866	-0.3631	0.19	False
CH_2	CH_3	0.0087	-0.2679	0.2852	False
CH_2	CH_4	-0.026	-0.3025	0.2506	False
CH_2	ES_1	-0.1082	-0.3848	0.1683	False
CH_2	ES_2	-0.1039	-0.3805	0.1727	False
CH_2	ES_3	-0.1299	-0.4064	0.1467	False
CH_2	ES_4	-0.0606	-0.3372	0.216	False
CH_2	SE_1	-0.4372	-0.7138	-0.1607	True
CH_2	SE_2	0.1429	-0.1337	0.4194	False
CH_2	SE_3	-0.0087	-0.2852	0.2679	False
CH_2	SE_4	0.0606	-0.216	0.3372	False
CH_3	CH_4	-0.0346	-0.3112	0.2419	False
CH_3	ES_1	-0.1169	-0.3934	0.1597	False
CH_3	ES_2	-0.1126	-0.3891	0.164	False
CH_3	ES_3	-0.1385	-0.4151	0.138	False
CH_3	ES_4	-0.0693	-0.3458	0.2073	False
CH_3	SE_1	-0.4459	-0.7224	-0.1693	True
CH_3	SE_2	0.1342	-0.1424	0.4108	False
CH_3	SE_3	-0.0173	-0.2939	0.2592	False
CH_3	SE_4	0.0519	-0.2246	0.3285	False
CH_4	ES_1	-0.0823	-0.3588	0.1943	False
CH_4	ES_2	-0.0779	-0.3545	0.1986	False
CH_4	ES_3	-0.1039	-0.3805	0.1727	False
CH_4	ES_4	-0.0346	-0.3112	0.2419	False
CH_4	SE_1	-0.4113	-0.6878	-0.1347	True
CH_4	SE_2	0.1688	-0.1077	0.4454	False
CH_4	SE_3	0.0173	-0.2592	0.2939	False
CH_4	SE_4	0.0866	-0.19	0.3631	False
ES_1	ES_2	0.0043	-0.2722	0.2809	False
ES_1	ES_3	-0.0216	-0.2982	0.2549	False
ES_1	ES_4	0.0476	-0.2289	0.3242	False
ES_1	SE_1	-0.329	-0.6056	-0.0524	True
ES_1	SE_2	0.2511	-0.0255	0.5276	False
ES_1	SE_3	0.0996	-0.177	0.3761	False
ES_1	SE_4	0.1688	-0.1077	0.4454	False
ES_2	ES_3	-0.026	-0.3025	0.2506	False
ES_2	ES_4	0.0433	-0.2333	0.3199	False
ES_2	SE_1	-0.3333	-0.6099	-0.0568	True
ES_2	SE_2	0.2468	-0.0298	0.5233	False
ES_2	SE_3	0.0952	-0.1813	0.3718	False
ES_2	SE_4	0.1645	-0.1121	0.4411	False
ES_3	ES_4	0.0693	-0.2073	0.3458	False
ES_3	SE_1	-0.3074	-0.5839	-0.0308	True
ES_3	SE_2	0.2727	-0.0038	0.5493	False
ES_3	SE_3	0.1212	-0.1553	0.3978	False
ES_3	SE_4	0.1905	-0.0861	0.467	False
ES_4	SE_1	-0.3766	-0.6532	-0.1001	True
ES_4	SE_2	0.2035	-0.0731	0.48	False
ES_4	SE_3	0.0519	-0.2246	0.3285	False
ES_4	SE_4	0.1212	-0.1553	0.3978	False
SE_1	SE_2	0.5801	0.3035	0.8566	True
SE_1	SE_3	0.4286	0.152	0.7051	True
SE_1	SE_4	0.4978	0.2213	0.7744	True
SE_2	SE_3	-0.1515	-0.4281	0.125	False
SE_2	SE_4	-0.0823	-0.3588	0.1943	False
SE_3	SE_4	0.0693	-0.2073	0.3458	False

1. Did people become less trusting from 2012 to 2014? Compute results for each country in the sample.

2. Did people become happier from 2012 to 2014? Compute results for each country in the sample

3. Who reported watching more TV in 2012, men or women?

4. Who was more likely to believe people were fair in 2012, people living with a partner or people living alone?

5. Pick three or four of the countries in the sample and compare how often people met socially in 2014. Are there differences, and if so, which countries stand out?

6. Pick three or four of the countries in the sample and compare how often people took part in social activities, relative to others of their age, in 2014. Are there differences, and if so, which countries stand out?