In [1]:
import numpy as np
import pandas as pd
import scipy as sp
In [2]:
# Read the CSV file (path relative to the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer="maplike_survey_201612.csv")
In [3]:
# Display pandas' summary statistics for the loaded frame as the cell output.
df.describe()
Out[3]:
In [4]:
# Number of rows in the loaded frame, kept in N and echoed.
N = df.shape[0]
print(N)
In [5]:
# Frequency table for DEGREE: one "value<TAB>count<TAB>percent%" row per
# distinct value, in the order value_counts(sort=False) returns them.
all_counts = df["DEGREE"].value_counts(sort=False)
# Use the frame's own row count as the denominator rather than the global N,
# which later cells rebind to other sample sizes.
total_rows = len(df)
# Series.items() yields (label, count) pairs directly. np.nditer over
# Series.axes iterates a list holding the Index, fails on string labels
# unless refs_ok is set, and hands back 0-d arrays instead of scalars.
for name, count in all_counts.items():
    percent = 100.0 * count / total_rows
    print(f"{name}\t{count}\t{percent}%")
In [6]:
# Frequency table for FAC: one "value<TAB>count<TAB>percent%" row per
# distinct value, in the order value_counts(sort=False) returns them.
all_counts = df["FAC"].value_counts(sort=False)
# Use the frame's own row count as the denominator rather than the global N,
# which later cells rebind to other sample sizes.
total_rows = len(df)
# Series.items() yields (label, count) pairs directly. np.nditer over
# Series.axes iterates a list holding the Index, fails on string labels
# unless refs_ok is set, and hands back 0-d arrays instead of scalars.
for name, count in all_counts.items():
    percent = 100.0 * count / total_rows
    print(f"{name}\t{count}\t{percent}%")
In [7]:
from scipy import stats
In [8]:
# Sanity check of the confidence-interval call against a textbook example:
# http://www.stat.yale.edu/Courses/1997-98/101/confint.htm
# The example values get their own names so they do not overwrite the
# survey's N (row count) or any mean/sd already held in the kernel.
textbook_mean = 98.249
textbook_sd = 0.733
textbook_n = 130
textbook_ci = stats.norm.interval(
    0.95, loc=textbook_mean, scale=textbook_sd / np.sqrt(textbook_n)
)
# Last expression of the cell, so the interval is displayed as its output.
textbook_ci
Out[8]:
In [9]:
# 95% confidence interval for the mean of AGE (normal approximation).
mean = df.AGE.mean()
sd = df.AGE.std()
# mean() and std() skip missing values, so the sample size must count only
# the non-null entries; len() would also count NaN rows and make the
# standard error (and hence the interval) too small.
N = df.AGE.count()
stats.norm.interval(0.95, loc=mean, scale=sd / np.sqrt(N))
Out[9]:
In [10]:
# Means of columns C1_SQ001, C1_SQ002 and C1_SQ003, shown as a tuple.
tuple(df[column].mean() for column in ("C1_SQ001", "C1_SQ002", "C1_SQ003"))
Out[10]:
Median value
In [11]:
# Medians of columns C1_SQ001, C1_SQ002 and C1_SQ003, shown as a tuple.
tuple(df[column].median() for column in ("C1_SQ001", "C1_SQ002", "C1_SQ003"))
Out[11]:
In [12]:
def wtest(a, b):
    """Run a paired Wilcoxon signed-rank test on ``a`` and ``b`` and print it.

    Prints the test statistic and p-value on one line, the effect size
    ``r = Z / sqrt(N)`` on the next, and then a blank line. Nothing is
    returned.

    Args:
        a: First sample of paired observations (array-like).
        b: Second sample, paired element-wise with ``a``.
    """
    result = sp.stats.wilcoxon(a, b)
    print("statistic={}, pvalue={:.4f}".format(result.statistic, result.pvalue))
    # The effect size r is defined on a standard-normal deviate Z, not on the
    # signed-rank statistic W. W is a rank sum: it is 0 when every difference
    # has the same sign (the strongest effect) and grows with sample size, so
    # W / sqrt(N) is neither bounded nor monotone in effect strength.
    # Recover |Z| from the two-sided p-value instead.
    z = sp.stats.norm.isf(result.pvalue / 2.0)
    # N is taken as len(a), i.e. the number of pairs passed in.
    print("effect size: r={:.2f}".format(z / np.sqrt(len(a))))
    print()
In [13]:
# Pairwise tests within C1: SQ001 vs SQ002, then SQ002 vs SQ003.
wtest(df["C1_SQ001"], df["C1_SQ002"])
wtest(df["C1_SQ002"], df["C1_SQ003"])
In [14]:
# Means of columns C2_SQ003, C2_SQ001 and C2_SQ002 (in that order), shown as a tuple.
tuple(df[column].mean() for column in ("C2_SQ003", "C2_SQ001", "C2_SQ002"))
Out[14]:
In [15]:
# Medians of columns C2_SQ003, C2_SQ001 and C2_SQ002 (in that order), shown as a tuple.
tuple(df[column].median() for column in ("C2_SQ003", "C2_SQ001", "C2_SQ002"))
Out[15]:
In [16]:
# Pairwise tests within C2: SQ003 vs SQ001, then SQ001 vs SQ002.
wtest(df["C2_SQ003"], df["C2_SQ001"])
wtest(df["C2_SQ001"], df["C2_SQ002"])
In [17]:
# Means of columns C3_SQ001, C3_SQ002 and C3_SQ003, shown as a tuple.
tuple(df[column].mean() for column in ("C3_SQ001", "C3_SQ002", "C3_SQ003"))
Out[17]:
In [18]:
# Medians of columns C3_SQ001, C3_SQ002 and C3_SQ003, shown as a tuple.
tuple(df[column].median() for column in ("C3_SQ001", "C3_SQ002", "C3_SQ003"))
Out[18]:
In [19]:
# Pairwise tests within C3: SQ001 vs SQ002, SQ002 vs SQ003, and SQ001 vs SQ003.
wtest(df["C3_SQ001"], df["C3_SQ002"])
wtest(df["C3_SQ002"], df["C3_SQ003"])
wtest(df["C3_SQ001"], df["C3_SQ003"])
In [20]:
# Cross-group test: C1_SQ003 against C2_SQ002.
wtest(df["C1_SQ003"], df["C2_SQ002"])
In [ ]: