In [1]:
import numpy as np
import pandas as pd
import scipy as sp

In [2]:
df = pd.read_csv("maplike_survey_201612.csv")

In [3]:
df.describe()


Out[3]:
id AGE DEGREE FAC ITSKILLS VISSKILLS C1_SQ001 C1_SQ002 C1_SQ003 C2_SQ003 C2_SQ001 C2_SQ002 C3_SQ001 C3_SQ002 C3_SQ003
count 40.000000 40.000000 40.00000 40.000000 40.000000 40.000000 40.000000 40.000000 40.000000 40.000000 40.000000 40.000000 40.000000 40.000000 40.000000
mean 20.500000 22.625000 0.77500 6.525000 5.175000 3.575000 3.700000 4.425000 5.225000 2.650000 3.975000 4.400000 3.550000 4.275000 5.825000
std 11.690452 2.695557 0.65974 2.571914 1.375379 1.337573 1.505545 1.448031 1.609069 1.477177 1.804375 1.794579 1.647842 1.739253 1.483024
min 1.000000 17.000000 0.00000 2.000000 3.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
25% 10.750000 21.000000 0.00000 4.000000 5.000000 3.000000 3.000000 3.750000 4.750000 1.000000 2.000000 3.000000 2.000000 3.000000 5.000000
50% 20.500000 23.000000 1.00000 8.000000 5.000000 4.000000 4.000000 4.000000 6.000000 2.000000 4.000000 5.000000 4.000000 5.000000 6.000000
75% 30.250000 24.000000 1.00000 9.000000 6.000000 4.250000 4.000000 6.000000 6.000000 4.000000 5.000000 6.000000 5.000000 6.000000 7.000000
max 40.000000 28.000000 2.00000 9.000000 7.000000 6.000000 7.000000 7.000000 7.000000 6.000000 7.000000 7.000000 7.000000 7.000000 7.000000

In [4]:
N = len(df); print(N)


40

Demographic Data

Degree


In [5]:
all_counts = df["DEGREE"].value_counts(sort=False)
for name, count, percent in zip(np.nditer(all_counts.axes), np.nditer(all_counts), np.nditer(100.0 * all_counts / N)):
    print(f"{name}\t{count}\t{percent}%")


0	14	35.0%
1	21	52.5%
2	5	12.5%

Faculty/Unit


In [6]:
all_counts = df["FAC"].value_counts(sort=False)
for name, count, percent in zip(np.nditer(all_counts.axes), np.nditer(all_counts), np.nditer(100.0 * all_counts / N)):
    print(f"{name}\t{count}\t{percent}%")


2	2	5.0%
3	6	15.0%
4	6	15.0%
5	1	2.5%
7	4	10.0%
8	7	17.5%
9	14	35.0%

Calculating 95% confidence interval


In [7]:
from scipy import stats

In [8]:
# This is from a text-book: http://www.stat.yale.edu/Courses/1997-98/101/confint.htm
mean = 98.249
sd = 0.733
N = 130
stats.norm.interval(0.95, loc=mean, scale=sd / np.sqrt(N))


Out[8]:
(98.122997143856793, 98.375002856143198)

In [9]:
# 95% confidence interval for AGE
mean = df.AGE.mean()
sd = df.AGE.std()
N = len(df.AGE)
stats.norm.interval(0.95, loc=mean, scale=sd / np.sqrt(N))


Out[9]:
(21.78965369022621, 23.46034630977379)

Mean values for C1_SQ001, C1_SQ002, C1_SQ003


In [10]:
(df.C1_SQ001.mean(), df.C1_SQ002.mean(), df.C1_SQ003.mean())


Out[10]:
(3.7, 4.425, 5.225)

Median value


In [11]:
df.C1_SQ001.median(), df.C1_SQ002.median(), df.C1_SQ003.median()


Out[11]:
(4.0, 4.0, 6.0)

Wilcoxon signed rank test


In [12]:
def wtest(a, b):
    result = sp.stats.wilcoxon(a, b)
    print("statistic={}, pvalue={:.4f}".format(*result))
    print("effect size: r={:.2f}".format(result.statistic / np.sqrt(len(a))))
    print()

In [13]:
wtest(df.C1_SQ001, df.C1_SQ002)
wtest(df.C1_SQ002, df.C1_SQ003)


statistic=23.0, pvalue=0.0000
effect size: r=3.64

statistic=113.5, pvalue=0.0021
effect size: r=17.95


In [14]:
(df.C2_SQ003.mean(), df.C2_SQ001.mean(), df.C2_SQ002.mean())


Out[14]:
(2.65, 3.975, 4.4)

In [15]:
df.C2_SQ003.median(), df.C2_SQ001.median(), df.C2_SQ002.median()


Out[15]:
(2.0, 4.0, 5.0)

In [16]:
wtest(df.C2_SQ003, df.C2_SQ001)
wtest(df.C2_SQ001, df.C2_SQ002)


statistic=38.0, pvalue=0.0007
effect size: r=6.01

statistic=89.0, pvalue=0.0445
effect size: r=14.07


In [17]:
(df.C3_SQ001.mean(), df.C3_SQ002.mean(), df.C3_SQ003.mean())


Out[17]:
(3.55, 4.275, 5.825)

In [18]:
df.C3_SQ001.median(), df.C3_SQ002.median(), df.C3_SQ003.median()


Out[18]:
(4.0, 5.0, 6.0)

In [19]:
wtest(df.C3_SQ001, df.C3_SQ002)
wtest(df.C3_SQ002, df.C3_SQ003)
wtest(df.C3_SQ001, df.C3_SQ003)


statistic=37.5, pvalue=0.0005
effect size: r=5.93

statistic=9.5, pvalue=0.0000
effect size: r=1.50

statistic=4.5, pvalue=0.0000
effect size: r=0.71

comparing the best of C1 (round border) and C2 (straight border)


In [20]:
wtest(df.C1_SQ003, df.C2_SQ002)


statistic=47.5, pvalue=0.0017
effect size: r=7.51


In [ ]: