Blood Types of PGP Participants


In [85]:
%matplotlib inline
import matplotlib.pyplot as plt
import sqlite3
import pandas as pd
import numpy as np
from scipy import stats
import sqlite3

In [86]:
# Use the 'ggplot' style sheet as the default matplotlib plot style
plt.style.use('ggplot')

# Default (width, height) applied to every new matplotlib figure
plt.rc('figure', figsize=(16, 8))

In [87]:
# Load the `demographics` table of the untap database into a DataFrame.
# The database can be found here:
# https://workbench.su92l.arvadosapi.com/collections/22d61dd43786c65cd175b04ad6954af0+3119/html/index.html#

conn = sqlite3.connect('Database/untap.db')
try:
    c = conn.cursor()
    c.execute('SELECT * FROM demographics')
    rows = c.fetchall()
    # Each entry of cursor.description is a tuple whose first item is the
    # column name. A comprehension works on Python 2 and 3, whereas
    # zip(*c.description)[0] raises TypeError on Python 3 (zip is an iterator).
    data = pd.DataFrame(rows, columns=[col[0] for col in c.description])
finally:
    # Always release the database handle, even if the query or the
    # DataFrame construction fails.
    conn.close()
data


Out[87]:
id human_id date_of_birth gender weight height blood_type race
0 1 hu43860C 1954-08-28 (61 years old) Male 246lbs (112kg) 6ft 5in (195cm) O+ White
1 2 huC30901 1962-05-23 (53 years old) Male 175lbs (79kg) 6ft 2in (187cm) O- White
2 3 hu9385BA 1955-07-05 (60 years old) Male 155lbs (70kg) A+
3 4 hu04FD18 1954-09-18 (61 years old) Male O+ White
4 5 huAE6220 1949-04-24 (66 years old) Male B+ White
5 6 hu604D39 1958-01-19 (58 years old) Male 245lbs (111kg) 5ft 8in (172cm) AB+ Black or African American
6 7 hu6E4515 1921-07-09 (94 years old) Male 180lbs (82kg) 6ft (182cm) O+ White
7 8 huA90CE6 1958-09-25 (57 years old) Male White
8 9 hu34D5B9 1973-01-20 (43 years old) 170lbs (77kg) 5ft 11in (180cm)
9 10 hu2FEC01 1985-10-06 (30 years old) Male 143lbs (65kg) 5ft 9in (175cm) A+ White
10 11 huAA16BD 1927-11-17 (88 years old)
11 12 hu2DBF2D 1982-12-07 (33 years old) Male 190lbs (86kg) 5ft 10in (177cm) O- White
12 13 hu342A08 1943-09-18 (72 years old) Female 160lbs (73kg) 5ft 9in (175cm) White
13 14 hu4339C0
14 15 hu72A81D 1968-11-01 (47 years old) Female 145lbs (66kg) 5ft 10in (177cm) A+ White
15 16 hu0E64A1 1985-02-20 (30 years old) Male 120lbs (54kg) White
16 17 hu3CAB43 1967-11-30 (48 years old) Male 115lbs (52kg) 5ft 6in (167cm) White
17 18 hu2D6140 1942-01-05 (74 years old) Female 142lbs (64kg) 5ft 6in (167cm) O+ White
18 19 huA0E089 1942-11-22 (73 years old) Female 139lbs (63kg) 5ft 4in (162cm) A+ White
19 20 hu8229AE 1969-09-11 (46 years old) Female 153lbs (69kg) 5ft 2in (157cm) O+ White
20 21 huCA017E 1949-03-19 (66 years old) Male 142lbs (64kg) 5ft 6in (167cm) A+ Asian
21 22 hu92C40A 1944-01-13 (72 years old) Female 132lbs (60kg) 5ft 7in (170cm) American Indian or Alaska Native
22 23 hu38168C 1949-07-08 (66 years old) Female 105lbs (48kg) 5ft 3in (160cm) O+ Asian
23 24 huB1FD55 1958-09-19 (57 years old) Male White
24 25 huD81F3D 1969-04-24 (46 years old) Male 210lbs (95kg) 6ft 3in (190cm) A+ White
25 26 huD37D14 1970-02-20 (45 years old) Female 147lbs (67kg) 5ft 6in (167cm) O+ White
26 27 huBAAC98 1980-07-30 (35 years old) Male 240lbs (109kg) 6ft 2in (187cm) O+ White
27 28 huB4940E
28 29 huEC6EEC 1964-07-10 (51 years old) Female 180lbs (82kg) 5ft 4in (162cm) O+ Hispanic or Latino
29 30 huF5E0B6 1971-12-12 (44 years old) Male 185lbs (84kg) 6ft (182cm) A+ White
... ... ... ... ... ... ... ... ...
1303 1304 hu4A6650 1943-10-26 (72 years old) Male 227lbs (103kg) 5ft 11in (180cm) A+ White
1304 1305 hu594129 1962-02-20 (53 years old) Male 163lbs (74kg) 5ft 11in (180cm) B+ White
1305 1306 huB256A2 1982-04-14 (33 years old) 115lbs (52kg)
1306 1307 hu7D6AB1 1948-07-30 (67 years old) Female 93lbs (42kg) 5ft (152cm) O+ Asian
1307 1308 huFEC65A 1969-11-25 (46 years old) Male White
1308 1309 huF9A043 1957-01-08 (59 years old) 162lbs (73kg)
1309 1310 huF9C3EE 1985-11-20 (30 years old) Male 170lbs (77kg) 5ft 10in (177cm) A+ Asian
1310 1311 huD0127A 1964-05-22 (51 years old) 212lbs (96kg)
1311 1312 hu5C4EBA 1966-10-27 (49 years old) 270lbs (122kg)
1312 1313 huD66092 1948-01-02 (68 years old) 116lbs (53kg) 5ft 4in (162cm)
1313 1314 hu039674 1981-04-18 (34 years old) Male 165lbs (75kg) 5ft 6in (167cm) White
1314 1315 hu941A10 1946-03-01 (69 years old) Female 140lbs (64kg) 5ft 3in (160cm) White
1315 1316 hu1D278B 1976-11-26 (39 years old) Male 190lbs (86kg) 5ft 11in (180cm) O+ Hispanic/Latino
1316 1317 hu045168 1980-06-04 (35 years old) 143lbs (65kg) 5ft 9in (175cm)
1317 1318 hu2561FE 1955-01-08 (61 years old) 5ft 9in (175cm)
1318 1319 huD8A5B6 1956-01-28 (60 years old) 184lbs (83kg)
1319 1320 hu2CA680 1964-12-24 (51 years old) 140lbs (64kg)
1320 1321 hu631D32 1937-10-19 (78 years old) Male 156lbs (71kg) 5ft 7in (170cm) AB+ White
1321 1322 hu934217 1962-10-01 (53 years old) Male 191lbs (87kg) 5ft 6in (167cm) O+ White
1322 1323 hu524B5B 1961-05-26 (54 years old) Female 147lbs (67kg) 5ft 3in (160cm) O+ White
1323 1324 hu78E6A9 1963-05-20 (52 years old) Female 142lbs (64kg) 5ft 5in (165cm) O+ White
1324 1325 huDF8970 1975-07-16 (40 years old)
1325 1326 hu313A20 1982-05-29 (33 years old) Male 165lbs (75kg) 6ft (182cm) O+ White
1326 1327 huFBD57F 1962-03-27 (53 years old) Male 201lbs (91kg) 5ft 9in (175cm) A+ White
1327 1328 huC5203C 1978-12-28 (37 years old) Female 98lbs (44kg) 5ft (152cm) White
1328 1329 huABB4CD 1975-01-08 (41 years old) 174lbs (79kg)
1329 1330 huDD1522 1986-02-06 (29 years old) 153lbs (69kg) 5ft 4in (162cm)
1330 1331 hu25C433 1951-08-17 (64 years old)
1331 1332 hu419539 1974-07-22 (41 years old) 161lbs (73kg) 6ft (182cm)
1332 1333 hu7FC773 108lbs (49kg)

1333 rows × 8 columns


In [88]:
# Count participants per reported blood type (one row per distinct value of
# the `blood_type` column, including whatever placeholder marks "not reported").
bloodtype = data.groupby('blood_type')['human_id'].count()

# Keep exactly the eight ABO/Rh types, selected by label rather than by
# dropping "the first group" positionally: a positional slice silently removes
# a real blood type if the blank/unreported group is absent or sorts elsewhere.
# Types nobody reported get a count of 0 so the row set is always the same.
bloodData = bloodtype.reindex(
    ['A+', 'A-', 'AB+', 'AB-', 'B+', 'B-', 'O+', 'O-'], fill_value=0
).to_frame(name='PGP')
bloodData.index.name = 'blood_type'

In [89]:
# Compare distribution of blood type in PGP with that of the general US population
## Note: Blood type distribution taken from American Red Cross website
## http://givebloodgivelife.org/education/bloodtypes.php

# Number of PGP participants with a usable blood type.
bTotal = bloodData['PGP'].sum()

# Reference frequencies keyed by blood type, so each value is matched to its
# row by label instead of relying on the row order of `bloodData`.
usaFreq = pd.Series({
    'A+': 0.34, 'A-': 0.06,
    'AB+': 0.03, 'AB-': 0.01,
    'B+': 0.09, 'B-': 0.02,
    'O+': 0.38, 'O-': 0.07,
})

# Expected counts: what a sample of size bTotal would look like if it followed
# the reference frequencies (assignment aligns on the blood_type index).
bloodData['USA'] = bTotal * usaFreq

# A NaN here means `bloodData` contains a label with no reference frequency.
assert bloodData['USA'].notnull().all(), 'blood type without a reference frequency'

In [90]:
# Convert observed and expected counts into fractions of the PGP total
bloodData2 = bloodData.div(bTotal)
bloodData2


Out[90]:
PGP USA
blood_type
A+ 0.313618 0.34
A- 0.077029 0.06
AB+ 0.038514 0.03
AB- 0.011004 0.01
B+ 0.107290 0.09
B- 0.013755 0.02
O+ 0.356259 0.38
O- 0.082531 0.07

In [91]:
# Plot comparison as a bar plot: one group of bars per blood type, one bar per
# column of bloodData2 (PGP vs. USA).
# `rot=45` rotates the tick labels that pandas already derives from the plotted
# frame's index, so they cannot drift out of sync with the bars.
ax = bloodData2.plot(kind='bar', rot=45)
ax.set_xlabel('blood type')
ax.set_ylabel('relative frequency')

# Work on the figure that owns these axes instead of pyplot's "current figure".
# Extra bottom margin keeps the rotated tick labels inside the saved image.
ax.figure.subplots_adjust(bottom=0.2)
ax.figure.savefig('Images/bloodtypeAll.svg', format='svg')



In [92]:
# Chi-square goodness-of-fit test: observed PGP counts against the counts
# expected under the USA reference frequencies; display the p-value
chi2, pvalue = stats.chisquare(
    f_obs=bloodData['PGP'].values,
    f_exp=bloodData['USA'].values,
    axis=None
)
pvalue


Out[92]:
0.063511368112054573

In [93]:
# Repeat the comparison for White participants only: keep the rows whose
# `race` entry contains 'White' (rows with a missing race are excluded)
idx = data.race.str.contains('White', na=False)
dataW = data.loc[idx]

In [102]:
# Count White participants per reported blood type (one row per distinct value
# of `blood_type`, including whatever placeholder marks "not reported").
bloodtypeW = dataW.groupby('blood_type')['human_id'].count()

# Keep exactly the eight ABO/Rh types, selected by label rather than by
# dropping "the first group" positionally: a positional slice silently removes
# a real blood type if the blank/unreported group is absent in this subset.
# Types nobody reported get a count of 0 so the row set is always the same.
bloodDataW = bloodtypeW.reindex(
    ['A+', 'A-', 'AB+', 'AB-', 'B+', 'B-', 'O+', 'O-'], fill_value=0
).to_frame(name='PGPWhite')
bloodDataW.index.name = 'blood_type'

In [103]:
# Compare distribution of blood type among White PGP participants with the
# reference frequencies stored in the 'White' column
# NOTE(review): presumably the Caucasian figures from the page below - verify
## Note: Blood type distribution taken from American Red Cross website
## http://www.redcrossblood.org/learn-about-blood/blood-types

# Number of White PGP participants with a usable blood type.
bTotalW = bloodDataW['PGPWhite'].sum()

# Reference frequencies keyed by blood type, so each value is matched to its
# row by label instead of relying on the row order of `bloodDataW`.
whiteFreq = pd.Series({
    'A+': 0.33, 'A-': 0.07,
    'AB+': 0.03, 'AB-': 0.01,
    'B+': 0.09, 'B-': 0.02,
    'O+': 0.37, 'O-': 0.08,
})

# Expected counts: what a sample of size bTotalW would look like if it followed
# the reference frequencies (assignment aligns on the blood_type index).
bloodDataW['White'] = bTotalW * whiteFreq

# A NaN here means `bloodDataW` contains a label with no reference frequency.
assert bloodDataW['White'].notnull().all(), 'blood type without a reference frequency'

In [114]:
# Fractions for the White subset, with the all-participant PGP fractions
# added as a third column (matched on the blood_type index)
bloodData2W = (bloodDataW / bTotalW).assign(PGPAll=bloodData2['PGP'])

In [115]:
# Plot comparison as a bar plot: one group of bars per blood type, showing all
# PGP participants, White PGP participants and the White reference frequencies.
# `rot=45` rotates the tick labels that pandas derives from the plotted frame's
# own index; previously they were overwritten with the index of the unrelated
# `bloodData` frame, which would mislabel the bars if the two ever differed.
ax = bloodData2W[['PGPAll', 'PGPWhite', 'White']].plot(kind='bar', rot=45)
ax.set_xlabel('blood type')
ax.set_ylabel('relative frequency')

# Work on the figure that owns these axes instead of pyplot's "current figure".
# Extra bottom margin keeps the rotated tick labels inside the saved image.
ax.figure.subplots_adjust(bottom=0.2)
ax.figure.savefig('Images/bloodtypeWhite.svg', format='svg')



In [98]:
# Chi-square goodness-of-fit test: observed counts of White PGP participants
# against the counts expected under the 'White' reference frequencies;
# display the p-value
chi2, pvalue = stats.chisquare(
    f_obs=bloodDataW['PGPWhite'].values,
    f_exp=bloodDataW['White'].values,
    axis=None
)
pvalue


Out[98]:
0.53666552766244691