In [1]:
import os
from genomics.popgen.plink.convert import to_eigen
from genomics.popgen.pca import plot, smart
%matplotlib inline
In [2]:
f = open('relationships_w_pops_121708.txt')
ind_pop = {}
f.readline() # header
for l in f:
toks = l.rstrip().split('\t')
fam_id = toks[0]
ind_id = toks[1]
pop = toks[-1]
ind_pop['/'.join([fam_id, ind_id])] = pop
f.close()
ind_pop['2469/NA20281'] = ind_pop['2805/NA20281']
In [3]:
to_eigen('hapmap10_auto_noofs_ld_12', 'hapmap10_auto_noofs_ld_12')
In [4]:
ctrl = smart.SmartPCAController('hapmap10_auto_noofs_ld_12')
ctrl.run()
In [5]:
wei, wei_perc, ind_comp = smart.parse_evec('hapmap10_auto_noofs_ld_12.evec', 'hapmap10_auto_noofs_ld_12.eval')
In [6]:
plot.render_pca(ind_comp, 1, 2, cluster=ind_pop)
#put weights
Out[6]:
In [7]:
plot.render_pca_eight(ind_comp, cluster=ind_pop)
Out[7]:
In [8]:
markers = { 'CHB': '*', 'CHD': '*', 'JPT': '*', 'GIH': '*',
'CEU': 'v', 'TSI': 'v', 'MEX': 'v',
'ASW': 'o', 'LWK': 'o', 'YRI': 'o', 'MKK': 'o'
}
In [9]:
#See also, the hapmap paper for the PCA
#Paper on smartpca
In [10]:
from sklearn.decomposition import PCA
import numpy as np
In [11]:
f = open('hapmap10_auto_noofs_ld_12.ped')
ninds = 0
ind_order = []
for line in f:
ninds += 1
toks = line[:100].replace(' ', '\t').split('\t') # for speed
fam_id = toks[0]
ind_id = toks[1]
ind_order.append('%s/%s' % (fam_id, ind_id))
nsnps = (len(line.replace(' ', '\t').split('\t')) - 6) // 2
print (nsnps)
f.close()
In [12]:
pca_array = np.empty((ninds, nsnps), dtype=int)
print pca_array.shape
f = open('hapmap10_auto_noofs_ld_12.ped')
for ind, line in enumerate(f):
snps = line.replace(' ', '\t').split('\t')[6:]
for pos in range(len(snps) // 2):
a1 = int(snps[2 * pos])
a2 = int(snps[2 * pos])
my_code = a1 + a2 - 2
pca_array[ind, pos] = my_code
f.close()
#slow
In [13]:
my_pca = PCA(n_components=8)
my_pca.fit(pca_array)
trans = my_pca.transform(pca_array)
#Memory required
In [14]:
sc_ind_comp = {}
for i, ind_pca in enumerate(trans):
sc_ind_comp[ind_order[i]] = ind_pca
plot.render_pca_eight(sc_ind_comp, cluster=ind_pop)
Out[14]:
In [ ]: