In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
# Absolute path of the directory one level above the current working directory.
root = Path('../').resolve()

# Location of the expression matrix under <root>/src.
# File names noted by the author for this slot:
#   sanger_cell_line_proj.npy / nci60.npy / GSE36133.npy
nci_val_pth = root / 'src' / 'nci60.npy'

# Disabled: memory-map the matrix, list the rows that contain NaN, drop them,
# then list again to confirm none remain.
# Author's note for the SANGER matrix (rows counted from 0):
# rows 22275 to 22280 contain NaN.
#nci_val = np.load(nci_val_pth.as_posix(), mmap_mode='r')
#pd.isnull(nci_val).any(1).nonzero()[0]
#nci_val = nci_val[~np.isnan(nci_val).any(axis=1)]
#pd.isnull(nci_val).any(1).nonzero()[0]
In [3]:
import sklearn
from sklearn.decomposition import PCA
In [95]:
# Read the test expression table from <parent dir>/src/raw/ptest_data.xlsx.
val_pth = Path('../').resolve().joinpath('src', 'raw', 'ptest_data.xlsx')

# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# `.values` returns the same ndarray on both old and current pandas.
val = pd.read_excel(val_pth.as_posix()).values
val
Out[95]:
In [96]:
# Disabled alternative inputs, kept for reference.
# (a) use the matrix loaded from the .npy file:
#val=nci_val
# (b) use a small hand-entered 10 x 4 example:
#val=np.array([[-4.8927,-4.9939,-4.3508,-4.3943],
#[-7.5595,-5.2557,-7.2247,-5.8687],
#[-6.1081,-4.7379,-6.3432,-5.4007],
#[-7.6419,-5.7602,-7.5193,-6.1356],
#[-6.987,-5.1277,-6.334,-5.5927],
#[-7.3919,-5.5271,-7.2688,-5.9041],
#[-7.1961,-5.5584,-7.0688,-5.8349],
#[-6.2051,-4.4296,-5.7149,-4.9915],
#[-7.0551,-5.3498,-6.9104,-5.7297],
#[-6.5356,-5.3201,-6.2358,-5.1938],
#
#])

# Swap rows and columns; the transposed matrix is what gets handed to PCA.
t_val = val.T
t_val
Out[96]:
In [97]:
# Project the rows of t_val onto the first three principal components.
# random_state is pinned because scikit-learn's automatic solver choice can
# fall on the randomized SVD for large inputs, which would otherwise make X
# (and the plot built from it) change from run to run.
pca = PCA(n_components=3, random_state=0)
X = pca.fit_transform(t_val)
print(X)
In [98]:
# Per-component share of the total variance, as reported by the fitted PCA.
t = pca.explained_variance_ratio_

# Combined share of the two leading components.
# Widen the slice (e.g. t[:10], t[:20], t[:30]) when more components are fitted.
print('propotion:', sum(t[:2]))
In [100]:
import pylab
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# 3-D scatter of the PCA scores: one point per row of X, one axis per component.
fig = plt.figure()
# Axes3D(fig) no longer attaches itself to the figure on current Matplotlib
# (deprecated in 3.4), which leaves an empty canvas. Requesting a 3-D subplot
# from the figure works on old and new releases alike.
ax = fig.add_subplot(111, projection='3d')

# One colour per row of X, in row order.
# Colour key for the active list: r=TT, g=UACC-62, b=LOXIMVI, c=A3-KAW, m=COLO-741
col=['r','r','r','r','g','g','g','b','b','b','c','c','m','m','g','g']
# Disabled alternatives:
#   by dataset (r=gse36133, b=sanger, g=nci60):
#col=['r','r','b','b','r','g','b','r','g','b','r','b','r','b','g','g']
#   ten groups of differing sizes:
#col=['r']*15+['b']*18+['g']*21+['m']*18+['k']*26+['c']*26+['y']*26+['#05f6f0']*21+['#75f605']*6+['#f472ce']*23

# Fail with a clear message instead of an opaque Matplotlib error when the
# colour list does not match the number of plotted points.
if len(col) != X.shape[0]:
    raise ValueError(
        'colour list has %d entries but X has %d rows' % (len(col), X.shape[0])
    )

# The columns of X are already 1-D, so no copy/flatten step is needed.
xx, yy, zz = X[:, 0], X[:, 1], X[:, 2]

# The tuple marker (5, 3) selected the circle style, which Matplotlib
# deprecated in 3.0 in favour of 'o'.
ax.scatter(xx, yy, zz, s=80, marker='o', c=col)
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_zlabel('PC3')
plt.show()
In [ ]:
# Bootstrap Django inside the notebook so the project's models can be queried.
# Order matters: the settings module is named first, then django.setup() is
# called, and only afterwards are the models imported.
import django
import os
# Point Django at the project's local settings module.
os.environ['DJANGO_SETTINGS_MODULE'] = 'Carkinos.settings.local'
django.setup()
from probes.models import Sample,ProbeID,CellLine,Dataset,Platform
# `offset` values of every Sample whose dataset is named 'NCI60'.
# NOTE(review): presumably each offset is the sample's column index in the
# expression matrix; confirm against the model definition.
nci60_offset=Sample.objects.filter(dataset_id__name__in=['NCI60']).values_list('offset',flat=True)
In [ ]:
# All Sample rows that belong to the dataset named 'NCI60'.
nci_sample = Sample.objects.filter(dataset_id__name__in=['NCI60'])

# Probes of the platform with primary key 3, and their `offset` values.
# NOTE(review): the meaning of platform id 3 is not visible here; confirm
# which platform it denotes.
pprobe = ProbeID.objects.filter(platform=3)
poffset = pprobe.values_list('offset', flat=True)

# The only assignment of nci_val in this notebook is commented out in the
# first cell, so on a fresh kernel the lookup below raised NameError.
# Memory-map the matrix here, the same way that disabled line did.
nci_val = np.load(nci_val_pth.as_posix(), mmap_mode='r')

# Peek at the 3 x 2 sub-matrix formed by rows 0-2 and columns 0-1.
nci_val[np.ix_([0, 1, 2], [0, 1])]
In [ ]:
# Spot-check two NCI60 samples: print the `offset` of the sample at index 1,
# then display the primary site of the cell line linked to the sample at index 15.
print(nci_sample[1].offset)
nci_sample[15].cell_line_id.primary_site
In [ ]:
#pd.isnull(new_sanger_val).any(1).nonzero()[0]
In [ ]:
In [ ]: