In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

import django
import os
os.environ['DJANGO_SETTINGS_MODULE'] = 'Carkinos.settings.local'
django.setup()

from probes.models import Dataset,Platform,Sample,CellLine,ProbeID
root=Path('../').resolve()
plus2_path=root.joinpath('src','raw','Affy_U133plus2_probe_info.csv')
val_pth=Path('../').resolve().joinpath('src','PCA_TEST.quantile_normalized.tsv')

In [2]:
import sklearn
from sklearn.decomposition import PCA

In [3]:
import pylab
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [4]:
val = pd.read_table(val_pth.as_posix())
original_data=val

In [5]:
val.head()


Out[5]:
Unnamed: 0 GSM803616_113385hp133a11.cel.gz GSM803621_113390hp133a11.cel.gz GSM803633_113402hp133a11.cel.gz GSM803636_113405hp133a11.cel.gz GSM803640_113409hp133a11.cel.gz GSM803641_113410hp133a11.cel.gz GSM803642_113411hp133a11.cel.gz GSM803643_113412hp133a11.cel.gz GSM803648_113417hp133a11.cel.gz ... GSM886845.CEL.gz GSM886856.CEL.gz GSM886858.CEL.gz GSM886863.CEL.gz GSM886894.CEL.gz GSM886902.CEL.gz GSM886940.CEL.gz GSM886988.CEL.gz GSM887063.CEL.gz GSM887083.CEL.gz
0 1007_s_at 8.466508 8.704320 10.268638 10.743102 8.951665 9.458733 9.652571 9.850145 9.594955 ... 8.461307 9.713824 9.263093 9.417826 8.957506 9.192340 9.659516 9.366531 9.427184 8.005455
1 1053_at 8.980473 8.877624 9.267413 8.228491 8.813888 8.575776 8.217835 7.745895 8.709977 ... 9.298895 9.052348 9.157794 9.815388 9.288431 8.515813 9.575967 9.606156 9.315189 8.951114
2 117_at 6.829027 6.657378 6.710125 6.754070 6.695269 6.947095 6.628113 6.639558 6.817780 ... 6.364418 6.418995 6.339765 6.465320 6.319365 6.380699 6.401607 6.329362 6.411867 6.357796
3 121_at 8.052456 8.211668 8.385423 8.079701 9.812167 9.175841 10.420003 9.760561 7.938326 ... 9.438799 9.287558 7.431182 9.968309 7.775767 9.713154 7.891176 7.216398 7.488675 7.270498
4 1255_g_at 6.234672 6.050170 6.178644 6.130865 6.062958 6.040505 6.003245 6.105956 6.152480 ... 6.021642 6.083411 6.121256 6.018776 6.175244 6.090674 5.948331 5.985304 6.018757 6.104131

5 rows × 40 columns


In [6]:
val.index = val['Unnamed: 0']
val.index.name = None
val_test = val.iloc[:, 1:]
val_test.head()


Out[6]:
GSM803616_113385hp133a11.cel.gz GSM803621_113390hp133a11.cel.gz GSM803633_113402hp133a11.cel.gz GSM803636_113405hp133a11.cel.gz GSM803640_113409hp133a11.cel.gz GSM803641_113410hp133a11.cel.gz GSM803642_113411hp133a11.cel.gz GSM803643_113412hp133a11.cel.gz GSM803648_113417hp133a11.cel.gz GSM803663_113432hp133a11.cel.gz ... GSM886845.CEL.gz GSM886856.CEL.gz GSM886858.CEL.gz GSM886863.CEL.gz GSM886894.CEL.gz GSM886902.CEL.gz GSM886940.CEL.gz GSM886988.CEL.gz GSM887063.CEL.gz GSM887083.CEL.gz
1007_s_at 8.466508 8.704320 10.268638 10.743102 8.951665 9.458733 9.652571 9.850145 9.594955 10.222705 ... 8.461307 9.713824 9.263093 9.417826 8.957506 9.192340 9.659516 9.366531 9.427184 8.005455
1053_at 8.980473 8.877624 9.267413 8.228491 8.813888 8.575776 8.217835 7.745895 8.709977 8.456886 ... 9.298895 9.052348 9.157794 9.815388 9.288431 8.515813 9.575967 9.606156 9.315189 8.951114
117_at 6.829027 6.657378 6.710125 6.754070 6.695269 6.947095 6.628113 6.639558 6.817780 6.746117 ... 6.364418 6.418995 6.339765 6.465320 6.319365 6.380699 6.401607 6.329362 6.411867 6.357796
121_at 8.052456 8.211668 8.385423 8.079701 9.812167 9.175841 10.420003 9.760561 7.938326 7.931323 ... 9.438799 9.287558 7.431182 9.968309 7.775767 9.713154 7.891176 7.216398 7.488675 7.270498
1255_g_at 6.234672 6.050170 6.178644 6.130865 6.062958 6.040505 6.003245 6.105956 6.152480 6.005181 ... 6.021642 6.083411 6.121256 6.018776 6.175244 6.090674 5.948331 5.985304 6.018757 6.104131

5 rows × 39 columns


In [7]:
new_name=["GSM886845.CEL.gz",
"GSM803640_113409hp133a11.cel.gz",
"GSM803699_113468hp133a11.cel.gz",
"GSM803758_118194hp133a11.cel.gz",
"GSM886856.CEL.gz",
"GSM803641_113410hp133a11.cel.gz",
"GSM803700_113469hp133a11.cel.gz",
"GSM803759_118195hp133a11.cel.gz",
"GSM886858.CEL.gz",
"GSM803648_113417hp133a11.cel.gz",
"GSM803707_113476hp133a11.cel.gz",
"GSM803765_118202hp133a11.cel.gz",
"GSM886863.CEL.gz",
"GSM803642_113411hp133a11.cel.gz",
"GSM803701_113470hp133a11.cel.gz",
"GSM803760_118196hp133a11.cel.gz",
"GSM886894.CEL.gz",
"GSM803621_113390hp133a11.cel.gz",
"GSM803680_113449hp133a11.cel.gz",
"GSM803739_118175hp133a11.cel.gz",
"GSM886902.CEL.gz",
"GSM803643_113412hp133a11.cel.gz",
"GSM803702_113471hp133a11.cel.gz",
"GSM886940.CEL.gz",
"GSM803633_113402hp133a11.cel.gz",
"GSM803692_113461hp133a11.cel.gz",
"GSM803751_118187hp133a11.cel.gz",
"GSM886988.CEL.gz",
"GSM803663_113432hp133a11.cel.gz",
"GSM803721_113490hp133a11.cel.gz",
"GSM803779_118217hp133a11.cel.gz",
"GSM887063.CEL.gz",
"GSM803636_113405hp133a11.cel.gz",
"GSM803695_113464hp133a11.cel.gz",
"GSM803754_118190hp133a11.cel.gz",
"GSM887083.CEL.gz",
"GSM803616_113385hp133a11.cel.gz",
"GSM803675_113444hp133a11.cel.gz",
"GSM803734_118170hp133a11.cel.gz"]

In [8]:
val=val.reindex_axis(new_name,axis=1)
val


Out[8]:
GSM886845.CEL.gz GSM803640_113409hp133a11.cel.gz GSM803699_113468hp133a11.cel.gz GSM803758_118194hp133a11.cel.gz GSM886856.CEL.gz GSM803641_113410hp133a11.cel.gz GSM803700_113469hp133a11.cel.gz GSM803759_118195hp133a11.cel.gz GSM886858.CEL.gz GSM803648_113417hp133a11.cel.gz ... GSM803721_113490hp133a11.cel.gz GSM803779_118217hp133a11.cel.gz GSM887063.CEL.gz GSM803636_113405hp133a11.cel.gz GSM803695_113464hp133a11.cel.gz GSM803754_118190hp133a11.cel.gz GSM887083.CEL.gz GSM803616_113385hp133a11.cel.gz GSM803675_113444hp133a11.cel.gz GSM803734_118170hp133a11.cel.gz
1007_s_at 8.461307 8.951665 9.015657 9.114502 9.713824 9.458733 9.473240 9.889285 9.263093 9.594955 ... 10.253357 10.218173 9.427184 10.743102 10.714761 10.652688 8.005455 8.466508 8.407569 8.465109
1053_at 9.298895 8.813888 8.754567 8.090032 9.052348 8.575776 8.510183 9.063475 9.157794 8.709977 ... 8.346023 8.245170 9.315189 8.228491 8.193194 8.255571 8.951114 8.980473 8.859228 9.216713
117_at 6.364418 6.695269 6.768465 6.670836 6.418995 6.947095 6.920350 7.032093 6.339765 6.817780 ... 6.776119 6.830709 6.411867 6.754070 6.701130 6.817833 6.357796 6.829027 6.825796 6.723393
121_at 9.438799 9.812167 9.768166 9.439228 9.287558 9.175841 9.283408 9.229964 7.431182 7.938326 ... 7.878160 7.943019 7.488675 8.079701 8.278690 8.229162 7.270498 8.052456 8.317451 8.175203
1255_g_at 6.021642 6.062958 6.247082 6.190257 6.083411 6.040505 6.193966 6.110434 6.121256 6.152480 ... 6.106823 6.060658 6.018757 6.130865 6.117855 6.117373 6.104131 6.234672 6.196147 6.092855
1294_at 7.143608 7.677207 7.858427 7.484464 7.341028 7.303248 7.340732 7.563531 6.542747 7.083726 ... 7.113014 7.101001 6.670105 7.296643 7.339645 7.425366 7.409747 7.705363 7.714117 7.801424
1316_at 6.581450 6.805741 6.829712 6.774518 7.008686 6.722838 6.862099 6.792364 7.363535 6.790761 ... 6.756319 6.798425 7.108618 6.756501 6.746921 6.719750 7.399729 6.678366 6.863537 6.973697
1320_at 6.473493 7.015123 6.994869 6.944735 6.657676 7.003863 6.835708 6.726457 6.817740 6.882488 ... 6.633831 6.671301 6.815076 6.753532 6.654617 6.579627 6.269738 6.520533 6.659820 6.658652
1405_i_at 6.275908 6.191226 6.241302 6.212968 6.557764 7.911274 7.894463 7.317273 6.244645 6.194665 ... 6.288121 6.180302 6.046728 6.157490 6.093714 6.206802 6.930427 9.010700 8.752516 6.539585
1431_at 6.277947 6.344488 6.423654 6.644586 6.227907 6.347405 6.305207 6.261263 6.219192 6.236192 ... 6.268193 6.168492 6.148696 6.357650 6.256896 6.237912 6.364707 6.521723 6.590202 6.444873
1438_at 6.649263 6.886452 6.928903 6.971679 6.684662 6.845798 6.893492 6.923619 6.816554 7.078571 ... 7.141660 7.175349 6.795596 7.104489 7.323705 7.169444 6.584965 6.792334 6.973303 6.951808
1487_at 7.502459 7.772626 7.712658 8.121175 7.455537 8.453332 8.456870 7.858068 7.654972 8.304019 ... 8.090895 7.989872 7.898157 8.816534 8.913584 8.786682 8.134518 8.744122 8.717095 8.354448
1494_f_at 6.788163 7.052497 6.979584 6.853178 7.110132 6.883010 6.854555 6.879025 6.902448 6.953215 ... 6.872819 6.807467 6.846788 6.903100 6.883550 6.814899 6.858137 6.862733 6.937397 6.950863
1552256_a_at 9.742305 8.305927 8.274931 8.459003 9.579504 9.085175 9.109111 9.075729 9.426221 9.375317 ... 9.112633 9.064170 8.931418 8.734623 8.880275 8.578023 9.601803 8.856825 8.829929 8.692629
1552257_a_at 8.640245 8.316665 8.269918 8.090587 8.586190 8.209832 8.308806 8.020434 8.569997 8.124688 ... 8.886869 8.654164 9.399237 9.218920 9.312068 9.141817 10.006794 9.122020 9.033383 9.263941
1552258_at 6.595646 6.632723 6.631624 6.578109 6.689012 6.503489 6.607899 6.647792 6.573182 6.607899 ... 6.652553 6.577849 6.476279 6.513475 6.653839 6.615497 7.180552 6.590016 6.673735 6.573800
1552261_at 6.597675 6.758199 6.713994 6.738492 6.243540 6.513475 6.774931 6.744445 6.433349 6.662655 ... 6.910108 6.925878 6.531981 6.627641 6.711027 6.745113 6.484900 6.802746 6.785282 6.741516
1552263_at 7.269077 6.980035 7.091692 7.287418 6.866390 6.568297 6.505354 6.491417 7.033222 6.550166 ... 6.950751 6.988343 7.980943 7.191887 7.364214 7.214919 8.297536 7.746236 7.553605 7.822506
1552264_a_at 8.791363 8.717325 8.551634 8.641950 7.860844 7.738866 7.779082 7.890376 8.235752 8.145032 ... 8.007360 8.205981 8.750312 8.741354 8.760425 8.832809 7.876166 8.847554 8.652883 8.868708
1552266_at 6.106524 6.184708 6.242696 6.264500 6.263956 6.252508 6.410460 6.357817 6.116564 6.111203 ... 6.187516 6.271010 6.100387 6.315563 6.173923 6.258601 5.953983 6.089563 6.169958 6.194299
1552269_at 6.360038 6.533480 6.560934 6.553802 6.810985 6.630831 6.567343 6.323243 6.616973 6.547179 ... 6.319941 6.385130 6.183858 6.435981 6.377448 6.438262 6.269964 6.170823 6.454173 6.291534
1552271_at 6.601850 6.950452 7.033705 7.047793 6.689012 6.826511 6.903519 7.071587 6.656315 6.974224 ... 6.837749 6.900402 6.588426 6.915828 6.932547 6.937891 6.806623 6.967314 7.050770 7.238118
1552272_a_at 6.618031 6.795692 6.811591 6.750444 6.697853 6.798438 6.849246 6.868559 6.605907 6.809563 ... 6.779060 6.831590 6.809563 6.891237 7.042150 6.921495 7.050714 7.013123 6.847839 6.938901
1552274_at 8.894650 8.336535 8.336851 7.561139 7.651850 7.485446 7.541046 7.184700 7.282050 7.139235 ... 7.714477 7.253272 7.610776 7.641450 7.655092 7.132774 10.345215 10.244780 10.371346 10.020670
1552275_s_at 8.859018 8.107724 7.906084 7.577564 7.337774 7.280191 7.200617 7.094518 7.111203 6.836587 ... 7.271015 7.156977 7.450577 7.361748 7.271015 7.181704 10.166079 9.908048 9.757667 10.077008
1552276_a_at 6.800536 6.765885 6.792644 6.882330 6.837283 6.834049 6.911692 6.980597 6.695005 6.866539 ... 6.820306 7.015820 7.096952 7.196892 7.411509 7.368866 6.655461 7.093136 7.037371 7.104709
1552277_a_at 10.342374 9.431731 9.437602 10.442156 9.129100 9.341553 9.199096 9.371734 9.824968 8.824591 ... 8.197836 8.327437 8.612375 7.882840 7.933898 7.948517 7.703811 7.584442 7.566612 7.665941
1552278_a_at 6.422734 6.560098 6.462258 6.476149 6.529097 6.578089 6.572967 6.747500 7.071654 6.765564 ... 6.667347 6.849982 6.995856 6.783852 6.957587 7.014821 6.094621 6.373104 6.594043 6.487669
1552279_a_at 7.187289 7.472681 7.415853 7.436497 7.824960 7.801613 7.690133 7.720864 7.872191 7.745838 ... 7.850500 7.757378 7.435773 7.625946 7.741913 7.531324 6.951120 7.297172 7.323127 7.232561
1552280_at 6.298621 6.781984 6.801195 7.012606 6.364218 6.568297 6.505354 6.616540 6.473237 6.490208 ... 6.313898 6.376383 6.320540 6.428855 6.348914 6.318683 6.432025 6.276840 6.360292 6.356794
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
AFFX-PheX-3_at 6.675145 6.707386 6.882313 6.870207 6.651491 6.820250 6.804117 6.956616 6.657285 6.798727 ... 6.870478 6.872741 6.632494 6.924884 6.866711 7.042694 6.690689 6.921807 6.878915 6.924670
AFFX-PheX-5_at 6.455580 6.195937 6.172905 6.153351 6.301715 6.089405 6.145538 6.181747 6.269110 6.155477 ... 6.113505 6.199886 6.217406 6.184622 6.165367 6.138938 6.356141 6.189699 6.186615 6.092255
AFFX-PheX-M_at 6.068269 6.156038 6.107349 6.141441 6.041577 6.079052 6.134476 6.093118 6.068128 6.127596 ... 6.073408 6.097310 6.086859 6.116264 6.095004 6.177887 6.077811 6.167351 6.172008 6.238147
AFFX-r2-Bs-dap-3_at 6.249471 6.075993 6.009928 5.992046 6.145050 6.030399 6.022368 6.091974 6.087029 6.007915 ... 6.015889 6.137714 6.065972 5.904804 6.006670 6.015959 6.161572 6.022368 6.062740 6.058521
AFFX-r2-Bs-dap-5_at 6.152986 6.241163 6.147140 6.134053 6.013742 6.195029 6.124303 6.170736 6.094575 6.297786 ... 6.095059 6.200120 6.138099 6.173525 6.086404 6.109639 6.218142 6.142033 6.158556 6.180863
AFFX-r2-Bs-dap-M_at 6.289910 6.113686 6.096466 6.147086 6.044649 6.094981 6.075443 6.120124 6.105779 6.052777 ... 6.111450 6.050546 6.097120 6.005043 5.975907 6.056784 6.199813 6.134247 6.021485 6.101317
AFFX-r2-Bs-lys-3_at 6.127740 6.032010 6.105478 6.105014 6.094449 6.096361 6.106897 6.200786 6.186464 6.070544 ... 6.171917 6.094003 6.180992 6.080920 6.061514 6.147643 6.104903 6.030509 6.168257 6.127740
AFFX-r2-Bs-lys-5_at 6.135943 6.110973 6.078740 6.034845 6.206716 5.975437 6.106279 6.149710 6.083774 6.128206 ... 5.987228 6.040127 6.202934 6.048886 5.996962 6.026794 6.183317 6.139324 6.058256 6.127307
AFFX-r2-Bs-lys-M_at 6.411120 6.006344 6.132978 6.047883 6.022650 6.071600 6.049315 6.029695 6.091643 6.163895 ... 6.063718 6.041157 6.039041 5.975507 6.087463 6.086998 6.117574 6.175439 6.158923 6.194593
AFFX-r2-Bs-phe-3_at 6.197593 6.252302 6.330227 6.343189 6.326076 6.222072 6.320426 6.401973 6.257608 6.430989 ... 6.341676 6.361435 6.279557 6.283164 6.330227 6.369258 6.198530 6.291769 6.386882 6.453956
AFFX-r2-Bs-phe-5_at 6.217214 6.017229 5.981775 6.099921 6.000980 5.928803 6.055402 6.032010 5.965729 5.927702 ... 5.975115 5.903921 6.175670 6.012660 6.013223 6.180779 6.171466 5.962371 6.029181 6.062475
AFFX-r2-Bs-phe-M_at 6.214276 6.188304 6.124355 5.928917 6.161342 6.155509 6.166839 6.111096 6.164028 6.127569 ... 6.196850 6.088559 6.166839 6.117625 6.109619 6.104123 6.196755 6.181463 6.173726 6.141546
AFFX-r2-Bs-thr-3_s_at 6.402004 6.427363 6.373256 6.369681 6.227929 6.369681 6.355746 6.409785 6.526400 6.433344 ... 6.370165 6.382534 6.369681 6.378903 6.399785 6.414695 6.585450 6.410005 6.431103 6.523846
AFFX-r2-Bs-thr-5_s_at 6.322737 6.406880 6.346150 6.311253 6.194749 6.277323 6.353577 6.375537 6.126638 6.352987 ... 6.269917 6.213263 6.198985 6.312464 6.331306 6.231239 6.379446 6.320576 6.308823 6.320576
AFFX-r2-Bs-thr-M_s_at 6.250623 6.346356 6.261167 6.298320 6.155852 6.200266 6.214775 6.259852 6.188556 6.311226 ... 6.221253 6.179776 6.075889 6.241145 6.162519 6.222131 6.030313 6.283632 6.315167 6.209953
AFFX-r2-Ec-bioB-3_at 9.319543 6.825795 6.671151 6.946643 10.254535 6.831174 6.874307 6.855983 9.562536 6.868567 ... 6.823105 6.925878 9.848600 6.873608 6.764787 6.670178 9.957419 6.930372 6.865079 7.031470
AFFX-r2-Ec-bioB-5_at 8.465761 6.347951 6.312130 6.384996 9.363304 6.298082 6.372027 6.368339 8.734047 6.428495 ... 6.301525 6.332255 9.244666 6.394353 6.337519 6.403508 9.772343 6.316890 6.260670 6.323383
AFFX-r2-Ec-bioB-M_at 8.235824 6.273725 6.239546 6.264705 9.425781 6.329623 6.232759 6.264705 8.606514 6.282181 ... 6.277757 6.319816 9.161505 6.359813 6.216217 6.185835 9.202771 6.369209 6.365004 6.350515
AFFX-r2-Ec-bioC-3_at 9.495407 6.309407 6.334871 6.310438 10.585844 6.337099 6.290488 6.293498 9.711793 6.387295 ... 6.355457 6.245225 10.295938 6.308103 6.296317 6.297484 10.437973 6.401927 6.275433 6.517116
AFFX-r2-Ec-bioC-5_at 9.489423 6.383472 6.294385 6.447813 10.682951 6.377043 6.449618 6.362893 9.650428 6.431914 ... 6.361156 6.418022 10.357246 6.390556 6.262973 6.274245 10.385084 6.308268 6.456042 6.412639
AFFX-r2-Ec-bioD-3_at 11.363453 6.100949 6.101371 5.953686 12.576641 6.103176 6.155868 6.049589 12.180292 6.199140 ... 6.053130 6.125304 12.433519 6.097726 6.084097 6.104278 12.532927 6.047990 6.190695 6.141231
AFFX-r2-Ec-bioD-5_at 11.046390 6.404159 6.322789 6.410712 12.261023 6.421094 6.357697 6.290589 11.477088 6.355987 ... 6.254022 6.290061 11.922789 6.421094 6.295053 6.394957 12.017910 6.351539 6.400404 6.457924
AFFX-r2-P1-cre-3_at 12.993878 6.226018 6.177239 6.123176 13.620865 6.173374 6.218713 6.221984 13.071450 6.277974 ... 6.161425 6.243068 13.249064 6.204134 6.154393 6.091254 13.446583 6.243910 6.220678 6.167413
AFFX-r2-P1-cre-5_at 12.946375 6.220369 6.225791 6.285913 13.611925 6.176134 6.263543 6.219812 12.813406 6.285913 ... 6.327371 6.258942 13.298390 6.275408 6.282079 6.137526 13.244874 6.302892 6.222454 6.367328
AFFX-ThrX-3_at 6.418385 6.374820 6.420509 6.412776 6.214978 6.385491 6.447376 6.344352 6.208287 6.424580 ... 6.441112 6.382513 6.204167 6.405070 6.404394 6.376919 6.217640 6.495314 6.402561 6.424846
AFFX-ThrX-5_at 6.340336 6.366654 6.290862 6.358280 6.573495 6.345559 6.342651 6.363099 6.401005 6.457132 ... 6.324824 6.345991 6.371383 6.356530 6.326964 6.279764 6.273823 6.230751 6.535179 6.441570
AFFX-ThrX-M_at 6.140900 6.168804 6.152987 6.152979 6.047678 6.212597 6.087561 6.155540 6.078949 6.160166 ... 6.140882 6.219913 5.971206 6.142176 6.103085 6.176319 6.214493 6.239957 6.214065 6.218298
AFFX-TrpnX-3_at 6.190548 6.053378 6.060558 6.069456 5.977597 6.049208 6.044035 6.128163 6.092840 6.059186 ... 6.073232 6.051979 6.125884 6.099284 6.097201 6.184191 5.994420 6.133892 6.102592 6.154887
AFFX-TrpnX-5_at 6.398901 6.396692 6.277052 6.393842 6.251874 6.276643 6.285091 6.400862 6.259979 6.303979 ... 6.311178 6.361091 6.208023 6.309272 6.343641 6.362931 6.307840 6.344899 6.363968 6.410162
AFFX-TrpnX-M_at 6.194253 6.192795 6.158547 6.232492 6.082227 6.209021 6.221689 6.234155 6.143113 6.211498 ... 6.230572 6.117310 6.220205 6.155836 6.176493 6.115281 6.239539 6.205497 6.218478 6.322069

54675 rows × 39 columns


In [9]:
val=val.as_matrix()
val


Out[9]:
array([[ 8.46130748,  8.95166541,  9.01565746, ...,  8.46650755,
         8.4075694 ,  8.46510882],
       [ 9.2988951 ,  8.81388777,  8.75456687, ...,  8.98047324,
         8.85922819,  9.21671311],
       [ 6.36441766,  6.69526869,  6.7684647 , ...,  6.82902661,
         6.8257959 ,  6.72339295],
       ..., 
       [ 6.19054803,  6.05337759,  6.06055829, ...,  6.1338922 ,
         6.10259233,  6.15488734],
       [ 6.39890069,  6.39669175,  6.27705151, ...,  6.34489862,
         6.36396778,  6.41016202],
       [ 6.19425287,  6.19279484,  6.1585468 , ...,  6.20549658,
         6.21847785,  6.32206949]])

In [10]:
t_val=np.transpose(val)
pca= PCA(n_components=54675)
X = pca.fit_transform(t_val)
#print(X)

In [11]:
t=pca.explained_variance_ratio_
sum(t[0:40])


Out[11]:
1.0000000000000004

In [12]:
for n in range(1,40):
 print('propotion ',n+1,':',sum(t[0:n]))


propotion  2 : 0.224013222551
propotion  3 : 0.348340536504
propotion  4 : 0.45032991264
propotion  5 : 0.537471099666
propotion  6 : 0.614492157415
propotion  7 : 0.682509628289
propotion  8 : 0.743164833524
propotion  9 : 0.793767503591
propotion  10 : 0.836645963709
propotion  11 : 0.867664062001
propotion  12 : 0.88729130498
propotion  13 : 0.902482804049
propotion  14 : 0.915169405053
propotion  15 : 0.926347810458
propotion  16 : 0.935407341958
propotion  17 : 0.943699256998
propotion  18 : 0.950692270153
propotion  19 : 0.957221588842
propotion  20 : 0.9633487424
propotion  21 : 0.968933758838
propotion  22 : 0.973761566473
propotion  23 : 0.978295231422
propotion  24 : 0.982027577088
propotion  25 : 0.985051174671
propotion  26 : 0.987625100781
propotion  27 : 0.989650909975
propotion  28 : 0.991106444027
propotion  29 : 0.99245149015
propotion  30 : 0.993558126158
propotion  31 : 0.994517117929
propotion  32 : 0.995446510957
propotion  33 : 0.996306236828
propotion  34 : 0.997097380666
propotion  35 : 0.997738512841
propotion  36 : 0.998341206853
propotion  37 : 0.998924947024
propotion  38 : 0.999486225548
propotion  39 : 1.0
propotion  40 : 1.0

In [13]:
#colormap = plt.cm.jet
#colm=[colormap(1)]*29+[colormap(1000)]*10
col1=['r','b','b','b']*5+['r','b','b']+['r','b','b','b']*4
col=['r']*4+['b']*4+['g']*4+['m']*4+['k']*4+['c']*3+['y']*4+['#05f6f0']*4+['#75f605']*4+['#f472ce']*4
print(col1[0:6])
print(col[0:16])


['r', 'b', 'b', 'b', 'r', 'b']
['r', 'r', 'r', 'r', 'b', 'b', 'b', 'b', 'g', 'g', 'g', 'g', 'm', 'm', 'm', 'm']

In [14]:
#fig = pylab.figure()
#ax = Axes3D(fig)  
fig = plt.figure(figsize=plt.figaspect(0.5))
ax = fig.add_subplot(1, 2, 1, projection='3d')
xx=np.array(X[:,0]).flatten()
yy=np.array(X[:,1]).flatten()
zz=np.array(X[:,2]).flatten()
ax.scatter(xx,yy,zz,s=80,marker=(5,3),c=col[:])
ax.set_title("colored by cell line")
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

ax = fig.add_subplot(1, 2, 2, projection='3d')
xx=np.array(X[:,0]).flatten()
yy=np.array(X[:,1]).flatten()
zz=np.array(X[:,2]).flatten()
ax.scatter(xx,yy,zz,s=80,marker=(5,3),c=col1[:])
ax.set_title("colored by data set")
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

plt.show()

In [121]:
tt=original_data.loc[['212581_x_at','213453_x_at','217398_x_at',
'AFFX-HUMGAPDH/M33197_3_at',
'AFFX-HUMGAPDH/M33197_5_at',
'AFFX-HUMGAPDH/M33197_M_at'
],:]
tt=tt.iloc[:,1:]
tt


Out[121]:
GSM803616_113385hp133a11.cel.gz GSM803621_113390hp133a11.cel.gz GSM803633_113402hp133a11.cel.gz GSM803636_113405hp133a11.cel.gz GSM803640_113409hp133a11.cel.gz GSM803641_113410hp133a11.cel.gz GSM803642_113411hp133a11.cel.gz GSM803643_113412hp133a11.cel.gz GSM803648_113417hp133a11.cel.gz GSM803663_113432hp133a11.cel.gz ... GSM886845.CEL.gz GSM886856.CEL.gz GSM886858.CEL.gz GSM886863.CEL.gz GSM886894.CEL.gz GSM886902.CEL.gz GSM886940.CEL.gz GSM886988.CEL.gz GSM887063.CEL.gz GSM887083.CEL.gz
212581_x_at 13.692688 13.784569 13.768367 13.735409 13.945257 13.783133 13.770379 13.930343 13.981070 13.678390 ... 13.744419 13.637974 13.802674 13.906091 13.591915 13.963205 13.704083 13.717885 13.686001 13.211381
213453_x_at 13.620251 13.740132 13.691431 13.690015 13.912630 13.701529 13.723103 13.911382 13.924181 13.598943 ... 13.720177 13.594825 13.746064 13.868072 13.472050 13.962561 13.633952 13.698346 13.646558 13.143625
217398_x_at 13.625629 13.766562 13.755158 13.734911 13.955820 13.747659 13.723694 13.952851 13.947258 13.680103 ... 13.701262 13.651801 13.822772 13.936859 13.568534 13.769095 13.722036 13.730397 13.688391 13.244282
AFFX-HUMGAPDH/M33197_3_at 13.487710 13.614178 13.666509 13.573509 13.748241 13.624083 13.647922 13.837788 13.770546 13.567317 ... 13.572755 13.639457 13.659317 13.821375 13.471063 13.770787 13.592931 13.665752 13.646805 13.424041
AFFX-HUMGAPDH/M33197_5_at 13.548932 13.621462 13.697945 13.634097 13.863217 13.695005 13.741313 13.782010 13.824497 13.640025 ... 13.576829 13.480508 13.685483 13.951764 13.330604 13.766827 13.653799 13.708632 13.587779 12.355801
AFFX-HUMGAPDH/M33197_M_at 13.599816 13.638201 13.713933 13.654290 13.865454 13.703310 13.746095 13.883743 13.876947 13.565019 ... 13.735935 13.608367 13.690496 13.881873 13.400269 13.881500 13.467286 13.645673 13.631237 12.627046

6 rows × 39 columns


In [128]:
means=tt.mean().as_matrix()
means


Out[128]:
array([ 13.59583757,  13.69418395,  13.71555711,  13.67037183,
        13.88176989,  13.70911968,  13.72541767,  13.88301942,
        13.88741627,  13.62163277,  13.62041696,  13.69417411,
        13.7016469 ,  13.67529188,  13.85954428,  13.70434524,
        13.68917396,  13.87420595,  13.88528117,  13.61831592,
        13.46847072,  13.67050424,  13.66087641,  13.62801464,
        13.71286136,  13.8154521 ,  13.89340109,  13.76977795,
        13.51029685,  13.67522967,  13.60215521,  13.73446785,
        13.89433915,  13.47240563,  13.85232918,  13.62901466,
        13.69444748,  13.64779517,  13.00102936])

In [137]:
8.95166541-13.69418395


Out[137]:
-4.742518539999999

In [135]:
val


Out[135]:
array([[  8.46130748,   8.95166541,   9.01565746,   9.11450207,
          9.71382391,   9.45873324,   9.47324011,   9.88928544,
          9.26309348,   9.59495523,   9.66603378,   9.65130961,
          9.41782607,   9.65257113,   9.83507795,   9.61984108,
          8.95750628,   8.70432039,   8.7553945 ,   8.69213714,
          9.19234021,   9.85014465,   9.90862442,   9.65951572,
         10.26863798,  10.21440527,  10.16297405,   9.36653101,
         10.22270503,  10.25335657,  10.21817286,   9.42718364,
         10.74310215,  10.71476125,  10.65268828,   8.00545508,
          8.46650755,   8.4075694 ,   8.46510882],
       [  9.2988951 ,   8.81388777,   8.75456687,   8.09003166,
          9.05234794,   8.57577625,   8.51018326,   9.06347503,
          9.1577942 ,   8.70997655,   8.8155648 ,   8.3834526 ,
          9.81538781,   8.21783451,   7.92563044,   9.29293086,
          9.28843147,   8.87762358,   8.74875803,   8.9408213 ,
          8.51581321,   7.74589479,   7.50285323,   9.57596681,
          9.26741293,   9.28199548,   9.23120754,   9.6061562 ,
          8.4568856 ,   8.34602305,   8.24516982,   9.31518917,
          8.22849075,   8.19319419,   8.2555712 ,   8.9511142 ,
          8.98047324,   8.85922819,   9.21671311],
       [  6.36441766,   6.69526869,   6.7684647 ,   6.67083617,
          6.41899524,   6.94709518,   6.92034973,   7.03209252,
          6.3397646 ,   6.81778048,   6.75647659,   6.69025707,
          6.46531997,   6.62811266,   6.77022686,   6.77069571,
          6.31936515,   6.65737768,   6.72588274,   6.73820969,
          6.38069854,   6.639558  ,   6.73633882,   6.40160705,
          6.71012513,   6.7256957 ,   6.89194348,   6.32936187,
          6.74611709,   6.77611894,   6.83070874,   6.41186661,
          6.75407004,   6.70113017,   6.81783284,   6.35779599,
          6.82902661,   6.8257959 ,   6.72339295],
       [  9.43879885,   9.81216739,   9.76816562,   9.43922785,
          9.28755791,   9.17584147,   9.28340826,   9.22996421,
          7.43118241,   7.93832616,   8.19929407,   8.0612578 ,
          9.96830853,  10.42000293,  10.79952571,  10.26189176,
          7.77576728,   8.21166782,   8.29242514,   8.27652939,
          9.71315358,   9.76056079,   9.56956104,   7.89117577,
          8.38542278,   8.4760947 ,   8.58411227,   7.21639781,
          7.93132281,   7.87815991,   7.94301898,   7.4886753 ,
          8.07970133,   8.27868982,   8.2291619 ,   7.27049829,
          8.05245611,   8.31745116,   8.17520311],
       [  6.02164164,   6.06295805,   6.24708212,   6.19025684,
          6.08341078,   6.04050457,   6.19396581,   6.11043425,
          6.12125616,   6.15247972,   6.19185189,   6.04090394,
          6.01877641,   6.00324542,   6.17746643,   6.1500177 ,
          6.17524359,   6.0501699 ,   6.22930745,   6.11194219,
          6.09067397,   6.10595556,   6.12247097,   5.94833067,
          6.17864424,   6.15809177,   6.18230987,   5.98530367,
          6.00518108,   6.10682331,   6.06065817,   6.01875651,
          6.13086484,   6.1178552 ,   6.11737298,   6.10413118,
          6.23467168,   6.19614703,   6.0928554 ]])

In [138]:
norm_val=val-means

In [139]:
t_norm_val=np.transpose(norm_val)
pca= PCA(n_components=3)
norm_X = pca.fit_transform(t_norm_val)
print(norm_X)


[[ 0.57831706  1.27510446  0.21220149]
 [ 1.03317342  0.71330756 -0.05002879]
 [ 0.99976985  0.62911075 -0.09226462]
 [ 0.88060922  0.17434654 -0.60546659]
 [ 0.35864886  0.1175457   0.10819232]
 [ 0.52847743  0.02409358 -0.1177726 ]
 [ 0.63520201  0.01516286 -0.16798159]
 [ 0.35829613 -0.08140018  0.24045808]
 [-1.51262059  0.18854462 -0.30530986]
 [-0.60872798 -0.29528334  0.01846387]
 [-0.37634376 -0.25226118  0.18246918]
 [-0.48602436 -0.44110487 -0.3557259 ]
 [ 0.97597917  0.80037566  1.02230836]
 [ 1.84104994 -0.0402972  -0.12018721]
 [ 2.13683897 -0.20848845 -0.506199  ]
 [ 1.42131809  0.43668209  0.74184298]
 [-1.05933913  0.53440974  0.04288752]
 [-0.71959085  0.68096125 -0.65203495]
 [-0.60872237  0.59985631 -0.72538171]
 [-0.43080784  0.66386172 -0.16217409]
 [ 1.21875903  0.31561608  0.13993411]
 [ 1.34150763 -0.55385075 -0.51563511]
 [ 1.23513133 -0.75972671 -0.70132463]
 [-0.88597171  0.04644456  0.7283155 ]
 [-0.32867346 -0.51143745  0.7276851 ]
 [-0.34388716 -0.41488408  0.55942163]
 [-0.29338306 -0.3611778   0.39467899]
 [-1.70843352  0.22039491  0.27903323]
 [-0.39805182 -0.98363076  0.26128343]
 [-0.56834776 -1.02667033 -0.08812423]
 [-0.41597816 -1.05166017 -0.06122328]
 [-1.33341907  0.08004171  0.16509417]
 [-0.49945091 -1.40552679 -0.28744386]
 [ 0.08247709 -1.46130019  0.38625588]
 [-0.32809205 -1.30008988 -0.21140032]
 [-1.50897323  1.08972442 -0.67693807]
 [-0.74083507  0.84983739 -0.37634266]
 [-0.42103376  0.88783076 -0.39548855]
 [-0.04884763  0.80553748  0.96392179]]

In [140]:
norm_t=pca.explained_variance_ratio_
print('propotion:',sum(norm_t[0:2]))


propotion: 0.830936098325

In [170]:
fig = plt.figure(figsize=plt.figaspect(0.5))
ax = fig.add_subplot(1, 2, 1, projection='3d')
xx=np.array(norm_X[:,0]).flatten()
yy=np.array(norm_X[:,1]).flatten()
zz=np.array(norm_X[:,2]).flatten()
ax.scatter(xx,yy,zz,s=80,marker=(5,3),c=col[:])
ax.set_title("norm_data colored by cell line")
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

ax = fig.add_subplot(1, 2, 2, projection='3d')
xx=np.array(norm_X[:,0]).flatten()
yy=np.array(norm_X[:,1]).flatten()
zz=np.array(norm_X[:,2]).flatten()
ax.scatter(xx,yy,zz,s=80,marker=(5,3),c=col1[:])
ax.set_title("norm_data colored by data set")
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

plt.show()

In [171]:
old_data=np.array([
[8.9283,8.8862,8.9374,9.0085,10.151,9.2932,9.2658,9.7032,9.675,9.4598,9.4706,9.4283],
[10.0503,8.6776,8.597,7.9725,9.5993,8.45,8.4428,8.9239,9.6814,8.6094,8.6629,8.3004],
[6.8854,6.6461,6.6673,6.6335,6.9292,6.879,6.8687,6.9176,6.8493,6.761,6.6705,6.6293],
[10.502,9.5326,9.41,9.1377,10.2953,8.8643,8.9176,8.8962,8.064,7.8083,7.9111,7.969],
[6.8712,5.8737,6.0063,5.963,6.7891,5.836,5.9821,5.9546,6.7754,5.986,5.9624,5.8532]
])

In [181]:
old_tval=np.transpose(old_data)
pca= PCA(n_components=3)
old_X = pca.fit_transform(old_tval)
print(old_X)


[[-2.0812633  -0.13694719  0.52126024]
 [-0.33850833  0.70599732  0.20481152]
 [-0.23874222  0.62310044  0.1748619 ]
 [ 0.28846196  0.88478263 -0.04220665]
 [-1.6752425  -0.43474881 -0.69284278]
 [ 0.324978    0.34828994 -0.11791176]
 [ 0.24128995  0.33472026 -0.08704889]
 [ 0.0481936  -0.16678024 -0.31664949]
 [ 0.1670874  -1.36811765  0.27982773]
 [ 1.10332206 -0.39145307  0.06611892]
 [ 1.00513939 -0.36328696  0.05973082]
 [ 1.15528398 -0.03555665 -0.04995157]]

In [182]:
old_norm_t=pca.explained_variance_ratio_
print('propotion:',sum(old_norm_t[0:2]))


propotion: 0.922274193103

In [183]:
fig = plt.figure(figsize=plt.figaspect(0.5))
ax = fig.add_subplot(1, 2, 1, projection='3d')
xx=np.array(old_X[:,0]).flatten()
yy=np.array(old_X[:,1]).flatten()
zz=np.array(old_X[:,2]).flatten()
ax.scatter(xx,yy,zz,s=80,marker=(5,3),c=col[:12])
ax.set_title("old_data colored by cell line")
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

ax = fig.add_subplot(1, 2, 2, projection='3d')
xx=np.array(old_X[:,0]).flatten()
yy=np.array(old_X[:,1]).flatten()
zz=np.array(old_X[:,2]).flatten()
ax.scatter(xx,yy,zz,s=80,marker=(5,3),c=col1[:12])
ax.set_title("old_data colored by data set")
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

plt.show()

In [175]:
old_norm_data=np.array([
[-5.5208,-4.7194,-4.6519,-4.4318,-4.1799,-4.1518,-4.1751,-3.8435,-4.7586,-4.1699,-4.1486,-4.0774],
[-4.3988,-4.928,-4.9924,-5.4678,-4.7316,-4.995,-4.9981,-4.6228,-4.7522,-5.0204,-4.9562,-5.2052],
[-7.5637,-6.9595,-6.922,-6.8069,-7.4017,-6.566,-6.5722,-6.6292,-7.5843,-6.8687,-6.9487,-6.8763],
[-3.9471,-4.1793,-4.1793,-4.3026,-4.0355,-4.5807,-4.5234,-4.6506,-6.3696,-5.8215,-5.7081,-5.5366],
[-7.5779,-7.7319,-7.5813,-7.4773,-7.5417,-7.6091,-7.4589,-7.5922,-7.6582,-7.6437,-7.6567,-7.6524]
])

In [176]:
old_norm_tval=np.transpose(old_norm_data)
pca= PCA(n_components=3)
old_norm_X = pca.fit_transform(old_norm_tval)
print(old_norm_X)


[[-1.11628256  1.16608375 -0.02995322]
 [-0.68402189  0.11460089  0.1375358 ]
 [-0.6718589   0.01831752  0.16620581]
 [-0.47854629 -0.33215007  0.47019693]
 [-0.73360049  0.016997   -0.42765057]
 [-0.17671972 -0.48865285  0.01836763]
 [-0.2431788  -0.48486353  0.03133571]
 [-0.07087121 -0.55310508 -0.45662019]
 [ 1.43063435  0.9396548  -0.01883581]
 [ 1.0334845  -0.08852625  0.03985452]
 [ 0.92354147 -0.05563775 -0.05082307]
 [ 0.78741955 -0.25271843  0.12038646]]

In [177]:
old_n_t=pca.explained_variance_ratio_
print('propotion:',sum(old_n_t[0:2]))


propotion: 0.911316894186

In [180]:
fig = plt.figure(figsize=plt.figaspect(0.5))
ax = fig.add_subplot(1, 2, 1, projection='3d')
xx=np.array(old_norm_X[:,0]).flatten()
yy=np.array(old_norm_X[:,1]).flatten()
zz=np.array(old_norm_X[:,2]).flatten()
ax.scatter(xx,yy,zz,s=80,marker=(5,3),c=col[:12])
ax.set_title("old_norm_data colored by cell line")
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

ax = fig.add_subplot(1, 2, 2, projection='3d')
xx=np.array(old_norm_X[:,0]).flatten()
yy=np.array(old_norm_X[:,1]).flatten()
zz=np.array(old_norm_X[:,2]).flatten()
ax.scatter(xx,yy,zz,s=80,marker=(5,3),c=col1[:12])
ax.set_title("old_norm_data colored by data set")
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

plt.show()

In [185]:
ppth=Path('../').resolve().joinpath('src','raw','ptest_data.xlsx')
pp=pd.read_excel(ppth.as_posix())
pp=pp.as_matrix()
pp


Out[185]:
array([[-5.366 , -5.7631, -5.6608, -5.6256, -4.8057, -5.6661, -5.5468,
        -5.3902, -5.0451, -5.2662, -4.8927, -4.9939, -4.3508, -4.3943,
        -5.421 , -5.2474],
       [-7.1705, -7.3635, -5.2312, -5.1687, -7.5804, -6.7095, -6.1366,
        -7.5169, -6.9669, -6.0586, -7.5595, -5.2557, -7.2247, -5.8687,
        -7.0951, -6.9543],
       [-6.1506, -6.4969, -5.673 , -5.4194, -6.6505, -5.8046, -5.7578,
        -6.7381, -5.7962, -5.5539, -6.1081, -4.7379, -6.3432, -5.4007,
        -5.9156, -5.9778],
       [-7.4163, -7.6428, -6.2846, -6.0678, -7.7376, -7.6556, -6.4619,
        -7.7967, -7.8114, -6.2607, -7.6419, -5.7602, -7.5193, -6.1356,
        -7.8308, -7.8084],
       [-6.7061, -7.0073, -5.8592, -5.5943, -6.6342, -6.8321, -6.0719,
        -7.2331, -7.0227, -5.86  , -6.987 , -5.1277, -6.334 , -5.5927,
        -7.0556, -7.1553],
       [-7.2542, -7.5315, -6.0611, -6.0768, -7.5042, -6.9675, -6.3497,
        -7.2016, -6.7728, -6.1676, -7.3919, -5.5271, -7.2688, -5.9041,
        -7.0795, -7.216 ],
       [-5.351 , -6.4082, -5.7879, -5.6501, -6.4023, -6.5835, -6.2056,
        -7.2589, -6.7977, -6.0623, -7.1961, -5.5584, -7.0688, -5.8349,
        -6.5194, -6.1492],
       [-5.6758, -5.6102, -4.732 , -4.6727, -6.1917, -5.7479, -5.8774,
        -5.8232, -5.0315, -5.1327, -6.2051, -4.4296, -5.7149, -4.9915,
        -5.6598, -5.8521],
       [-6.7913, -7.1181, -5.9237, -5.7847, -7.1502, -6.9125, -6.1993,
        -7.1933, -6.8246, -5.9417, -7.0551, -5.3498, -6.9104, -5.7297,
        -6.9821, -7.0431],
       [-4.7406, -5.8436, -5.389 , -5.1811, -6.6555, -5.7675, -5.699 ,
        -6.8278, -6.1393, -5.8158, -6.5356, -5.3201, -6.2358, -5.1938,
        -5.8467, -5.9764]])

In [187]:
old_norm_tval=np.transpose(pp)
pca= PCA(n_components=3)
old_norm_X = pca.fit_transform(old_norm_tval)
print(old_norm_X)


[[  3.57909214e-01   1.45167116e+00  -8.34760590e-01]
 [  1.43705365e+00   6.57368909e-01   1.69662377e-01]
 [ -2.14339445e+00   2.44562198e-01   5.10341660e-01]
 [ -2.55896716e+00   2.95749196e-01   3.19789509e-01]
 [  1.84064926e+00  -3.40068949e-01  -5.35856542e-01]
 [  6.97232647e-01   3.67917331e-01   4.34733141e-01]
 [ -8.78073683e-01  -4.08845839e-02   1.33606554e-01]
 [  2.12257996e+00  -5.11199088e-01   5.66596069e-01]
 [  8.08135778e-01  -2.44727149e-01   3.84347661e-01]
 [ -1.47559264e+00  -3.31625912e-01   8.32919369e-02]
 [  1.87500035e+00  -6.39713082e-01  -6.66600459e-02]
 [ -3.36681271e+00  -3.94783019e-01  -1.07724701e-01]
 [  1.18423070e+00  -8.76125244e-01  -5.41964184e-01]
 [ -2.13883587e+00  -4.43423820e-01  -7.24914269e-01]
 [  1.10211353e+00   3.52543067e-01   2.07640348e-01]
 [  1.13677141e+00   4.52738990e-01   1.87107653e-03]]

In [188]:
fig = plt.figure(figsize=plt.figaspect(0.5))
ax = fig.add_subplot(1, 2, 1, projection='3d')
xx=np.array(old_norm_X[:,0]).flatten()
yy=np.array(old_norm_X[:,1]).flatten()
zz=np.array(old_norm_X[:,2]).flatten()
ax.scatter(xx,yy,zz,s=80,marker=(5,3),c=['r','r','r','r','g','g','g','b','b','b','c','c','m','m','g','g'])
ax.set_title("ppdata colored by cell line")
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

ax = fig.add_subplot(1, 2, 2, projection='3d')
xx=np.array(old_norm_X[:,0]).flatten()
yy=np.array(old_norm_X[:,1]).flatten()
zz=np.array(old_norm_X[:,2]).flatten()
ax.scatter(xx,yy,zz,s=80,marker=(5,3),c=['r','r','b','b','r','g','b','r','g','b','r','b','r','b','g','g'])
ax.set_title("ppdata colored by data set")
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

plt.show()

In [ ]: