In [25]:
import pandas

In [26]:
%cd D:\kmc400-braviz


D:\kmc400-braviz

In [27]:
import sqlite3

In [28]:
conn = sqlite3.Connection("descriptors.sqlite")

In [29]:
q="select subject,structure,area,d1,d2,d3 from descriptors"

In [30]:
from pandas.io import sql

In [31]:
df = pandas.read_sql(q,conn,index_col=["subject","structure"])

In [32]:
df.head()


Out[32]:
area d1 d2 d3
subject structure
8 ctx-lh-parahippocampal 2278.972681 37.429935 27.662001 18.309098
144 ctx-lh-entorhinal 1301.127051 31.906112 22.805641 12.805795
8 ctx-lh-unknown 624.890113 71.958321 65.579963 24.555925
144 ctx-lh-postcentral 6899.567492 91.104336 38.530314 20.813732
8 ctx-rh-parahippocampal 2580.549424 39.370039 22.284075 15.277122

In [32]:


In [33]:
descs=["area","d1","d2","d3"]

In [34]:
df2=df.unstack()

In [35]:
df2.head()


Out[35]:
area ... d3
structure 3rd-Ventricle 4th-Ventricle 5th-Ventricle Brain-Stem CC-Full CC_Anterior CC_Central CC_Mid_Anterior CC_Mid_Posterior CC_Posterior ... ctx-rh-rostralanteriorcingulate ctx-rh-rostralmiddlefrontal ctx-rh-superiorfrontal ctx-rh-superiorparietal ctx-rh-superiortemporal ctx-rh-supramarginal ctx-rh-temporalpole ctx-rh-transversetemporal ctx-rh-unknown non-WM-hypointensities
subject
8 1042.746860 2324.878508 NaN 19901.106811 NaN 931.075837 349.613667 399.028359 326.393362 938.169249 ... 13.867242 34.474535 36.268758 29.907202 21.840511 32.846882 14.357744 9.730251 29.612856 31.402270
9 641.619491 1403.124541 0.035986 19105.559066 NaN 827.894232 552.128907 449.101797 415.486599 760.369338 ... 16.315684 28.157098 28.987894 31.327106 24.768581 39.578074 16.031983 9.157265 29.442350 NaN
15 394.184189 997.982595 NaN 14207.624250 NaN 762.970895 376.312508 449.414371 332.295052 872.130594 ... 10.291902 27.141029 31.444940 33.197328 20.185973 35.341697 17.011382 7.816032 25.034827 24.233473
19 353.600531 993.396872 NaN 19291.102927 NaN 852.189519 372.058946 341.943886 377.845119 980.551577 ... 12.437163 28.664058 36.267630 33.355277 24.461873 31.105284 16.346414 11.192667 27.950776 34.210083
25 1109.025713 1909.614231 0.190569 20935.992450 NaN 860.965305 456.710971 453.471441 405.456617 1001.375148 ... 18.343729 32.607878 37.655168 29.434078 34.904352 38.723101 12.684097 13.565896 29.122445 8.558598

5 rows × 464 columns


In [36]:
df2.index


Out[36]:
Int64Index([8, 9, 15, 19, 25, 29, 31, 35, 44, 51, 53, 54, 56, 64, 65, 69, 71, 72, 75, 83, 90, 93, 95, 107, 108, 113, 119, 121, 123, 124, 125, 128, 129, 134, 138, 141, 143, 144, 145, 149, 151, 153, 154, 156, 157, 161, 165, 172, 173, 175, 176, 177, 182, 185, 186, 195, 197, 198, 201, 202, 205, 208, 210, 212, 216, 219, 221, 225, 227, 230, 231, 232, 235, 237, 253, 256, 261, 263, 264, 266, 277, 288, 292, 293, 300, 301, 304, 307, 310, 313, 314, 319, 320, 322, 327, 331, 332, 333, 344, 346, ...], dtype='int64')

In [37]:
table_subjects = [ 8, 9, 15, 19, 25, 29, 31, 35, 44, 51, 53, 54, 56, 64, 65, 69, 71, 72, 73, 75, 83, 90, 93, 95, 107, 108, 113, 119, 121, 123, 124, 125, 128, 129, 134, 138, 141, 143, 144, 145, 149, 151, 153, 154, 156, 157, 161, 165, 172, 173, 175, 176, 177, 182, 185, 186, 195, 197, 198, 201, 202, 205, 208, 210, 212, 216, 219, 221, 225, 227, 230, 231, 232, 235, 237, 253, 256, 259, 261, 263, 264, 266, 277, 288, 292, 293, 300, 301, 304, 307, 310, 312, 313, 314, 319, 320, 322, 327, 331, 332, 333, 344, 346, 348, 353, 355, 356, 357, 358, 364, 369, 371, 374, 381, 390, 396, 399, 402, 409, 413, 416, 417, 423, 424, 426, 427, 429, 431, 432, 440, 452, 456, 458, 464, 469, 472, 478, 480, 483, 484, 485, 491, 496, 499, 504, 517, 526, 532, 535, 536, 537, 542, 544, 545, 547, 548, 549, 552, 566, 568, 576, 577, 579, 580, 592, 593, 595, 598, 599, 600, 602, 610, 611, 615, 616, 619, 623, 625, 630, 631, 645, 650, 651, 662, 665, 670, 675, 678, 684, 686, 689, 691, 694, 696, 712, 715, 734, 739, 748, 752, 754, 761, 765, 769, 783, 784, 786, 789, 790, 791, 795, 799, 804, 806, 815, 818, 821, 829, 840, 841, 848, 850, 861, 863, 868, 869, 874, 876, 877, 878, 879, 884, 891, 892, 893, 894, 898, 905, 906, 912, 913, 914, 918, 928, 934, 935, 939, 940, 942, 953, 954, 965, 966, 971, 982, 984, 992, 994, 996, 1005, 1006, 1021, 1026, 1039, 1049, 1063, 1076, 1077, 1212, 1213, 1218, 1221, 1224, 1227, 1232, 1234, 1237, 1239, 1242, 1244, 1247, 1249, 1251, 1253, 1260, 1262, 1265, 1267, 1268, 1269, 1271, 1278, 1283, 1291, 1304, 1318, 1320, 1322, 1326, 1333, 1336, 1337, 1338, 1340, 1357, ]

In [38]:
missing = set(table_subjects) - set(df2.index)
print missing


set([312, 73, 259])

In [39]:
new_index = sorted(set(df2.index).intersection(table_subjects))

In [40]:
df3 = df2.loc[new_index]

In [41]:
cols2 = ["_".join((m_s[1],m_s[0])) for m_s in df3.columns]

In [42]:
df3.columns = cols2

In [43]:
df4=df3.sort(axis=1)

In [44]:
df4.head()


Out[44]:
3rd-Ventricle_area 3rd-Ventricle_d1 3rd-Ventricle_d2 3rd-Ventricle_d3 4th-Ventricle_area 4th-Ventricle_d1 4th-Ventricle_d2 4th-Ventricle_d3 5th-Ventricle_area 5th-Ventricle_d1 ... ctx-rh-transversetemporal_d2 ctx-rh-transversetemporal_d3 ctx-rh-unknown_area ctx-rh-unknown_d1 ctx-rh-unknown_d2 ctx-rh-unknown_d3 non-WM-hypointensities_area non-WM-hypointensities_d1 non-WM-hypointensities_d2 non-WM-hypointensities_d3
subject
8 1042.746860 36.276714 17.819852 6.320717 2324.878508 49.889879 26.155790 21.261250 NaN NaN ... 12.586128 9.730251 468.230734 89.336443 66.987532 29.612856 6.132118 91.831367 48.206247 31.402270
9 641.619491 33.376639 17.140485 5.425703 1403.124541 43.208795 20.090662 16.680234 0.035986 NaN ... 13.389130 9.157265 491.930440 75.610846 57.675523 29.442350 NaN NaN NaN NaN
15 394.184189 28.178006 17.457000 4.612694 997.982595 39.937451 23.283802 15.212459 NaN NaN ... 15.531115 7.816032 689.650087 66.820655 59.974526 25.034827 9.535910 53.497664 49.886065 24.233473
19 353.600531 28.792360 15.420484 5.069910 993.396872 43.278170 21.165607 12.541365 NaN NaN ... 12.197054 11.192667 965.982823 81.553663 60.539905 27.950776 22.280935 77.317527 51.776174 34.210083
25 1109.025713 35.185224 15.608342 8.507182 1909.614231 47.138095 22.108704 18.356068 0.190569 NaN ... 14.344592 13.565896 1177.163048 74.779676 59.780500 29.122445 1.769736 72.965745 41.464603 8.558598

5 rows × 464 columns


In [45]:
df4.to_excel("descriptors.xlsx")

In [45]:


In [45]: