In [25]:
import pandas
In [26]:
%cd D:\kmc400-braviz
In [27]:
import sqlite3
In [28]:
conn = sqlite3.Connection("descriptors.sqlite")
In [29]:
q="select subject,structure,area,d1,d2,d3 from descriptors"
In [30]:
from pandas.io import sql
In [31]:
df = pandas.read_sql(q,conn,index_col=["subject","structure"])
In [32]:
df.head()
Out[32]:
In [32]:
In [33]:
descs=["area","d1","d2","d3"]
In [34]:
df2=df.unstack()
In [35]:
df2.head()
Out[35]:
In [36]:
df2.index
Out[36]:
In [37]:
table_subjects = [ 8, 9, 15, 19, 25, 29, 31, 35, 44, 51, 53, 54, 56, 64, 65, 69, 71, 72, 73, 75, 83, 90, 93, 95, 107, 108, 113, 119, 121, 123, 124, 125, 128, 129, 134, 138, 141, 143, 144, 145, 149, 151, 153, 154, 156, 157, 161, 165, 172, 173, 175, 176, 177, 182, 185, 186, 195, 197, 198, 201, 202, 205, 208, 210, 212, 216, 219, 221, 225, 227, 230, 231, 232, 235, 237, 253, 256, 259, 261, 263, 264, 266, 277, 288, 292, 293, 300, 301, 304, 307, 310, 312, 313, 314, 319, 320, 322, 327, 331, 332, 333, 344, 346, 348, 353, 355, 356, 357, 358, 364, 369, 371, 374, 381, 390, 396, 399, 402, 409, 413, 416, 417, 423, 424, 426, 427, 429, 431, 432, 440, 452, 456, 458, 464, 469, 472, 478, 480, 483, 484, 485, 491, 496, 499, 504, 517, 526, 532, 535, 536, 537, 542, 544, 545, 547, 548, 549, 552, 566, 568, 576, 577, 579, 580, 592, 593, 595, 598, 599, 600, 602, 610, 611, 615, 616, 619, 623, 625, 630, 631, 645, 650, 651, 662, 665, 670, 675, 678, 684, 686, 689, 691, 694, 696, 712, 715, 734, 739, 748, 752, 754, 761, 765, 769, 783, 784, 786, 789, 790, 791, 795, 799, 804, 806, 815, 818, 821, 829, 840, 841, 848, 850, 861, 863, 868, 869, 874, 876, 877, 878, 879, 884, 891, 892, 893, 894, 898, 905, 906, 912, 913, 914, 918, 928, 934, 935, 939, 940, 942, 953, 954, 965, 966, 971, 982, 984, 992, 994, 996, 1005, 1006, 1021, 1026, 1039, 1049, 1063, 1076, 1077, 1212, 1213, 1218, 1221, 1224, 1227, 1232, 1234, 1237, 1239, 1242, 1244, 1247, 1249, 1251, 1253, 1260, 1262, 1265, 1267, 1268, 1269, 1271, 1278, 1283, 1291, 1304, 1318, 1320, 1322, 1326, 1333, 1336, 1337, 1338, 1340, 1357, ]
In [38]:
missing = set(table_subjects) - set(df2.index)
print missing
In [39]:
new_index = sorted(set(df2.index).intersection(table_subjects))
In [40]:
df3 = df2.loc[new_index]
In [41]:
cols2 = ["_".join((m_s[1],m_s[0])) for m_s in df3.columns]
In [42]:
df3.columns = cols2
In [43]:
df4=df3.sort(axis=1)
In [44]:
df4.head()
Out[44]:
In [45]:
df4.to_excel("descriptors.xlsx")
In [45]:
In [45]: