In [2]:
# Switch the kernel's working directory to the downloaded GSE67835_RAW folder.
# The explicit %cd magic does not depend on IPython's automagic setting.
%cd ~/Downloads/GSE67835_RAW/


/Users/kirkreardon/Downloads/GSE67835_RAW

In [3]:
# List the contents of the current working directory.
%ls


GSM1657871_1772078217.C03.csv     GSM1658104_nochipID3.C69.csv.gz
GSM1657871_1772078217.C03.csv.gz  GSM1658105_nochipID3.C71.csv.gz
GSM1657872_1772078217.C04.csv.gz  GSM1658106_nochipID3.C72.csv.gz
GSM1657873_1772078217.C06.csv.gz  GSM1658107_nochipID3.C74.csv.gz
GSM1657874_1772078217.C07.csv.gz  GSM1658108_nochipID3.C75.csv.gz
GSM1657875_1772078217.C08.csv.gz  GSM1658109_nochipID3.C76.csv.gz
GSM1657876_1772078217.C09.csv.gz  GSM1658110_nochipID3.C80.csv.gz
GSM1657877_1772078217.C14.csv.gz  GSM1658111_nochipID3.C81.csv.gz
GSM1657878_1772078217.C16.csv.gz  GSM1658112_nochipID3.C84.csv.gz
GSM1657879_1772078217.C17.csv.gz  GSM1658113_nochipID3.C85.csv.gz
GSM1657880_1772078217.C18.csv.gz  GSM1658114_nochipID3.C86.csv.gz
GSM1657881_1772078217.C20.csv.gz  GSM1658115_nochipID3.C91.csv.gz
GSM1657882_1772078217.C23.csv.gz  GSM1658116_nochipID5.C04.csv.gz
GSM1657883_1772078217.C28.csv.gz  GSM1658117_nochipID5.C05.csv.gz
GSM1657884_1772078217.C29.csv.gz  GSM1658118_nochipID5.C18.csv.gz
GSM1657885_1772078217.C32.csv.gz  GSM1658119_nochipID5.C19.csv.gz
GSM1657886_1772078217.C33.csv.gz  GSM1658120_nochipID5.C42.csv.gz
GSM1657887_1772078217.C39.csv.gz  GSM1658121_nochipID5.C44.csv.gz
GSM1657888_1772078217.C40.csv.gz  GSM1658122_nochipID5.C45.csv.gz
GSM1657889_1772078217.C47.csv.gz  GSM1658123_nochipID5.C54.csv.gz
GSM1657890_1772078217.C52.csv.gz  GSM1658124_nochipID5.C65.csv.gz
GSM1657891_1772078217.C56.csv.gz  GSM1658125_nochipID5.C72.csv.gz
GSM1657892_1772078217.C58.csv.gz  GSM1658126_nochipID5.C91.csv.gz
GSM1657893_1772078217.C59.csv.gz  GSM1658127_nochipID8.C01.csv.gz
GSM1657894_1772078217.C60.csv.gz  GSM1658128_nochipID8.C02.csv.gz
GSM1657895_1772078217.C61.csv.gz  GSM1658129_nochipID8.C05.csv.gz
GSM1657896_1772078217.C66.csv.gz  GSM1658130_nochipID8.C06.csv.gz
GSM1657897_1772078217.C72.csv.gz  GSM1658131_nochipID8.C08.csv.gz
GSM1657898_1772078217.C80.csv.gz  GSM1658132_nochipID8.C11.csv.gz
GSM1657899_1772078217.C87.csv.gz  GSM1658133_nochipID8.C12.csv.gz
GSM1657900_1772078217.C89.csv.gz  GSM1658134_nochipID8.C13.csv.gz
GSM1657901_1772078217.C91.csv.gz  GSM1658135_nochipID8.C14.csv.gz
GSM1657902_1772078217.C94.csv.gz  GSM1658136_nochipID8.C17.csv.gz
GSM1657903_1772078217.C96.csv.gz  GSM1658137_nochipID8.C19.csv.gz
GSM1657904_1772078218.C05.csv.gz  GSM1658138_nochipID8.C20.csv.gz
GSM1657905_1772078218.C08.csv.gz  GSM1658139_nochipID8.C22.csv.gz
GSM1657906_1772078218.C09.csv.gz  GSM1658140_nochipID8.C23.csv.gz
GSM1657907_1772078218.C13.csv.gz  GSM1658141_nochipID8.C25.csv.gz
GSM1657908_1772078218.C18.csv.gz  GSM1658142_nochipID8.C28.csv.gz
GSM1657909_1772078218.C23.csv.gz  GSM1658143_nochipID8.C29.csv.gz
GSM1657910_1772078218.C27.csv.gz  GSM1658144_nochipID8.C30.csv.gz
GSM1657911_1772078218.C29.csv.gz  GSM1658145_nochipID8.C32.csv.gz
GSM1657912_1772078218.C32.csv.gz  GSM1658146_nochipID8.C36.csv.gz
GSM1657913_1772078218.C34.csv.gz  GSM1658147_nochipID8.C37.csv.gz
GSM1657914_1772078218.C43.csv.gz  GSM1658148_nochipID8.C39.csv.gz
GSM1657915_1772078218.C44.csv.gz  GSM1658149_nochipID8.C40.csv.gz
GSM1657916_1772078218.C47.csv.gz  GSM1658150_nochipID8.C41.csv.gz
GSM1657917_1772078218.C48.csv.gz  GSM1658151_nochipID8.C42.csv.gz
GSM1657918_1772078218.C53.csv.gz  GSM1658152_nochipID8.C43.csv.gz
GSM1657919_1772078218.C55.csv.gz  GSM1658153_nochipID8.C45.csv.gz
GSM1657920_1772078218.C56.csv.gz  GSM1658154_nochipID8.C48.csv.gz
GSM1657921_1772078218.C58.csv.gz  GSM1658155_nochipID8.C49.csv.gz
GSM1657922_1772078218.C61.csv.gz  GSM1658156_nochipID8.C50.csv.gz
GSM1657923_1772078218.C71.csv.gz  GSM1658157_nochipID8.C52.csv.gz
GSM1657924_1772078218.C85.csv.gz  GSM1658158_nochipID8.C53.csv.gz
GSM1657925_1772078218.C88.csv.gz  GSM1658159_nochipID8.C54.csv.gz
GSM1657926_1772078218.C89.csv.gz  GSM1658160_nochipID8.C55.csv.gz
GSM1657927_1772078218.C93.csv.gz  GSM1658161_nochipID8.C56.csv.gz
GSM1657928_1772078218.C95.csv.gz  GSM1658162_nochipID8.C57.csv.gz
GSM1657929_1772078236.C11.csv.gz  GSM1658163_nochipID8.C60.csv.gz
GSM1657930_1772078236.C12.csv.gz  GSM1658164_nochipID8.C61.csv.gz
GSM1657931_1772078236.C16.csv.gz  GSM1658165_nochipID8.C64.csv.gz
GSM1657932_1772078236.C17.csv.gz  GSM1658166_nochipID8.C65.csv.gz
GSM1657933_1772078236.C19.csv.gz  GSM1658167_nochipID8.C66.csv.gz
GSM1657934_1772078236.C25.csv.gz  GSM1658168_nochipID8.C67.csv.gz
GSM1657935_1772078236.C26.csv.gz  GSM1658169_nochipID8.C70.csv.gz
GSM1657936_1772078236.C27.csv.gz  GSM1658170_nochipID8.C75.csv.gz
GSM1657937_1772078236.C28.csv.gz  GSM1658171_nochipID8.C77.csv.gz
GSM1657938_1772078236.C30.csv.gz  GSM1658172_nochipID8.C78.csv.gz
GSM1657939_1772078236.C31.csv.gz  GSM1658173_nochipID8.C79.csv.gz
GSM1657940_1772078236.C32.csv.gz  GSM1658174_nochipID8.C81.csv.gz
GSM1657941_1772078236.C33.csv.gz  GSM1658175_nochipID8.C82.csv.gz
GSM1657942_1772078236.C34.csv.gz  GSM1658176_nochipID8.C84.csv.gz
GSM1657943_1772078236.C36.csv.gz  GSM1658177_nochipID8.C85.csv.gz
GSM1657944_1772078236.C38.csv.gz  GSM1658178_nochipID8.C87.csv.gz
GSM1657945_1772078236.C40.csv.gz  GSM1658179_nochipID8.C88.csv.gz
GSM1657946_1772078236.C41.csv.gz  GSM1658180_nochipID8.C90.csv.gz
GSM1657947_1772078236.C45.csv.gz  GSM1658181_nochipID8.C92.csv.gz
GSM1657948_1772078236.C46.csv.gz  GSM1658182_nochipID8.C95.csv.gz
GSM1657949_1772078236.C47.csv.gz  GSM1658183_nochipID8.C96.csv.gz
GSM1657950_1772078236.C49.csv.gz  GSM1658184_nochipID9.C01.csv.gz
GSM1657951_1772078236.C54.csv.gz  GSM1658185_nochipID9.C02.csv.gz
GSM1657952_1772078236.C56.csv.gz  GSM1658186_nochipID9.C04.csv.gz
GSM1657953_1772078236.C57.csv.gz  GSM1658187_nochipID9.C09.csv.gz
GSM1657954_1772078236.C61.csv.gz  GSM1658188_nochipID9.C14.csv.gz
GSM1657955_1772078236.C65.csv.gz  GSM1658189_nochipID9.C18.csv.gz
GSM1657956_1772078236.C66.csv.gz  GSM1658190_nochipID9.C26.csv.gz
GSM1657957_1772078236.C70.csv.gz  GSM1658191_nochipID9.C37.csv.gz
GSM1657958_1772078236.C74.csv.gz  GSM1658192_nochipID9.C42.csv.gz
GSM1657959_1772078236.C77.csv.gz  GSM1658193_nochipID9.C57.csv.gz
GSM1657960_1772078236.C89.csv.gz  GSM1658194_nochipID9.C59.csv.gz
GSM1657961_1772078236.C90.csv.gz  GSM1658195_nochipID9.C62.csv.gz
GSM1657962_1772078236.C93.csv.gz  GSM1658196_nochipID9.C65.csv.gz
GSM1657963_1772078237.C06.csv.gz  GSM1658197_nochipID9.C74.csv.gz
GSM1657964_1772078237.C08.csv.gz  GSM1658198_nochipID9.C75.csv.gz
GSM1657965_1772078237.C11.csv.gz  GSM1658199_nochipID9.C84.csv.gz
GSM1657966_1772078237.C13.csv.gz  GSM1658200_nochipID9.C85.csv.gz
GSM1657967_1772078237.C14.csv.gz  GSM1658201_nochipID9.C87.csv.gz
GSM1657968_1772078237.C15.csv.gz  GSM1658202_nochipID9.C92.csv.gz
GSM1657969_1772078237.C19.csv.gz  GSM1658203_nochipID10.C02.csv.gz
GSM1657970_1772078237.C21.csv.gz  GSM1658204_nochipID10.C03.csv.gz
GSM1657971_1772078237.C26.csv.gz  GSM1658205_nochipID10.C05.csv.gz
GSM1657972_1772078237.C28.csv.gz  GSM1658206_nochipID10.C07.csv.gz
GSM1657973_1772078237.C29.csv.gz  GSM1658207_nochipID10.C11.csv.gz
GSM1657974_1772078237.C31.csv.gz  GSM1658208_nochipID10.C16.csv.gz
GSM1657975_1772078237.C40.csv.gz  GSM1658209_nochipID10.C17.csv.gz
GSM1657976_1772078237.C42.csv.gz  GSM1658210_nochipID10.C19.csv.gz
GSM1657977_1772078237.C44.csv.gz  GSM1658211_nochipID10.C25.csv.gz
GSM1657978_1772078237.C47.csv.gz  GSM1658212_nochipID10.C26.csv.gz
GSM1657979_1772078237.C49.csv.gz  GSM1658213_nochipID10.C37.csv.gz
GSM1657980_1772078237.C52.csv.gz  GSM1658214_nochipID10.C39.csv.gz
GSM1657981_1772078237.C55.csv.gz  GSM1658215_nochipID10.C42.csv.gz
GSM1657982_1772078237.C56.csv.gz  GSM1658216_nochipID10.C44.csv.gz
GSM1657983_1772078237.C58.csv.gz  GSM1658217_nochipID10.C47.csv.gz
GSM1657984_1772078237.C60.csv.gz  GSM1658218_nochipID10.C49.csv.gz
GSM1657985_1772078237.C61.csv.gz  GSM1658219_nochipID10.C50.csv.gz
GSM1657986_1772078237.C62.csv.gz  GSM1658220_nochipID10.C53.csv.gz
GSM1657987_1772078237.C74.csv.gz  GSM1658221_nochipID10.C58.csv.gz
GSM1657988_1772078237.C79.csv.gz  GSM1658222_nochipID10.C65.csv.gz
GSM1657989_1772078237.C83.csv.gz  GSM1658223_nochipID10.C74.csv.gz
GSM1657990_1772078237.C84.csv.gz  GSM1658224_nochipID10.C75.csv.gz
GSM1657991_1772078237.C92.csv.gz  GSM1658225_nochipID10.C85.csv.gz
GSM1657992_nochipID12.C05.csv.gz  GSM1658226_nochipID10.C88.csv.gz
GSM1657993_nochipID12.C23.csv.gz  GSM1658227_nochipID10.C93.csv.gz
GSM1657994_nochipID12.C73.csv.gz  GSM1658228_nochipID10.C94.csv.gz
GSM1657995_nochipID12.C84.csv.gz  GSM1658229_nochipID11.C02.csv.gz
GSM1657996_nochipID12.C87.csv.gz  GSM1658230_nochipID11.C03.csv.gz
GSM1657997_nochipID14.C08.csv.gz  GSM1658231_nochipID11.C06.csv.gz
GSM1657998_nochipID14.C13.csv.gz  GSM1658232_nochipID11.C07.csv.gz
GSM1657999_nochipID14.C29.csv.gz  GSM1658233_nochipID11.C08.csv.gz
GSM1658000_nochipID14.C45.csv.gz  GSM1658234_nochipID11.C10.csv.gz
GSM1658001_nochipID14.C89.csv.gz  GSM1658235_nochipID11.C15.csv.gz
GSM1658002_nochipID15.C20.csv.gz  GSM1658236_nochipID11.C17.csv.gz
GSM1658003_nochipID15.C54.csv.gz  GSM1658237_nochipID11.C19.csv.gz
GSM1658004_nochipID15.C69.csv.gz  GSM1658238_nochipID11.C20.csv.gz
GSM1658005_nochipID15.C86.csv.gz  GSM1658239_nochipID11.C21.csv.gz
GSM1658006_nochipID2.C01.csv.gz   GSM1658240_nochipID11.C22.csv.gz
GSM1658007_nochipID2.C02.csv.gz   GSM1658241_nochipID11.C23.csv.gz
GSM1658008_nochipID2.C03.csv.gz   GSM1658242_nochipID11.C25.csv.gz
GSM1658009_nochipID2.C07.csv.gz   GSM1658243_nochipID11.C30.csv.gz
GSM1658010_nochipID2.C08.csv.gz   GSM1658244_nochipID11.C37.csv.gz
GSM1658011_nochipID2.C09.csv.gz   GSM1658245_nochipID11.C48.csv.gz
GSM1658012_nochipID2.C10.csv.gz   GSM1658246_nochipID11.C49.csv.gz
GSM1658013_nochipID2.C11.csv.gz   GSM1658247_nochipID11.C50.csv.gz
GSM1658014_nochipID2.C12.csv.gz   GSM1658248_nochipID11.C53.csv.gz
GSM1658015_nochipID2.C13.csv.gz   GSM1658249_nochipID11.C54.csv.gz
GSM1658016_nochipID2.C14.csv.gz   GSM1658251_nochipID11.C55.csv.gz
GSM1658017_nochipID2.C15.csv.gz   GSM1658253_nochipID11.C56.csv.gz
GSM1658018_nochipID2.C16.csv.gz   GSM1658255_nochipID11.C57.csv.gz
GSM1658019_nochipID2.C17.csv.gz   GSM1658257_nochipID11.C58.csv.gz
GSM1658020_nochipID2.C18.csv.gz   GSM1658259_nochipID11.C59.csv.gz
GSM1658021_nochipID2.C19.csv.gz   GSM1658262_nochipID11.C60.csv.gz
GSM1658022_nochipID2.C21.csv.gz   GSM1658264_nochipID11.C61.csv.gz
GSM1658023_nochipID2.C22.csv.gz   GSM1658266_nochipID11.C62.csv.gz
GSM1658024_nochipID2.C24.csv.gz   GSM1658268_nochipID11.C63.csv.gz
GSM1658025_nochipID2.C26.csv.gz   GSM1658270_nochipID11.C64.csv.gz
GSM1658026_nochipID2.C27.csv.gz   GSM1658272_nochipID11.C66.csv.gz
GSM1658027_nochipID2.C28.csv.gz   GSM1658275_nochipID11.C68.csv.gz
GSM1658028_nochipID2.C29.csv.gz   GSM1658277_nochipID11.C69.csv.gz
GSM1658029_nochipID2.C30.csv.gz   GSM1658279_nochipID11.C75.csv.gz
GSM1658030_nochipID2.C31.csv.gz   GSM1658281_nochipID11.C76.csv.gz
GSM1658031_nochipID2.C32.csv.gz   GSM1658284_nochipID11.C77.csv.gz
GSM1658032_nochipID2.C33.csv.gz   GSM1658286_nochipID11.C78.csv.gz
GSM1658033_nochipID2.C34.csv.gz   GSM1658288_nochipID11.C79.csv.gz
GSM1658034_nochipID2.C36.csv.gz   GSM1658290_nochipID11.C81.csv.gz
GSM1658035_nochipID2.C38.csv.gz   GSM1658292_nochipID11.C82.csv.gz
GSM1658036_nochipID2.C39.csv.gz   GSM1658294_nochipID11.C83.csv.gz
GSM1658037_nochipID2.C40.csv.gz   GSM1658297_nochipID11.C84.csv.gz
GSM1658038_nochipID2.C41.csv.gz   GSM1658299_nochipID11.C86.csv.gz
GSM1658039_nochipID2.C42.csv.gz   GSM1658301_nochipID11.C91.csv.gz
GSM1658040_nochipID2.C43.csv.gz   GSM1658304_nochipID11.C93.csv.gz
GSM1658041_nochipID2.C44.csv.gz   GSM1658305_nochipID13.C07.csv.gz
GSM1658042_nochipID2.C45.csv.gz   GSM1658306_nochipID13.C11.csv.gz
GSM1658043_nochipID2.C46.csv.gz   GSM1658307_nochipID13.C12.csv.gz
GSM1658044_nochipID2.C47.csv.gz   GSM1658308_nochipID13.C14.csv.gz
GSM1658045_nochipID2.C48.csv.gz   GSM1658309_nochipID13.C15.csv.gz
GSM1658046_nochipID2.C49.csv.gz   GSM1658310_nochipID13.C17.csv.gz
GSM1658047_nochipID2.C50.csv.gz   GSM1658311_nochipID13.C21.csv.gz
GSM1658048_nochipID2.C51.csv.gz   GSM1658312_nochipID13.C25.csv.gz
GSM1658049_nochipID2.C52.csv.gz   GSM1658313_nochipID13.C28.csv.gz
GSM1658050_nochipID2.C56.csv.gz   GSM1658314_nochipID13.C32.csv.gz
GSM1658051_nochipID2.C57.csv.gz   GSM1658315_nochipID13.C36.csv.gz
GSM1658052_nochipID2.C58.csv.gz   GSM1658316_nochipID13.C45.csv.gz
GSM1658053_nochipID2.C59.csv.gz   GSM1658317_nochipID13.C46.csv.gz
GSM1658054_nochipID2.C60.csv.gz   GSM1658318_nochipID13.C48.csv.gz
GSM1658055_nochipID2.C61.csv.gz   GSM1658319_nochipID13.C50.csv.gz
GSM1658056_nochipID2.C62.csv.gz   GSM1658320_nochipID13.C55.csv.gz
GSM1658057_nochipID2.C63.csv.gz   GSM1658321_nochipID13.C57.csv.gz
GSM1658058_nochipID2.C64.csv.gz   GSM1658322_nochipID13.C58.csv.gz
GSM1658059_nochipID2.C65.csv.gz   GSM1658323_nochipID13.C59.csv.gz
GSM1658060_nochipID2.C66.csv.gz   GSM1658324_nochipID13.C61.csv.gz
GSM1658061_nochipID2.C67.csv.gz   GSM1658325_nochipID13.C63.csv.gz
GSM1658062_nochipID2.C68.csv.gz   GSM1658326_nochipID13.C64.csv.gz
GSM1658063_nochipID2.C69.csv.gz   GSM1658327_nochipID13.C66.csv.gz
GSM1658064_nochipID2.C71.csv.gz   GSM1658328_nochipID13.C67.csv.gz
GSM1658065_nochipID2.C73.csv.gz   GSM1658329_nochipID13.C68.csv.gz
GSM1658066_nochipID2.C75.csv.gz   GSM1658330_nochipID13.C72.csv.gz
GSM1658067_nochipID2.C76.csv.gz   GSM1658331_nochipID13.C73.csv.gz
GSM1658068_nochipID2.C77.csv.gz   GSM1658332_nochipID13.C74.csv.gz
GSM1658069_nochipID2.C78.csv.gz   GSM1658333_nochipID13.C75.csv.gz
GSM1658070_nochipID2.C79.csv.gz   GSM1658334_nochipID13.C80.csv.gz
GSM1658071_nochipID2.C80.csv.gz   GSM1658335_nochipID13.C84.csv.gz
GSM1658072_nochipID2.C81.csv.gz   GSM1658336_nochipID13.C87.csv.gz
GSM1658073_nochipID2.C82.csv.gz   GSM1658337_nochipID13.C93.csv.gz
GSM1658074_nochipID2.C83.csv.gz   GSM1658338_nochipID4.C03.csv.gz
GSM1658075_nochipID2.C84.csv.gz   GSM1658339_nochipID4.C04.csv.gz
GSM1658076_nochipID2.C85.csv.gz   GSM1658340_nochipID4.C10.csv.gz
GSM1658077_nochipID2.C86.csv.gz   GSM1658341_nochipID4.C14.csv.gz
GSM1658078_nochipID2.C87.csv.gz   GSM1658342_nochipID4.C18.csv.gz
GSM1658079_nochipID2.C89.csv.gz   GSM1658343_nochipID4.C19.csv.gz
GSM1658080_nochipID2.C90.csv.gz   GSM1658344_nochipID4.C20.csv.gz
GSM1658081_nochipID2.C92.csv.gz   GSM1658345_nochipID4.C21.csv.gz
GSM1658082_nochipID2.C93.csv.gz   GSM1658346_nochipID4.C33.csv.gz
GSM1658083_nochipID3.C01.csv.gz   GSM1658347_nochipID4.C34.csv.gz
GSM1658084_nochipID3.C05.csv.gz   GSM1658348_nochipID4.C38.csv.gz
GSM1658085_nochipID3.C06.csv.gz   GSM1658349_nochipID4.C39.csv.gz
GSM1658086_nochipID3.C07.csv.gz   GSM1658350_nochipID4.C41.csv.gz
GSM1658087_nochipID3.C09.csv.gz   GSM1658351_nochipID4.C42.csv.gz
GSM1658088_nochipID3.C11.csv.gz   GSM1658352_nochipID4.C44.csv.gz
GSM1658089_nochipID3.C12.csv.gz   GSM1658353_nochipID4.C49.csv.gz
GSM1658090_nochipID3.C14.csv.gz   GSM1658354_nochipID4.C52.csv.gz
GSM1658091_nochipID3.C23.csv.gz   GSM1658355_nochipID4.C53.csv.gz
GSM1658092_nochipID3.C25.csv.gz   GSM1658356_nochipID4.C59.csv.gz
GSM1658093_nochipID3.C32.csv.gz   GSM1658357_nochipID4.C62.csv.gz
GSM1658094_nochipID3.C37.csv.gz   GSM1658358_nochipID4.C63.csv.gz
GSM1658095_nochipID3.C38.csv.gz   GSM1658359_nochipID4.C66.csv.gz
GSM1658096_nochipID3.C51.csv.gz   GSM1658360_nochipID4.C74.csv.gz
GSM1658097_nochipID3.C52.csv.gz   GSM1658361_nochipID4.C77.csv.gz
GSM1658098_nochipID3.C53.csv.gz   GSM1658362_nochipID4.C78.csv.gz
GSM1658099_nochipID3.C54.csv.gz   GSM1658363_nochipID4.C84.csv.gz
GSM1658100_nochipID3.C56.csv.gz   GSM1658364_nochipID4.C89.csv.gz
GSM1658101_nochipID3.C61.csv.gz   GSM1658365_nochipID4.C95.csv.gz
GSM1658102_nochipID3.C62.csv.gz   GSM1658366_nochipID4.C96.csv.gz
GSM1658103_nochipID3.C65.csv.gz

So now we have the unzipped version of the file, "GSE41265_allGenesTPM.txt". I wonder how much space they saved by zipping it?

Let's use the flags "-l" for "long listing" which will show us the sizes


In [3]:
# List the current directory with one entry per line (-1).
%ls -1


1.0_Introduction_to_bioinformatics.ipynb
1.1_Overview_of_analysis_steps.ipynb
1.2_Downloading_public_data_Shalek2013.ipynb
1.3_Single-cell_overview_additional_reading.ipynb
1.X_unused_code.ipynb
2.2_Introduction_to_dimensionality_reduction.ipynb
2.4_ICA.ipynb
2.5_Manifold_learning.ipynb
2.6_Compare_dimensionality_reduction.ipynb
2.7_Apply_dimensionality_reduction_on_Shalek2013_Macaulay2016.ipynb
2.8_Additional_reading.ipynb
2016-06-09_darmanis2015_concatenate_data.ipynb
3.0_Introduction_to_clustering.ipynb
3.1_Classifiers.ipynb
3.2_Hierarchical_clustering.ipynb
3.3_Apply_clustering_to_Shalek2013_Macaulay2016.ipynb
3.X_Unused.ipynb
4.0_Introduction_to_classifiers.ipynb
4.3_Apply_classifiers_to_Shalek2013_Macaulay2016_with_gene_ontology.ipynb
4.4_Additional_reading.ipynb
5.0_Pseudotime_introduction.ipynb
5.1_Pseudotime_ordering_algorithms_overiew.ipynb
5.3_Pseudotime_additional_reading.ipynb
6.1_Dealing_with_technical_noise.ipynb
6.2_Batch_Correction.ipynb
6.3_Technical_noise_additional_reading.ipynb
7.0_Case_Study_Macaulay2016.ipynb
7.1_Playing_with_analysis_decisions_in_Macaulay2016.ipynb
7.2_Reproducing_Shalek2013_figures.ipynb
8.0_Plotting_tips.ipynb
GSE41265_allGenesTPM.txt
GSE41265_allGenesTPM.txt.gz
combat_py/
figures/
papers/
shalek2013_pca.pdf

Oof, this is in pure bytes and I can't convert to multiples of 1024 easily in my head (1024 bytes = 1 kilobyte, 1024 kilobytes = 1 megabyte, etc. - the "1000 bytes per kilobyte" figure is a lie that the hard drive companies use!). So let's use the -h flag, which tells the computer to do the conversion for us. We can combine multiple flags with the same dash, so

ls -l -h

Can be shortened to:

ls -lh

In [12]:
# Count the entries in the current directory: `ls -1` prints one name per
# line and `wc -l` counts those lines.
! ls -1 | wc -l


     466

In [8]:
# Print gunzip's usage text to see which command-line flags it accepts.
! gunzip --help


Apple gzip 251
usage: gunzip [-123456789acdfhklLNnqrtVv] [-S .suffix] [<file> [<file> ...]]
 -1 --fast            fastest (worst) compression
 -2 .. -8             set compression level
 -9 --best            best (slowest) compression
 -c --stdout          write to stdout, keep original files
    --to-stdout
 -d --decompress      uncompress files
    --uncompress
 -f --force           force overwriting & compress links
 -h --help            display this help
 -k --keep            don't delete input files during operation
 -l --list            list compressed file contents
 -N --name            save or restore original file name and time stamp
 -n --no-name         don't save original file name or time stamp
 -q --quiet           output no warnings
 -r --recursive       recursively compress files in directories
 -S .suf              use suffix .suf instead of .gz
    --suffix .suf
 -t --test            test compressed file
 -V --version         display program version
 -v --verbose         print extra statistics

See, "GSE41265_allGenesTPM.txt.gz" is there!

Since the file ends in ".gz", this tells us it's a "gnu-zipped" or "gzipped" ("gee-zipped") file, which is a specific flavor of "zipping" or compressing a file. We need to use a gnu-zipping-aware program to decompress the file, which is "gunzip" ("gnu-unzip").

Run the next cell to unzip the file


In [9]:
# Decompress every file in the current directory whose name ends in "gz".
# -f forces overwriting of output files that already exist (the earlier
# listing shows GSM1657871_1772078217.C03.csv present next to its .csv.gz).
! gunzip -f *gz

In [5]:
# Plain arithmetic expression; the cell displays its value.
3 + 3


Out[5]:
6

In [1]:
# Bind a string to a name, then echo the name so the cell displays its value.
asdf = "beyonce"
asdf


Out[1]:
'beyonce'

In [2]:
# String concatenation with `+`; relies on `asdf` having been defined by an
# earlier cell in this kernel session.
asdf + ' runs the world'


Out[2]:
'beyonce runs the world'

Let's "ls" again to see what files have changed


In [10]:
# List the current directory again to compare against the earlier listing.
%ls


GSM1657871_1772078217.C03.csv  GSM1658104_nochipID3.C69.csv
GSM1657872_1772078217.C04.csv  GSM1658105_nochipID3.C71.csv
GSM1657873_1772078217.C06.csv  GSM1658106_nochipID3.C72.csv
GSM1657874_1772078217.C07.csv  GSM1658107_nochipID3.C74.csv
GSM1657875_1772078217.C08.csv  GSM1658108_nochipID3.C75.csv
GSM1657876_1772078217.C09.csv  GSM1658109_nochipID3.C76.csv
GSM1657877_1772078217.C14.csv  GSM1658110_nochipID3.C80.csv
GSM1657878_1772078217.C16.csv  GSM1658111_nochipID3.C81.csv
GSM1657879_1772078217.C17.csv  GSM1658112_nochipID3.C84.csv
GSM1657880_1772078217.C18.csv  GSM1658113_nochipID3.C85.csv
GSM1657881_1772078217.C20.csv  GSM1658114_nochipID3.C86.csv
GSM1657882_1772078217.C23.csv  GSM1658115_nochipID3.C91.csv
GSM1657883_1772078217.C28.csv  GSM1658116_nochipID5.C04.csv
GSM1657884_1772078217.C29.csv  GSM1658117_nochipID5.C05.csv
GSM1657885_1772078217.C32.csv  GSM1658118_nochipID5.C18.csv
GSM1657886_1772078217.C33.csv  GSM1658119_nochipID5.C19.csv
GSM1657887_1772078217.C39.csv  GSM1658120_nochipID5.C42.csv
GSM1657888_1772078217.C40.csv  GSM1658121_nochipID5.C44.csv
GSM1657889_1772078217.C47.csv  GSM1658122_nochipID5.C45.csv
GSM1657890_1772078217.C52.csv  GSM1658123_nochipID5.C54.csv
GSM1657891_1772078217.C56.csv  GSM1658124_nochipID5.C65.csv
GSM1657892_1772078217.C58.csv  GSM1658125_nochipID5.C72.csv
GSM1657893_1772078217.C59.csv  GSM1658126_nochipID5.C91.csv
GSM1657894_1772078217.C60.csv  GSM1658127_nochipID8.C01.csv
GSM1657895_1772078217.C61.csv  GSM1658128_nochipID8.C02.csv
GSM1657896_1772078217.C66.csv  GSM1658129_nochipID8.C05.csv
GSM1657897_1772078217.C72.csv  GSM1658130_nochipID8.C06.csv
GSM1657898_1772078217.C80.csv  GSM1658131_nochipID8.C08.csv
GSM1657899_1772078217.C87.csv  GSM1658132_nochipID8.C11.csv
GSM1657900_1772078217.C89.csv  GSM1658133_nochipID8.C12.csv
GSM1657901_1772078217.C91.csv  GSM1658134_nochipID8.C13.csv
GSM1657902_1772078217.C94.csv  GSM1658135_nochipID8.C14.csv
GSM1657903_1772078217.C96.csv  GSM1658136_nochipID8.C17.csv
GSM1657904_1772078218.C05.csv  GSM1658137_nochipID8.C19.csv
GSM1657905_1772078218.C08.csv  GSM1658138_nochipID8.C20.csv
GSM1657906_1772078218.C09.csv  GSM1658139_nochipID8.C22.csv
GSM1657907_1772078218.C13.csv  GSM1658140_nochipID8.C23.csv
GSM1657908_1772078218.C18.csv  GSM1658141_nochipID8.C25.csv
GSM1657909_1772078218.C23.csv  GSM1658142_nochipID8.C28.csv
GSM1657910_1772078218.C27.csv  GSM1658143_nochipID8.C29.csv
GSM1657911_1772078218.C29.csv  GSM1658144_nochipID8.C30.csv
GSM1657912_1772078218.C32.csv  GSM1658145_nochipID8.C32.csv
GSM1657913_1772078218.C34.csv  GSM1658146_nochipID8.C36.csv
GSM1657914_1772078218.C43.csv  GSM1658147_nochipID8.C37.csv
GSM1657915_1772078218.C44.csv  GSM1658148_nochipID8.C39.csv
GSM1657916_1772078218.C47.csv  GSM1658149_nochipID8.C40.csv
GSM1657917_1772078218.C48.csv  GSM1658150_nochipID8.C41.csv
GSM1657918_1772078218.C53.csv  GSM1658151_nochipID8.C42.csv
GSM1657919_1772078218.C55.csv  GSM1658152_nochipID8.C43.csv
GSM1657920_1772078218.C56.csv  GSM1658153_nochipID8.C45.csv
GSM1657921_1772078218.C58.csv  GSM1658154_nochipID8.C48.csv
GSM1657922_1772078218.C61.csv  GSM1658155_nochipID8.C49.csv
GSM1657923_1772078218.C71.csv  GSM1658156_nochipID8.C50.csv
GSM1657924_1772078218.C85.csv  GSM1658157_nochipID8.C52.csv
GSM1657925_1772078218.C88.csv  GSM1658158_nochipID8.C53.csv
GSM1657926_1772078218.C89.csv  GSM1658159_nochipID8.C54.csv
GSM1657927_1772078218.C93.csv  GSM1658160_nochipID8.C55.csv
GSM1657928_1772078218.C95.csv  GSM1658161_nochipID8.C56.csv
GSM1657929_1772078236.C11.csv  GSM1658162_nochipID8.C57.csv
GSM1657930_1772078236.C12.csv  GSM1658163_nochipID8.C60.csv
GSM1657931_1772078236.C16.csv  GSM1658164_nochipID8.C61.csv
GSM1657932_1772078236.C17.csv  GSM1658165_nochipID8.C64.csv
GSM1657933_1772078236.C19.csv  GSM1658166_nochipID8.C65.csv
GSM1657934_1772078236.C25.csv  GSM1658167_nochipID8.C66.csv
GSM1657935_1772078236.C26.csv  GSM1658168_nochipID8.C67.csv
GSM1657936_1772078236.C27.csv  GSM1658169_nochipID8.C70.csv
GSM1657937_1772078236.C28.csv  GSM1658170_nochipID8.C75.csv
GSM1657938_1772078236.C30.csv  GSM1658171_nochipID8.C77.csv
GSM1657939_1772078236.C31.csv  GSM1658172_nochipID8.C78.csv
GSM1657940_1772078236.C32.csv  GSM1658173_nochipID8.C79.csv
GSM1657941_1772078236.C33.csv  GSM1658174_nochipID8.C81.csv
GSM1657942_1772078236.C34.csv  GSM1658175_nochipID8.C82.csv
GSM1657943_1772078236.C36.csv  GSM1658176_nochipID8.C84.csv
GSM1657944_1772078236.C38.csv  GSM1658177_nochipID8.C85.csv
GSM1657945_1772078236.C40.csv  GSM1658178_nochipID8.C87.csv
GSM1657946_1772078236.C41.csv  GSM1658179_nochipID8.C88.csv
GSM1657947_1772078236.C45.csv  GSM1658180_nochipID8.C90.csv
GSM1657948_1772078236.C46.csv  GSM1658181_nochipID8.C92.csv
GSM1657949_1772078236.C47.csv  GSM1658182_nochipID8.C95.csv
GSM1657950_1772078236.C49.csv  GSM1658183_nochipID8.C96.csv
GSM1657951_1772078236.C54.csv  GSM1658184_nochipID9.C01.csv
GSM1657952_1772078236.C56.csv  GSM1658185_nochipID9.C02.csv
GSM1657953_1772078236.C57.csv  GSM1658186_nochipID9.C04.csv
GSM1657954_1772078236.C61.csv  GSM1658187_nochipID9.C09.csv
GSM1657955_1772078236.C65.csv  GSM1658188_nochipID9.C14.csv
GSM1657956_1772078236.C66.csv  GSM1658189_nochipID9.C18.csv
GSM1657957_1772078236.C70.csv  GSM1658190_nochipID9.C26.csv
GSM1657958_1772078236.C74.csv  GSM1658191_nochipID9.C37.csv
GSM1657959_1772078236.C77.csv  GSM1658192_nochipID9.C42.csv
GSM1657960_1772078236.C89.csv  GSM1658193_nochipID9.C57.csv
GSM1657961_1772078236.C90.csv  GSM1658194_nochipID9.C59.csv
GSM1657962_1772078236.C93.csv  GSM1658195_nochipID9.C62.csv
GSM1657963_1772078237.C06.csv  GSM1658196_nochipID9.C65.csv
GSM1657964_1772078237.C08.csv  GSM1658197_nochipID9.C74.csv
GSM1657965_1772078237.C11.csv  GSM1658198_nochipID9.C75.csv
GSM1657966_1772078237.C13.csv  GSM1658199_nochipID9.C84.csv
GSM1657967_1772078237.C14.csv  GSM1658200_nochipID9.C85.csv
GSM1657968_1772078237.C15.csv  GSM1658201_nochipID9.C87.csv
GSM1657969_1772078237.C19.csv  GSM1658202_nochipID9.C92.csv
GSM1657970_1772078237.C21.csv  GSM1658203_nochipID10.C02.csv
GSM1657971_1772078237.C26.csv  GSM1658204_nochipID10.C03.csv
GSM1657972_1772078237.C28.csv  GSM1658205_nochipID10.C05.csv
GSM1657973_1772078237.C29.csv  GSM1658206_nochipID10.C07.csv
GSM1657974_1772078237.C31.csv  GSM1658207_nochipID10.C11.csv
GSM1657975_1772078237.C40.csv  GSM1658208_nochipID10.C16.csv
GSM1657976_1772078237.C42.csv  GSM1658209_nochipID10.C17.csv
GSM1657977_1772078237.C44.csv  GSM1658210_nochipID10.C19.csv
GSM1657978_1772078237.C47.csv  GSM1658211_nochipID10.C25.csv
GSM1657979_1772078237.C49.csv  GSM1658212_nochipID10.C26.csv
GSM1657980_1772078237.C52.csv  GSM1658213_nochipID10.C37.csv
GSM1657981_1772078237.C55.csv  GSM1658214_nochipID10.C39.csv
GSM1657982_1772078237.C56.csv  GSM1658215_nochipID10.C42.csv
GSM1657983_1772078237.C58.csv  GSM1658216_nochipID10.C44.csv
GSM1657984_1772078237.C60.csv  GSM1658217_nochipID10.C47.csv
GSM1657985_1772078237.C61.csv  GSM1658218_nochipID10.C49.csv
GSM1657986_1772078237.C62.csv  GSM1658219_nochipID10.C50.csv
GSM1657987_1772078237.C74.csv  GSM1658220_nochipID10.C53.csv
GSM1657988_1772078237.C79.csv  GSM1658221_nochipID10.C58.csv
GSM1657989_1772078237.C83.csv  GSM1658222_nochipID10.C65.csv
GSM1657990_1772078237.C84.csv  GSM1658223_nochipID10.C74.csv
GSM1657991_1772078237.C92.csv  GSM1658224_nochipID10.C75.csv
GSM1657992_nochipID12.C05.csv  GSM1658225_nochipID10.C85.csv
GSM1657993_nochipID12.C23.csv  GSM1658226_nochipID10.C88.csv
GSM1657994_nochipID12.C73.csv  GSM1658227_nochipID10.C93.csv
GSM1657995_nochipID12.C84.csv  GSM1658228_nochipID10.C94.csv
GSM1657996_nochipID12.C87.csv  GSM1658229_nochipID11.C02.csv
GSM1657997_nochipID14.C08.csv  GSM1658230_nochipID11.C03.csv
GSM1657998_nochipID14.C13.csv  GSM1658231_nochipID11.C06.csv
GSM1657999_nochipID14.C29.csv  GSM1658232_nochipID11.C07.csv
GSM1658000_nochipID14.C45.csv  GSM1658233_nochipID11.C08.csv
GSM1658001_nochipID14.C89.csv  GSM1658234_nochipID11.C10.csv
GSM1658002_nochipID15.C20.csv  GSM1658235_nochipID11.C15.csv
GSM1658003_nochipID15.C54.csv  GSM1658236_nochipID11.C17.csv
GSM1658004_nochipID15.C69.csv  GSM1658237_nochipID11.C19.csv
GSM1658005_nochipID15.C86.csv  GSM1658238_nochipID11.C20.csv
GSM1658006_nochipID2.C01.csv   GSM1658239_nochipID11.C21.csv
GSM1658007_nochipID2.C02.csv   GSM1658240_nochipID11.C22.csv
GSM1658008_nochipID2.C03.csv   GSM1658241_nochipID11.C23.csv
GSM1658009_nochipID2.C07.csv   GSM1658242_nochipID11.C25.csv
GSM1658010_nochipID2.C08.csv   GSM1658243_nochipID11.C30.csv
GSM1658011_nochipID2.C09.csv   GSM1658244_nochipID11.C37.csv
GSM1658012_nochipID2.C10.csv   GSM1658245_nochipID11.C48.csv
GSM1658013_nochipID2.C11.csv   GSM1658246_nochipID11.C49.csv
GSM1658014_nochipID2.C12.csv   GSM1658247_nochipID11.C50.csv
GSM1658015_nochipID2.C13.csv   GSM1658248_nochipID11.C53.csv
GSM1658016_nochipID2.C14.csv   GSM1658249_nochipID11.C54.csv
GSM1658017_nochipID2.C15.csv   GSM1658251_nochipID11.C55.csv
GSM1658018_nochipID2.C16.csv   GSM1658253_nochipID11.C56.csv
GSM1658019_nochipID2.C17.csv   GSM1658255_nochipID11.C57.csv
GSM1658020_nochipID2.C18.csv   GSM1658257_nochipID11.C58.csv
GSM1658021_nochipID2.C19.csv   GSM1658259_nochipID11.C59.csv
GSM1658022_nochipID2.C21.csv   GSM1658262_nochipID11.C60.csv
GSM1658023_nochipID2.C22.csv   GSM1658264_nochipID11.C61.csv
GSM1658024_nochipID2.C24.csv   GSM1658266_nochipID11.C62.csv
GSM1658025_nochipID2.C26.csv   GSM1658268_nochipID11.C63.csv
GSM1658026_nochipID2.C27.csv   GSM1658270_nochipID11.C64.csv
GSM1658027_nochipID2.C28.csv   GSM1658272_nochipID11.C66.csv
GSM1658028_nochipID2.C29.csv   GSM1658275_nochipID11.C68.csv
GSM1658029_nochipID2.C30.csv   GSM1658277_nochipID11.C69.csv
GSM1658030_nochipID2.C31.csv   GSM1658279_nochipID11.C75.csv
GSM1658031_nochipID2.C32.csv   GSM1658281_nochipID11.C76.csv
GSM1658032_nochipID2.C33.csv   GSM1658284_nochipID11.C77.csv
GSM1658033_nochipID2.C34.csv   GSM1658286_nochipID11.C78.csv
GSM1658034_nochipID2.C36.csv   GSM1658288_nochipID11.C79.csv
GSM1658035_nochipID2.C38.csv   GSM1658290_nochipID11.C81.csv
GSM1658036_nochipID2.C39.csv   GSM1658292_nochipID11.C82.csv
GSM1658037_nochipID2.C40.csv   GSM1658294_nochipID11.C83.csv
GSM1658038_nochipID2.C41.csv   GSM1658297_nochipID11.C84.csv
GSM1658039_nochipID2.C42.csv   GSM1658299_nochipID11.C86.csv
GSM1658040_nochipID2.C43.csv   GSM1658301_nochipID11.C91.csv
GSM1658041_nochipID2.C44.csv   GSM1658304_nochipID11.C93.csv
GSM1658042_nochipID2.C45.csv   GSM1658305_nochipID13.C07.csv
GSM1658043_nochipID2.C46.csv   GSM1658306_nochipID13.C11.csv
GSM1658044_nochipID2.C47.csv   GSM1658307_nochipID13.C12.csv
GSM1658045_nochipID2.C48.csv   GSM1658308_nochipID13.C14.csv
GSM1658046_nochipID2.C49.csv   GSM1658309_nochipID13.C15.csv
GSM1658047_nochipID2.C50.csv   GSM1658310_nochipID13.C17.csv
GSM1658048_nochipID2.C51.csv   GSM1658311_nochipID13.C21.csv
GSM1658049_nochipID2.C52.csv   GSM1658312_nochipID13.C25.csv
GSM1658050_nochipID2.C56.csv   GSM1658313_nochipID13.C28.csv
GSM1658051_nochipID2.C57.csv   GSM1658314_nochipID13.C32.csv
GSM1658052_nochipID2.C58.csv   GSM1658315_nochipID13.C36.csv
GSM1658053_nochipID2.C59.csv   GSM1658316_nochipID13.C45.csv
GSM1658054_nochipID2.C60.csv   GSM1658317_nochipID13.C46.csv
GSM1658055_nochipID2.C61.csv   GSM1658318_nochipID13.C48.csv
GSM1658056_nochipID2.C62.csv   GSM1658319_nochipID13.C50.csv
GSM1658057_nochipID2.C63.csv   GSM1658320_nochipID13.C55.csv
GSM1658058_nochipID2.C64.csv   GSM1658321_nochipID13.C57.csv
GSM1658059_nochipID2.C65.csv   GSM1658322_nochipID13.C58.csv
GSM1658060_nochipID2.C66.csv   GSM1658323_nochipID13.C59.csv
GSM1658061_nochipID2.C67.csv   GSM1658324_nochipID13.C61.csv
GSM1658062_nochipID2.C68.csv   GSM1658325_nochipID13.C63.csv
GSM1658063_nochipID2.C69.csv   GSM1658326_nochipID13.C64.csv
GSM1658064_nochipID2.C71.csv   GSM1658327_nochipID13.C66.csv
GSM1658065_nochipID2.C73.csv   GSM1658328_nochipID13.C67.csv
GSM1658066_nochipID2.C75.csv   GSM1658329_nochipID13.C68.csv
GSM1658067_nochipID2.C76.csv   GSM1658330_nochipID13.C72.csv
GSM1658068_nochipID2.C77.csv   GSM1658331_nochipID13.C73.csv
GSM1658069_nochipID2.C78.csv   GSM1658332_nochipID13.C74.csv
GSM1658070_nochipID2.C79.csv   GSM1658333_nochipID13.C75.csv
GSM1658071_nochipID2.C80.csv   GSM1658334_nochipID13.C80.csv
GSM1658072_nochipID2.C81.csv   GSM1658335_nochipID13.C84.csv
GSM1658073_nochipID2.C82.csv   GSM1658336_nochipID13.C87.csv
GSM1658074_nochipID2.C83.csv   GSM1658337_nochipID13.C93.csv
GSM1658075_nochipID2.C84.csv   GSM1658338_nochipID4.C03.csv
GSM1658076_nochipID2.C85.csv   GSM1658339_nochipID4.C04.csv
GSM1658077_nochipID2.C86.csv   GSM1658340_nochipID4.C10.csv
GSM1658078_nochipID2.C87.csv   GSM1658341_nochipID4.C14.csv
GSM1658079_nochipID2.C89.csv   GSM1658342_nochipID4.C18.csv
GSM1658080_nochipID2.C90.csv   GSM1658343_nochipID4.C19.csv
GSM1658081_nochipID2.C92.csv   GSM1658344_nochipID4.C20.csv
GSM1658082_nochipID2.C93.csv   GSM1658345_nochipID4.C21.csv
GSM1658083_nochipID3.C01.csv   GSM1658346_nochipID4.C33.csv
GSM1658084_nochipID3.C05.csv   GSM1658347_nochipID4.C34.csv
GSM1658085_nochipID3.C06.csv   GSM1658348_nochipID4.C38.csv
GSM1658086_nochipID3.C07.csv   GSM1658349_nochipID4.C39.csv
GSM1658087_nochipID3.C09.csv   GSM1658350_nochipID4.C41.csv
GSM1658088_nochipID3.C11.csv   GSM1658351_nochipID4.C42.csv
GSM1658089_nochipID3.C12.csv   GSM1658352_nochipID4.C44.csv
GSM1658090_nochipID3.C14.csv   GSM1658353_nochipID4.C49.csv
GSM1658091_nochipID3.C23.csv   GSM1658354_nochipID4.C52.csv
GSM1658092_nochipID3.C25.csv   GSM1658355_nochipID4.C53.csv
GSM1658093_nochipID3.C32.csv   GSM1658356_nochipID4.C59.csv
GSM1658094_nochipID3.C37.csv   GSM1658357_nochipID4.C62.csv
GSM1658095_nochipID3.C38.csv   GSM1658358_nochipID4.C63.csv
GSM1658096_nochipID3.C51.csv   GSM1658359_nochipID4.C66.csv
GSM1658097_nochipID3.C52.csv   GSM1658360_nochipID4.C74.csv
GSM1658098_nochipID3.C53.csv   GSM1658361_nochipID4.C77.csv
GSM1658099_nochipID3.C54.csv   GSM1658362_nochipID4.C78.csv
GSM1658100_nochipID3.C56.csv   GSM1658363_nochipID4.C84.csv
GSM1658101_nochipID3.C61.csv   GSM1658364_nochipID4.C89.csv
GSM1658102_nochipID3.C62.csv   GSM1658365_nochipID4.C95.csv
GSM1658103_nochipID3.C65.csv   GSM1658366_nochipID4.C96.csv

In [11]:
# Peek at the first lines of one decompressed file (head prints 10 lines by
# default). Each line looks like a gene name, a tab, and an integer value,
# and there is no header row.
! head GSM1657872_1772078217.C04.csv


1/2-SBSRNA4 	 0
A1BG 	 0
A1BG-AS1 	 0
A1CF 	 0
A2LD1 	 0
A2M 	 18
A2ML1 	 0
A2MP1 	 0
A4GALT 	 0
A4GNT 	 0

In [15]:
import glob

import pandas as pd

In [17]:
# First attempt with default settings: the first line of the file is consumed as the
# column header (see the header row of the output), so one gene row is lost.
pd.read_table('GSM1657872_1772078217.C04.csv')


Out[17]:
1/2-SBSRNA4 0
0 A1BG 0
1 A1BG-AS1 0
2 A1CF 0
3 A2LD1 0
4 A2M 18
5 A2ML1 0
6 A2MP1 0
7 A4GALT 0
8 A4GNT 0
9 AA06 0
10 AAA1 0
11 AAAS 0
12 AACS 0
13 AACSP1 0
14 AADAC 0
15 AADACL2 0
16 AADACL3 0
17 AADACL4 0
18 AADAT 0
19 AAGAB 572
20 AAK1 174
21 AAMP 1
22 AANAT 0
23 AARS 8
24 AARS2 0
25 AARSD1 57
26 AASDH 0
27 AASDHPPT 0
28 AASS 0
29 AATF 0
... ... ...
22057 ZSCAN22 285
22058 ZSCAN23 0
22059 ZSCAN29 4
22060 ZSCAN30 0
22061 ZSCAN4 0
22062 ZSCAN5A 0
22063 ZSCAN5B 0
22064 ZSWIM1 0
22065 ZSWIM2 0
22066 ZSWIM3 0
22067 ZSWIM4 0
22068 ZSWIM5 0
22069 ZSWIM6 69
22070 ZSWIM7 0
22071 ZUFSP 0
22072 ZW10 0
22073 ZWILCH 2
22074 ZWINT 0
22075 ZXDA 0
22076 ZXDB 0
22077 ZXDC 5
22078 ZYG11A 0
22079 ZYG11B 197
22080 ZYX 0
22081 ZZEF1 0
22082 ZZZ3 58
22083 tAKR 0
22084 no_feature 970221
22085 ambiguous 5771
22086 alignment_not_unique 847047

22087 rows × 2 columns


In [18]:
# Second attempt: index_col=0 moves the gene names into the row index, but the first
# line of the file is still being treated as a header rather than as data.
pd.read_table('GSM1657872_1772078217.C04.csv', index_col=0)


Out[18]:
0
1/2-SBSRNA4
A1BG 0
A1BG-AS1 0
A1CF 0
A2LD1 0
A2M 18
A2ML1 0
A2MP1 0
A4GALT 0
A4GNT 0
AA06 0
AAA1 0
AAAS 0
AACS 0
AACSP1 0
AADAC 0
AADACL2 0
AADACL3 0
AADACL4 0
AADAT 0
AAGAB 572
AAK1 174
AAMP 1
AANAT 0
AARS 8
AARS2 0
AARSD1 57
AASDH 0
AASDHPPT 0
AASS 0
AATF 0
... ...
ZSCAN22 285
ZSCAN23 0
ZSCAN29 4
ZSCAN30 0
ZSCAN4 0
ZSCAN5A 0
ZSCAN5B 0
ZSWIM1 0
ZSWIM2 0
ZSWIM3 0
ZSWIM4 0
ZSWIM5 0
ZSWIM6 69
ZSWIM7 0
ZUFSP 0
ZW10 0
ZWILCH 2
ZWINT 0
ZXDA 0
ZXDB 0
ZXDC 5
ZYG11A 0
ZYG11B 197
ZYX 0
ZZEF1 0
ZZZ3 58
tAKR 0
no_feature 970221
ambiguous 5771
alignment_not_unique 847047

22087 rows × 1 columns


In [22]:
# header=None tells pandas the file has no header line, so the first gene row is kept
# as data (one more row than the previous reads); the gene names still become the index.
dataframe = pd.read_table(
    'GSM1657872_1772078217.C04.csv',
    header=None,
    index_col=0,
)
dataframe


Out[22]:
1
0
1/2-SBSRNA4 0
A1BG 0
A1BG-AS1 0
A1CF 0
A2LD1 0
A2M 18
A2ML1 0
A2MP1 0
A4GALT 0
A4GNT 0
AA06 0
AAA1 0
AAAS 0
AACS 0
AACSP1 0
AADAC 0
AADACL2 0
AADACL3 0
AADACL4 0
AADAT 0
AAGAB 572
AAK1 174
AAMP 1
AANAT 0
AARS 8
AARS2 0
AARSD1 57
AASDH 0
AASDHPPT 0
AASS 0
... ...
ZSCAN22 285
ZSCAN23 0
ZSCAN29 4
ZSCAN30 0
ZSCAN4 0
ZSCAN5A 0
ZSCAN5B 0
ZSWIM1 0
ZSWIM2 0
ZSWIM3 0
ZSWIM4 0
ZSWIM5 0
ZSWIM6 69
ZSWIM7 0
ZUFSP 0
ZW10 0
ZWILCH 2
ZWINT 0
ZXDA 0
ZXDB 0
ZXDC 5
ZYG11A 0
ZYG11B 197
ZYX 0
ZZEF1 0
ZZZ3 58
tAKR 0
no_feature 970221
ambiguous 5771
alignment_not_unique 847047

22088 rows × 1 columns


In [21]:
# Read the same file as a one-dimensional Series (gene name -> count) instead of a
# one-column DataFrame.
# The `squeeze=True` reader argument was deprecated in pandas 1.4 and removed in 2.0;
# squeezing the single data column after the read yields the same Series (same values,
# same index, name taken from the column label) on old and new pandas alike.
series = pd.read_table('GSM1657872_1772078217.C04.csv', index_col=0, header=None).squeeze('columns')
series


Out[21]:
0
1/2-SBSRNA4                   0
A1BG                          0
A1BG-AS1                      0
A1CF                          0
A2LD1                         0
A2M                          18
A2ML1                         0
A2MP1                         0
A4GALT                        0
A4GNT                         0
AA06                          0
AAA1                          0
AAAS                          0
AACS                          0
AACSP1                        0
AADAC                         0
AADACL2                       0
AADACL3                       0
AADACL4                       0
AADAT                         0
AAGAB                       572
AAK1                        174
AAMP                          1
AANAT                         0
AARS                          8
AARS2                         0
AARSD1                       57
AASDH                         0
AASDHPPT                      0
AASS                          0
                          ...  
ZSCAN22                     285
ZSCAN23                       0
ZSCAN29                       4
ZSCAN30                       0
ZSCAN4                        0
ZSCAN5A                       0
ZSCAN5B                       0
ZSWIM1                        0
ZSWIM2                        0
ZSWIM3                        0
ZSWIM4                        0
ZSWIM5                        0
ZSWIM6                       69
ZSWIM7                        0
ZUFSP                         0
ZW10                          0
ZWILCH                        2
ZWINT                         0
ZXDA                          0
ZXDB                          0
ZXDC                          5
ZYG11A                        0
ZYG11B                      197
ZYX                           0
ZZEF1                         0
ZZZ3                         58
tAKR                          0
no_feature               970221
ambiguous                  5771
alignment_not_unique     847047
Name: 1, dtype: int64

In [23]:
# The DataFrame is two-dimensional: (rows, columns), here with a single column.
dataframe.shape


Out[23]:
(22088, 1)

In [24]:
# The squeezed Series is one-dimensional: same number of rows, no column axis.
series.shape


Out[24]:
(22088,)

In [26]:
# A Series carries a name; here it is the label of the column it was squeezed from.
series.name


Out[26]:
1

In [28]:
# Example file name used below to work out how to derive a label for each cell.
filename = 'GSM1657872_1772078217.C04.csv'
filename


Out[28]:
'GSM1657872_1772078217.C04.csv'

In [29]:
# Splitting on every '.' also breaks apart the chip ID and the well ('C04').
filename.split('.')


Out[29]:
['GSM1657872_1772078217', 'C04', 'csv']

In [30]:
# Splitting on the '.csv' suffix leaves the full stem plus an empty trailing string.
filename.split('.csv')


Out[30]:
['GSM1657872_1772078217.C04', '']

In [31]:
# Taking element 0 keeps just the stem, i.e. the file name without its '.csv' suffix.
filename.split('.csv')[0]


Out[31]:
'GSM1657872_1772078217.C04'

In [81]:
# Build the genes x cells count matrix from every per-cell count file in the
# current directory: one Series per file, joined column-wise on the gene index.
cells = []

# glob yields names in arbitrary, filesystem-dependent order; sorting makes the
# column order of `expression` (and of the CSV written from it) reproducible.
for filename in sorted(glob.glob('*.csv')):
    # `squeeze=True` was deprecated in pandas 1.4 and removed in 2.0, so the single
    # count column is squeezed into a Series explicitly after reading.
    cell = pd.read_table(filename, index_col=0, header=None).squeeze('columns')
    # File names look like 'GSM1657872_1772078217.C04.csv'; the part before the first
    # underscore is the GSM accession, used as the column label for this cell.
    name = filename.split('_')[0]
    cell.name = name
    cells.append(cell)
expression = pd.concat(cells, axis=1)
# The raw files pad the tab separator with spaces, so gene labels carry stray spaces.
expression.index = expression.index.map(lambda x: x.strip(' '))
print(expression.shape)
expression.head()


(22088, 466)
Out[81]:
GSM1657871 GSM1657872 GSM1657873 GSM1657874 GSM1657875 GSM1657876 GSM1657877 GSM1657878 GSM1657879 GSM1657880 ... GSM1658357 GSM1658358 GSM1658359 GSM1658360 GSM1658361 GSM1658362 GSM1658363 GSM1658364 GSM1658365 GSM1658366
1/2-SBSRNA4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
A1BG 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
A1BG-AS1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
A1CF 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
A2LD1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 466 columns

Read metadata


In [37]:
# Decompress the GEO series-matrix (sample metadata) files.
# NOTE(review): gunzip replaces each .gz with the decompressed file by default, so this
# cell presumably fails on a second run; the absolute path is also machine-specific.
! gunzip /Users/kirkreardon/Downloads/*_series_matrix.txt.gz

In [39]:
# Look at the top of each series-matrix file: it opens with '!Series_*' header lines.
! head /Users/kirkreardon/Downloads/*_series_matrix.txt


==> /Users/kirkreardon/Downloads/GSE67835-GPL15520_series_matrix.txt <==
!Series_title	"A survey of human brain transcriptome diversity at the single cell level"
!Series_geo_accession	"GSE67835"
!Series_status	"Public on May 20 2015"
!Series_submission_date	"Apr 14 2015"
!Series_last_update_date	"May 26 2016"
!Series_pubmed_id	"26060301"
!Series_summary	"We used single cell RNA sequencing on 466 cells to capture the cellular complexity of the adult and fetal human brain at a whole transcriptome level. Healthy adult temporal lobe tissue was obtained from epileptic patients during temporal lobectomy for medically refractory seizures. We were able to classify individual cells into all of the major neuronal, glial, and vascular cell types in the brain."
!Series_overall_design	"Examination of cell types in healthy human brain samples."
!Series_type	"Expression profiling by high throughput sequencing"
!Series_contributor	"Spyros,,Darmanis"

==> /Users/kirkreardon/Downloads/GSE67835-GPL18573_series_matrix.txt <==
!Series_title	"A survey of human brain transcriptome diversity at the single cell level"
!Series_geo_accession	"GSE67835"
!Series_status	"Public on May 20 2015"
!Series_submission_date	"Apr 14 2015"
!Series_last_update_date	"May 26 2016"
!Series_pubmed_id	"26060301"
!Series_summary	"We used single cell RNA sequencing on 466 cells to capture the cellular complexity of the adult and fetal human brain at a whole transcriptome level. Healthy adult temporal lobe tissue was obtained from epileptic patients during temporal lobectomy for medically refractory seizures. We were able to classify individual cells into all of the major neuronal, glial, and vascular cell types in the brain."
!Series_overall_design	"Examination of cell types in healthy human brain samples."
!Series_type	"Expression profiling by high throughput sequencing"
!Series_contributor	"Spyros,,Darmanis"

In [40]:
# Show more lines to see how far the '!Series_*' header block extends.
! head -n 20 /Users/kirkreardon/Downloads/*_series_matrix.txt


==> /Users/kirkreardon/Downloads/GSE67835-GPL15520_series_matrix.txt <==
!Series_title	"A survey of human brain transcriptome diversity at the single cell level"
!Series_geo_accession	"GSE67835"
!Series_status	"Public on May 20 2015"
!Series_submission_date	"Apr 14 2015"
!Series_last_update_date	"May 26 2016"
!Series_pubmed_id	"26060301"
!Series_summary	"We used single cell RNA sequencing on 466 cells to capture the cellular complexity of the adult and fetal human brain at a whole transcriptome level. Healthy adult temporal lobe tissue was obtained from epileptic patients during temporal lobectomy for medically refractory seizures. We were able to classify individual cells into all of the major neuronal, glial, and vascular cell types in the brain."
!Series_overall_design	"Examination of cell types in healthy human brain samples."
!Series_type	"Expression profiling by high throughput sequencing"
!Series_contributor	"Spyros,,Darmanis"
!Series_contributor	"Martin,,Enge"
!Series_contributor	"Stephen,R,Quake"
!Series_contributor	"Steven,A,Sloan"
!Series_contributor	"Ben,A,Barres"
!Series_contributor	"Ye,,Zhang"
!Series_contributor	"Christine,,Caneda"
!Series_contributor	"Melanie,G,Hayden Gephart"
!Series_contributor	"Lawrence,M,Shuer"
!Series_sample_id	"GSM1657871 GSM1657872 GSM1657873 GSM1657874 GSM1657875 GSM1657876 GSM1657877 GSM1657878 GSM1657879 GSM1657880 GSM1657881 GSM1657882 GSM1657883 GSM1657884 GSM1657885 GSM1657886 GSM1657887 GSM1657888 GSM1657889 GSM1657890 GSM1657891 GSM1657892 GSM1657893 GSM1657894 GSM1657895 GSM1657896 GSM1657897 GSM1657898 GSM1657899 GSM1657900 GSM1657901 GSM1657902 GSM1657903 GSM1657904 GSM1657905 GSM1657906 GSM1657907 GSM1657908 GSM1657909 GSM1657910 GSM1657911 GSM1657912 GSM1657913 GSM1657914 GSM1657915 GSM1657916 GSM1657917 GSM1657918 GSM1657919 GSM1657920 GSM1657921 GSM1657922 GSM1657923 GSM1657924 GSM1657925 GSM1657926 GSM1657927 GSM1657928 GSM1657929 GSM1657930 GSM1657931 GSM1657932 GSM1657933 GSM1657934 GSM1657935 GSM1657936 GSM1657937 GSM1657938 GSM1657939 GSM1657940 GSM1657941 GSM1657942 GSM1657943 GSM1657944 GSM1657945 GSM1657946 GSM1657947 GSM1657948 GSM1657949 GSM1657950 GSM1657951 GSM1657952 GSM1657953 GSM1657954 GSM1657955 GSM1657956 GSM1657957 GSM1657958 GSM1657959 GSM1657960 GSM1657961 GSM1657962 GSM1657963 GSM1657964 GSM1657965 GSM1657966 GSM1657967 GSM1657968 GSM1657969 GSM1657970 GSM1657971 GSM1657972 GSM1657973 GSM1657974 GSM1657975 GSM1657976 GSM1657977 GSM1657978 GSM1657979 GSM1657980 GSM1657981 GSM1657982 GSM1657983 GSM1657984 GSM1657985 GSM1657986 GSM1657987 GSM1657988 GSM1657989 GSM1657990 GSM1657991 GSM1657992 GSM1657993 GSM1657994 GSM1657995 GSM1657996 GSM1657997 GSM1657998 GSM1657999 GSM1658000 GSM1658001 GSM1658002 GSM1658003 GSM1658004 GSM1658005 GSM1658006 GSM1658007 GSM1658008 GSM1658009 GSM1658010 GSM1658011 GSM1658012 GSM1658013 GSM1658014 GSM1658015 GSM1658016 GSM1658017 GSM1658018 GSM1658019 GSM1658020 GSM1658021 GSM1658022 GSM1658023 GSM1658024 GSM1658025 GSM1658026 GSM1658027 GSM1658028 GSM1658029 GSM1658030 GSM1658031 GSM1658032 GSM1658033 GSM1658034 GSM1658035 GSM1658036 GSM1658037 GSM1658038 GSM1658039 GSM1658040 GSM1658041 GSM1658042 GSM1658043 GSM1658044 GSM1658045 GSM1658046 GSM1658047 GSM1658048 GSM1658049 GSM1658050 
GSM1658051 GSM1658052 GSM1658053 GSM1658054 GSM1658055 GSM1658056 GSM1658057 GSM1658058 GSM1658059 GSM1658060 GSM1658061 GSM1658062 GSM1658063 GSM1658064 GSM1658065 GSM1658066 GSM1658067 GSM1658068 GSM1658069 GSM1658070 GSM1658071 GSM1658072 GSM1658073 GSM1658074 GSM1658075 GSM1658076 GSM1658077 GSM1658078 GSM1658079 GSM1658080 GSM1658081 GSM1658082 GSM1658083 GSM1658084 GSM1658085 GSM1658086 GSM1658087 GSM1658088 GSM1658089 GSM1658090 GSM1658091 GSM1658092 GSM1658093 GSM1658094 GSM1658095 GSM1658096 GSM1658097 GSM1658098 GSM1658099 GSM1658100 GSM1658101 GSM1658102 GSM1658103 GSM1658104 GSM1658105 GSM1658106 GSM1658107 GSM1658108 GSM1658109 GSM1658110 GSM1658111 GSM1658112 GSM1658113 GSM1658114 GSM1658115 GSM1658116 GSM1658117 GSM1658118 GSM1658119 GSM1658120 GSM1658121 GSM1658122 GSM1658123 GSM1658124 GSM1658125 GSM1658126 GSM1658127 GSM1658128 GSM1658129 GSM1658130 GSM1658131 GSM1658132 GSM1658133 GSM1658134 GSM1658135 GSM1658136 GSM1658137 GSM1658138 GSM1658139 GSM1658140 GSM1658141 GSM1658142 GSM1658143 GSM1658144 GSM1658145 GSM1658146 GSM1658147 GSM1658148 GSM1658149 GSM1658150 GSM1658151 GSM1658152 GSM1658153 GSM1658154 GSM1658155 GSM1658156 GSM1658157 GSM1658158 GSM1658159 GSM1658160 GSM1658161 GSM1658162 GSM1658163 GSM1658164 GSM1658165 GSM1658166 GSM1658167 GSM1658168 GSM1658169 GSM1658170 GSM1658171 GSM1658172 GSM1658173 GSM1658174 GSM1658175 GSM1658176 GSM1658177 GSM1658178 GSM1658179 GSM1658180 GSM1658181 GSM1658182 GSM1658183 GSM1658184 GSM1658185 GSM1658186 GSM1658187 GSM1658188 GSM1658189 GSM1658190 GSM1658191 GSM1658192 GSM1658193 GSM1658194 GSM1658195 GSM1658196 GSM1658197 GSM1658198 GSM1658199 GSM1658200 GSM1658201 GSM1658202 GSM1658203 GSM1658204 GSM1658205 GSM1658206 GSM1658207 GSM1658208 GSM1658209 GSM1658210 GSM1658211 GSM1658212 GSM1658213 GSM1658214 GSM1658215 GSM1658216 GSM1658217 GSM1658218 GSM1658219 GSM1658220 GSM1658221 GSM1658222 GSM1658223 GSM1658224 GSM1658225 GSM1658226 GSM1658227 GSM1658228 GSM1658229 GSM1658230 GSM1658231 
GSM1658232 GSM1658233 GSM1658234 GSM1658235 GSM1658236 GSM1658237 GSM1658238 GSM1658239 GSM1658240 GSM1658241 GSM1658242 GSM1658243 GSM1658244 GSM1658245 GSM1658246 GSM1658247 GSM1658248 GSM1658249 GSM1658251 GSM1658253 GSM1658255 GSM1658257 GSM1658259 GSM1658262 GSM1658264 GSM1658266 GSM1658268 GSM1658270 GSM1658272 GSM1658275 GSM1658277 GSM1658279 GSM1658281 GSM1658284 GSM1658286 GSM1658288 GSM1658290 GSM1658292 GSM1658294 GSM1658297 GSM1658299 GSM1658301 GSM1658304 GSM1658305 GSM1658306 GSM1658307 GSM1658308 GSM1658309 GSM1658310 GSM1658311 GSM1658312 GSM1658313 GSM1658314 GSM1658315 GSM1658316 GSM1658317 GSM1658318 GSM1658319 GSM1658320 GSM1658321 GSM1658322 GSM1658323 GSM1658324 GSM1658325 GSM1658326 GSM1658327 GSM1658328 GSM1658329 GSM1658330 GSM1658331 GSM1658332 GSM1658333 GSM1658334 GSM1658335 GSM1658336 GSM1658337 GSM1658338 GSM1658339 GSM1658340 GSM1658341 GSM1658342 GSM1658343 GSM1658344 GSM1658345 GSM1658346 GSM1658347 GSM1658348 GSM1658349 GSM1658350 GSM1658351 GSM1658352 GSM1658353 GSM1658354 GSM1658355 GSM1658356 GSM1658357 GSM1658358 GSM1658359 GSM1658360 GSM1658361 GSM1658362 GSM1658363 GSM1658364 GSM1658365 GSM1658366 "
!Series_contact_name	"Martin,,Enge"

==> /Users/kirkreardon/Downloads/GSE67835-GPL18573_series_matrix.txt <==
!Series_title	"A survey of human brain transcriptome diversity at the single cell level"
!Series_geo_accession	"GSE67835"
!Series_status	"Public on May 20 2015"
!Series_submission_date	"Apr 14 2015"
!Series_last_update_date	"May 26 2016"
!Series_pubmed_id	"26060301"
!Series_summary	"We used single cell RNA sequencing on 466 cells to capture the cellular complexity of the adult and fetal human brain at a whole transcriptome level. Healthy adult temporal lobe tissue was obtained from epileptic patients during temporal lobectomy for medically refractory seizures. We were able to classify individual cells into all of the major neuronal, glial, and vascular cell types in the brain."
!Series_overall_design	"Examination of cell types in healthy human brain samples."
!Series_type	"Expression profiling by high throughput sequencing"
!Series_contributor	"Spyros,,Darmanis"
!Series_contributor	"Martin,,Enge"
!Series_contributor	"Stephen,R,Quake"
!Series_contributor	"Steven,A,Sloan"
!Series_contributor	"Ben,A,Barres"
!Series_contributor	"Ye,,Zhang"
!Series_contributor	"Christine,,Caneda"
!Series_contributor	"Melanie,G,Hayden Gephart"
!Series_contributor	"Lawrence,M,Shuer"
!Series_sample_id	"GSM1657871 GSM1657872 GSM1657873 GSM1657874 GSM1657875 GSM1657876 GSM1657877 GSM1657878 GSM1657879 GSM1657880 GSM1657881 GSM1657882 GSM1657883 GSM1657884 GSM1657885 GSM1657886 GSM1657887 GSM1657888 GSM1657889 GSM1657890 GSM1657891 GSM1657892 GSM1657893 GSM1657894 GSM1657895 GSM1657896 GSM1657897 GSM1657898 GSM1657899 GSM1657900 GSM1657901 GSM1657902 GSM1657903 GSM1657904 GSM1657905 GSM1657906 GSM1657907 GSM1657908 GSM1657909 GSM1657910 GSM1657911 GSM1657912 GSM1657913 GSM1657914 GSM1657915 GSM1657916 GSM1657917 GSM1657918 GSM1657919 GSM1657920 GSM1657921 GSM1657922 GSM1657923 GSM1657924 GSM1657925 GSM1657926 GSM1657927 GSM1657928 GSM1657929 GSM1657930 GSM1657931 GSM1657932 GSM1657933 GSM1657934 GSM1657935 GSM1657936 GSM1657937 GSM1657938 GSM1657939 GSM1657940 GSM1657941 GSM1657942 GSM1657943 GSM1657944 GSM1657945 GSM1657946 GSM1657947 GSM1657948 GSM1657949 GSM1657950 GSM1657951 GSM1657952 GSM1657953 GSM1657954 GSM1657955 GSM1657956 GSM1657957 GSM1657958 GSM1657959 GSM1657960 GSM1657961 GSM1657962 GSM1657963 GSM1657964 GSM1657965 GSM1657966 GSM1657967 GSM1657968 GSM1657969 GSM1657970 GSM1657971 GSM1657972 GSM1657973 GSM1657974 GSM1657975 GSM1657976 GSM1657977 GSM1657978 GSM1657979 GSM1657980 GSM1657981 GSM1657982 GSM1657983 GSM1657984 GSM1657985 GSM1657986 GSM1657987 GSM1657988 GSM1657989 GSM1657990 GSM1657991 GSM1657992 GSM1657993 GSM1657994 GSM1657995 GSM1657996 GSM1657997 GSM1657998 GSM1657999 GSM1658000 GSM1658001 GSM1658002 GSM1658003 GSM1658004 GSM1658005 GSM1658006 GSM1658007 GSM1658008 GSM1658009 GSM1658010 GSM1658011 GSM1658012 GSM1658013 GSM1658014 GSM1658015 GSM1658016 GSM1658017 GSM1658018 GSM1658019 GSM1658020 GSM1658021 GSM1658022 GSM1658023 GSM1658024 GSM1658025 GSM1658026 GSM1658027 GSM1658028 GSM1658029 GSM1658030 GSM1658031 GSM1658032 GSM1658033 GSM1658034 GSM1658035 GSM1658036 GSM1658037 GSM1658038 GSM1658039 GSM1658040 GSM1658041 GSM1658042 GSM1658043 GSM1658044 GSM1658045 GSM1658046 GSM1658047 GSM1658048 GSM1658049 GSM1658050 
GSM1658051 GSM1658052 GSM1658053 GSM1658054 GSM1658055 GSM1658056 GSM1658057 GSM1658058 GSM1658059 GSM1658060 GSM1658061 GSM1658062 GSM1658063 GSM1658064 GSM1658065 GSM1658066 GSM1658067 GSM1658068 GSM1658069 GSM1658070 GSM1658071 GSM1658072 GSM1658073 GSM1658074 GSM1658075 GSM1658076 GSM1658077 GSM1658078 GSM1658079 GSM1658080 GSM1658081 GSM1658082 GSM1658083 GSM1658084 GSM1658085 GSM1658086 GSM1658087 GSM1658088 GSM1658089 GSM1658090 GSM1658091 GSM1658092 GSM1658093 GSM1658094 GSM1658095 GSM1658096 GSM1658097 GSM1658098 GSM1658099 GSM1658100 GSM1658101 GSM1658102 GSM1658103 GSM1658104 GSM1658105 GSM1658106 GSM1658107 GSM1658108 GSM1658109 GSM1658110 GSM1658111 GSM1658112 GSM1658113 GSM1658114 GSM1658115 GSM1658116 GSM1658117 GSM1658118 GSM1658119 GSM1658120 GSM1658121 GSM1658122 GSM1658123 GSM1658124 GSM1658125 GSM1658126 GSM1658127 GSM1658128 GSM1658129 GSM1658130 GSM1658131 GSM1658132 GSM1658133 GSM1658134 GSM1658135 GSM1658136 GSM1658137 GSM1658138 GSM1658139 GSM1658140 GSM1658141 GSM1658142 GSM1658143 GSM1658144 GSM1658145 GSM1658146 GSM1658147 GSM1658148 GSM1658149 GSM1658150 GSM1658151 GSM1658152 GSM1658153 GSM1658154 GSM1658155 GSM1658156 GSM1658157 GSM1658158 GSM1658159 GSM1658160 GSM1658161 GSM1658162 GSM1658163 GSM1658164 GSM1658165 GSM1658166 GSM1658167 GSM1658168 GSM1658169 GSM1658170 GSM1658171 GSM1658172 GSM1658173 GSM1658174 GSM1658175 GSM1658176 GSM1658177 GSM1658178 GSM1658179 GSM1658180 GSM1658181 GSM1658182 GSM1658183 GSM1658184 GSM1658185 GSM1658186 GSM1658187 GSM1658188 GSM1658189 GSM1658190 GSM1658191 GSM1658192 GSM1658193 GSM1658194 GSM1658195 GSM1658196 GSM1658197 GSM1658198 GSM1658199 GSM1658200 GSM1658201 GSM1658202 GSM1658203 GSM1658204 GSM1658205 GSM1658206 GSM1658207 GSM1658208 GSM1658209 GSM1658210 GSM1658211 GSM1658212 GSM1658213 GSM1658214 GSM1658215 GSM1658216 GSM1658217 GSM1658218 GSM1658219 GSM1658220 GSM1658221 GSM1658222 GSM1658223 GSM1658224 GSM1658225 GSM1658226 GSM1658227 GSM1658228 GSM1658229 GSM1658230 GSM1658231 
GSM1658232 GSM1658233 GSM1658234 GSM1658235 GSM1658236 GSM1658237 GSM1658238 GSM1658239 GSM1658240 GSM1658241 GSM1658242 GSM1658243 GSM1658244 GSM1658245 GSM1658246 GSM1658247 GSM1658248 GSM1658249 GSM1658251 GSM1658253 GSM1658255 GSM1658257 GSM1658259 GSM1658262 GSM1658264 GSM1658266 GSM1658268 GSM1658270 GSM1658272 GSM1658275 GSM1658277 GSM1658279 GSM1658281 GSM1658284 GSM1658286 GSM1658288 GSM1658290 GSM1658292 GSM1658294 GSM1658297 GSM1658299 GSM1658301 GSM1658304 GSM1658305 GSM1658306 GSM1658307 GSM1658308 GSM1658309 GSM1658310 GSM1658311 GSM1658312 GSM1658313 GSM1658314 GSM1658315 GSM1658316 GSM1658317 GSM1658318 GSM1658319 GSM1658320 GSM1658321 GSM1658322 GSM1658323 GSM1658324 GSM1658325 GSM1658326 GSM1658327 GSM1658328 GSM1658329 GSM1658330 GSM1658331 GSM1658332 GSM1658333 GSM1658334 GSM1658335 GSM1658336 GSM1658337 GSM1658338 GSM1658339 GSM1658340 GSM1658341 GSM1658342 GSM1658343 GSM1658344 GSM1658345 GSM1658346 GSM1658347 GSM1658348 GSM1658349 GSM1658350 GSM1658351 GSM1658352 GSM1658353 GSM1658354 GSM1658355 GSM1658356 GSM1658357 GSM1658358 GSM1658359 GSM1658360 GSM1658361 GSM1658362 GSM1658363 GSM1658364 GSM1658365 GSM1658366 "
!Series_contact_name	"Martin,,Enge"

In [47]:
# str.strip(chars) removes the given characters from both ends of the string.
"Whooo!!!!!!!!!".strip("!")


Out[47]:
'Whooo'

In [48]:
# strip only trims at the ends: the 'o's are in the middle here, so nothing is removed.
"Whooo!!!!!!!!!".strip("o")


Out[48]:
'Whooo!!!!!!!!!'

In [58]:
# Sample metadata for the GPL15520 platform, reshaped to one row per cell.
metadata1 = (
    pd.read_table(
        '/Users/kirkreardon/Downloads/GSE67835-GPL15520_series_matrix.txt',
        # presumably skips the leading '!Series_*' header block; verify against the file
        skiprows=37,
        header=None,
        index_col=0,
    )
    # Field names in the file are prefixed with '!'; drop it from every row label.
    .rename(index=lambda label: label.strip('!'))
    # Fields are rows in the file; flip so that fields become columns.
    .T
)
metadata1.head()


Out[58]:
Sample_title Sample_geo_accession Sample_status Sample_submission_date Sample_last_update_date Sample_type Sample_channel_count Sample_source_name_ch1 Sample_organism_ch1 Sample_characteristics_ch1 ... Sample_library_selection Sample_library_source Sample_library_strategy Sample_relation Sample_relation Sample_supplementary_file_1 Sample_supplementary_file_2 series_matrix_table_begin ID_REF series_matrix_table_end
1 healthy cortex cell 122 GSM1657992 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657992 NaN
2 healthy cortex cell 123 GSM1657993 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657993 NaN
3 healthy cortex cell 124 GSM1657994 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657994 NaN
4 healthy cortex cell 125 GSM1657995 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657995 NaN
5 healthy cortex cell 126 GSM1657996 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657996 NaN

5 rows × 47 columns


In [61]:
# Sample metadata for the GPL18573 platform, reshaped to one row per cell.
metadata2 = (
    pd.read_table(
        '/Users/kirkreardon/Downloads/GSE67835-GPL18573_series_matrix.txt',
        # presumably skips the leading '!Series_*' header block; verify against the file
        skiprows=37,
        header=None,
        index_col=0,
    )
    # Field names in the file are prefixed with '!'; drop it from every row label.
    .rename(index=lambda label: label.strip('!'))
    # Fields are rows in the file; flip so that fields become columns.
    .T
)
metadata2.head()


Out[61]:
Sample_title Sample_geo_accession Sample_status Sample_submission_date Sample_last_update_date Sample_type Sample_channel_count Sample_source_name_ch1 Sample_organism_ch1 Sample_characteristics_ch1 ... Sample_library_selection Sample_library_source Sample_library_strategy Sample_relation Sample_relation Sample_supplementary_file_1 Sample_supplementary_file_2 series_matrix_table_begin ID_REF series_matrix_table_end
1 healthy cortex cell 1 GSM1657871 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657871 NaN
2 healthy cortex cell 2 GSM1657872 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657872 NaN
3 healthy cortex cell 3 GSM1657873 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657873 NaN
4 healthy cortex cell 4 GSM1657874 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657874 NaN
5 healthy cortex cell 5 GSM1657875 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657875 NaN

5 rows × 47 columns


In [69]:
# Stack the two per-platform sample tables row-wise into one table covering all cells.
dataframes = [metadata1, metadata2]
metadata = pd.concat(dataframes)
# Row count should equal the number of cells in the expression matrix.
print(metadata.shape)
metadata.head()


(466, 47)
Out[69]:
Sample_title Sample_geo_accession Sample_status Sample_submission_date Sample_last_update_date Sample_type Sample_channel_count Sample_source_name_ch1 Sample_organism_ch1 Sample_characteristics_ch1 ... Sample_library_selection Sample_library_source Sample_library_strategy Sample_relation Sample_relation Sample_supplementary_file_1 Sample_supplementary_file_2 series_matrix_table_begin ID_REF series_matrix_table_end
1 healthy cortex cell 122 GSM1657992 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657992 NaN
2 healthy cortex cell 123 GSM1657993 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657993 NaN
3 healthy cortex cell 124 GSM1657994 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657994 NaN
4 healthy cortex cell 125 GSM1657995 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657995 NaN
5 healthy cortex cell 126 GSM1657996 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657996 NaN

5 rows × 47 columns


In [70]:
# Index the sample table by GSM accession so its rows line up with the column
# labels of the expression matrix.
# set_index drops the column it consumes, so an unguarded second run of this cell
# would raise KeyError; the guard makes re-running the cell a no-op.
if 'Sample_geo_accession' in metadata.columns:
    metadata = metadata.set_index('Sample_geo_accession')
metadata.head()


Out[70]:
Sample_title Sample_status Sample_submission_date Sample_last_update_date Sample_type Sample_channel_count Sample_source_name_ch1 Sample_organism_ch1 Sample_characteristics_ch1 Sample_characteristics_ch1 ... Sample_library_selection Sample_library_source Sample_library_strategy Sample_relation Sample_relation Sample_supplementary_file_1 Sample_supplementary_file_2 series_matrix_table_begin ID_REF series_matrix_table_end
Sample_geo_accession
GSM1657992 healthy cortex cell 122 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex cell type: astrocytes ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657992 NaN
GSM1657993 healthy cortex cell 123 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex cell type: endothelial ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657993 NaN
GSM1657994 healthy cortex cell 124 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex cell type: microglia ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657994 NaN
GSM1657995 healthy cortex cell 125 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex cell type: endothelial ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657995 NaN
GSM1657996 healthy cortex cell 126 Public on May 20 2015 Apr 15 2015 Nov 06 2015 SRA 1 Brain Homo sapiens tissue: cortex cell type: microglia ... cDNA transcriptomic RNA-Seq BioSample: http://www.ncbi.nlm.nih.gov/biosamp... SRA: http://www.ncbi.nlm.nih.gov/sra?term=SRX9... ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supple... ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-insta... NaN GSM1657996 NaN

5 rows × 46 columns


In [82]:
mkdir -p ~/projects/darmanis2015/processed_data

In [83]:
# Persist the combined count matrix; the target directory must already exist.
expression.to_csv('~/projects/darmanis2015/processed_data/expression.csv')

In [84]:
# Persist the combined sample metadata next to the expression matrix.
metadata.to_csv('~/projects/darmanis2015/processed_data/metadata.csv')

In [85]:
# Inspect the counts of a single cell (one column of the matrix), selected by label.
expression['GSM1657884']


Out[85]:
1/2-SBSRNA4                  0
A1BG                         0
A1BG-AS1                     0
A1CF                         0
A2LD1                        0
A2M                         12
A2ML1                        0
A2MP1                        0
A4GALT                       0
A4GNT                        0
AA06                         0
AAA1                         0
AAAS                         0
AACS                         0
AACSP1                       0
AADAC                        0
AADACL2                      0
AADACL3                      0
AADACL4                      0
AADAT                        0
AAGAB                        0
AAK1                        36
AAMP                         2
AANAT                        0
AARS                         4
AARS2                        0
AARSD1                       8
AASDH                        4
AASDHPPT                    29
AASS                         0
                         ...  
ZSCAN22                      0
ZSCAN23                      0
ZSCAN29                     36
ZSCAN30                      0
ZSCAN4                       0
ZSCAN5A                      0
ZSCAN5B                      0
ZSWIM1                       0
ZSWIM2                       0
ZSWIM3                       0
ZSWIM4                       0
ZSWIM5                       0
ZSWIM6                       0
ZSWIM7                      87
ZUFSP                        2
ZW10                         0
ZWILCH                       0
ZWINT                        0
ZXDA                         0
ZXDB                         0
ZXDC                         2
ZYG11A                       0
ZYG11B                       1
ZYX                          2
ZZEF1                       84
ZZZ3                         0
tAKR                         0
no_feature              824714
ambiguous                 3241
alignment_not_unique    596751
Name: GSM1657884, dtype: int64

In [92]:
# Row labels at the end of the expression table that are not gene names
# (they look like read-counting summary rows -- confirm against the data source).
bad_rows = ['no_feature', 'ambiguous', 'alignment_not_unique']
# Compare on whitespace-stripped labels so that padded labels such as
# 'no_feature ' are still recognised and dropped. good_genes keeps the
# original (unstripped) labels so it can be used directly with expression.loc.
good_genes = expression.index[~expression.index.str.strip().isin(bad_rows)]
good_genes


Out[92]:
Index(['1/2-SBSRNA4', 'A1BG', 'A1BG-AS1', 'A1CF', 'A2LD1', 'A2M', 'A2ML1',
       'A2MP1', 'A4GALT', 'A4GNT',
       ...
       'ZWINT', 'ZXDA', 'ZXDB', 'ZXDC', 'ZYG11A', 'ZYG11B', 'ZYX', 'ZZEF1',
       'ZZZ3', 'tAKR'],
      dtype='object', length=22085)

In [93]:
# (rows, columns) of the unfiltered expression table, for comparison with the filtered one.
expression.shape


Out[93]:
(22088, 466)

In [94]:
# Keep only the rows whose labels are in good_genes; all columns are retained.
expression_actually_genes = expression.loc[good_genes, :]
# Show (rows, columns) of the filtered table.
expression_actually_genes.shape


Out[94]:
(22085, 466)

In [79]:
# Show the last five rows of the filtered table.
# NOTE(review): the saved output of this cell has execution count 79, lower than
# the filtering cells (92/94), and still lists no_feature / ambiguous /
# alignment_not_unique -- it appears to be stale; re-run after the filter.
expression_actually_genes.tail()


Out[79]:
GSM1657871 GSM1657872 GSM1657873 GSM1657874 GSM1657875 GSM1657876 GSM1657877 GSM1657878 GSM1657879 GSM1657880 ... GSM1658357 GSM1658358 GSM1658359 GSM1658360 GSM1658361 GSM1658362 GSM1658363 GSM1658364 GSM1658365 GSM1658366
0
ZZZ3 15 58 0 122 0 222 2 0 13 0 ... 0 66 0 0 0 0 4 0 0 7
tAKR 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
no_feature 822062 970221 952904 1301729 882830 1098392 768831 1436879 936055 729299 ... 1136953 862137 402523 913528 592939 505954 750682 606953 1063098 791301
ambiguous 5663 5771 7831 4780 5002 2388 10386 8422 4667 11112 ... 7730 4973 4168 5718 5249 5350 7668 7337 8792 4359
alignment_not_unique 697959 847047 613329 742635 765874 1118933 556215 1051949 669653 584841 ... 601759 525037 456087 570485 466276 465384 655463 460247 830121 730874

5 rows × 466 columns


In [95]:
# Export the filtered table next to the other processed files. The path is
# home-relative ('~'), matching the expression.csv / metadata.csv exports,
# rather than a hard-coded /Users/<name>/... location that only exists on one machine.
expression_actually_genes.to_csv('~/projects/darmanis2015/processed_data/expression_actually_genes.csv')

In [90]:
# Show the dtype of every column of the filtered table.
expression_actually_genes.dtypes


Out[90]:
GSM1657871    int64
GSM1657872    int64
GSM1657873    int64
GSM1657874    int64
GSM1657875    int64
GSM1657876    int64
GSM1657877    int64
GSM1657878    int64
GSM1657879    int64
GSM1657880    int64
GSM1657881    int64
GSM1657882    int64
GSM1657883    int64
GSM1657884    int64
GSM1657885    int64
GSM1657886    int64
GSM1657887    int64
GSM1657888    int64
GSM1657889    int64
GSM1657890    int64
GSM1657891    int64
GSM1657892    int64
GSM1657893    int64
GSM1657894    int64
GSM1657895    int64
GSM1657896    int64
GSM1657897    int64
GSM1657898    int64
GSM1657899    int64
GSM1657900    int64
              ...  
GSM1658337    int64
GSM1658338    int64
GSM1658339    int64
GSM1658340    int64
GSM1658341    int64
GSM1658342    int64
GSM1658343    int64
GSM1658344    int64
GSM1658345    int64
GSM1658346    int64
GSM1658347    int64
GSM1658348    int64
GSM1658349    int64
GSM1658350    int64
GSM1658351    int64
GSM1658352    int64
GSM1658353    int64
GSM1658354    int64
GSM1658355    int64
GSM1658356    int64
GSM1658357    int64
GSM1658358    int64
GSM1658359    int64
GSM1658360    int64
GSM1658361    int64
GSM1658362    int64
GSM1658363    int64
GSM1658364    int64
GSM1658365    int64
GSM1658366    int64
dtype: object

In [91]:
# Show the row labels of the last five rows of the filtered table.
# NOTE(review): the saved output (execution count 91) shows labels with a
# trailing space (e.g. 'no_feature ') and still includes the three non-gene
# rows, unlike the output of cell 92 -- it looks stale; re-run to confirm.
expression_actually_genes.tail().index


Out[91]:
Index(['ZZZ3 ', 'tAKR ', 'no_feature ', 'ambiguous ', 'alignment_not_unique '], dtype='object', name=0)

In [ ]: