readingKB-checkpoint



In [89]:
import pandas as pd

In [90]:
# Load sheet 0 of GeneKB01.xlsx, using its "GeneSymbol" column as the index.
# NOTE(review): current pandas spells these keywords `sheet_name` and `dtype`;
# the spellings below are kept exactly as they were -- confirm them against
# the pandas version this notebook is run with.
dfgenes = pd.read_excel(
    "../data/GeneKB01.xlsx",
    sheetname=0,
    index_col="GeneSymbol",
    dtypes=object,
)
# The other three names start out bound to the very same DataFrame object
# (aliases, not copies).
dfgenes2 = dfgenes3 = dfgenes4 = dfgenes

In [91]:
# Disease labels and, position by position, the workbook sheet number that
# goes with each label.
myeloidDiseaseList = [
    "Unknown",
    "AML",
    "MDS",
    "MPN",
]
myeloidSheetList = list(range(1, 5))

In [92]:
# Outer-join every disease-specific sheet onto the base gene table.  Each
# sheet's columns are prefixed with "<disease>_" so they remain
# distinguishable after the join on the GeneSymbol index.
for disease, sheetindex in zip(myeloidDiseaseList, myeloidSheetList):
    # Fix: read the sheet paired with this disease.  The sheet number used to
    # be hard-coded to 1, so every disease prefix received the same data and
    # `sheetindex` was never used.
    df = pd.read_excel("../data/GeneKB01.xlsx", sheetname=sheetindex, index_col="GeneSymbol")
    df.columns = disease + "_" + df.columns
    dfgenes = dfgenes.join(df, how="outer")

# df2/df3/df4 are not joined into anything here, but the names are referenced
# further down in the file, so they are still created.  They used to be
# re-read on every pass of the loop; reading them once afterwards leaves them
# in the same final state (columns prefixed with the last disease handled).
# Like before, this relies on the loop having run at least once.
df2 = pd.read_excel("../data/GeneKB01.xlsx", sheetname=2, index_col="GeneSymbol", dtypes=object)
df3 = pd.read_excel("../data/GeneKB01.xlsx", sheetname=3, index_col="GeneSymbol", dtypes=object)
df4 = pd.read_excel("../data/GeneKB01.xlsx", sheetname=4, index_col="GeneSymbol", dtypes=object)
df2.columns = disease + "_" + df2.columns
df3.columns = disease + "_" + df3.columns
df4.columns = disease + "_" + df4.columns

In [93]:
# Replace every missing value with an empty string.
dfgenes = dfgenes.fillna("")

# Fix: the integer casts of start/stop used to be computed and then thrown
# away, so they never took effect; the results are now stored back.
# Builtin `int` is what the `pd.np.int` alias resolved to; that alias is no
# longer available in newer pandas/NumPy releases.
dfgenes["start"] = dfgenes["start"].astype(int)
dfgenes["stop"] = dfgenes["stop"].astype(int)

# Move the current index into a regular column.
dfgenes.reset_index(inplace=True)

# dfgenes2..4 are independent copies of the processed dfgenes with the index
# reset once more.  The discarded casts that used to be repeated on each copy
# are gone: they had no effect, and start/stop are already integers above.
# NOTE(review): dfgenes has already been reset at this point, so this second
# reset_index puts the old positional index into an extra leading column
# (named "index" by pandas' default), shifting every other column by one.
# That matches what the code did before -- confirm it is intended, or whether
# these were meant to be derived from dfgenes2/3/4 instead.
dfgenes2 = dfgenes.fillna("").reset_index()
dfgenes3 = dfgenes.fillna("").reset_index()
dfgenes4 = dfgenes.fillna("").reset_index()

In [94]:
def removespecchar(test):
    """Strip tab and double-quote characters from a string value.

    Args:
        test: Any value. Only ``str`` instances (including subclasses of
            ``str``) are modified.

    Returns:
        A copy of ``test`` with every tab and ``"`` character removed when
        ``test`` is a string; otherwise ``test`` itself, unchanged.
    """
    # isinstance (instead of an exact type comparison) also accepts str
    # subclasses, which the previous check silently skipped.
    if isinstance(test, str):
        # Both targets are literal characters, so str.replace does the job
        # without importing and invoking the regex engine on every call.
        return test.replace("\t", "").replace('"', "")
    return test

In [95]:
# Run removespecchar over every cell of each of the four frames, then rebind
# each name to its cleaned result.
dfgenes, dfgenes2, dfgenes3, dfgenes4 = [
    frame.applymap(removespecchar)
    for frame in (dfgenes, dfgenes2, dfgenes3, dfgenes4)
]

In [99]:
import csv  # not used in this cell; kept in case other cells rely on it


def _reorder_columns(columns):
    """Return ``columns`` with positions 6-8 moved to the front.

    The remaining columns keep their relative order (0-5 first, then 9+).
    """
    columns = list(columns)
    return columns[6:9] + columns[0:6] + columns[9:]


# Write each frame as a tab-separated file with the reordered column layout.
# NOTE(review): the 6:9 slice presumably selects the chromosome/start/stop
# columns expected at the front of a .bed file -- confirm for each frame,
# since the positions depend on that frame's own column order.
collist = list(dfgenes.columns)
newcollist = _reorder_columns(collist)
dfgenes.to_csv("Testbedfile.bed", sep='\t', columns=newcollist, index=False)

# Fix: write dfgenes2/3/4 -- the frames the column lists are built from.
# Previously df2/df3/df4 were written with these lists, although those frames
# carry "<disease>_"-prefixed sheet columns and do not contain the listed
# columns (e.g. "GeneSymbol" is their index, not a column).
collist2 = list(dfgenes2.columns)
newcollist2 = _reorder_columns(collist2)
dfgenes2.to_csv("Testbedfile2.bed", sep='\t', columns=newcollist2, index=False)

collist3 = list(dfgenes3.columns)
newcollist3 = _reorder_columns(collist3)
dfgenes3.to_csv("Testbedfile3.bed", sep='\t', columns=newcollist3, index=False)

collist4 = list(dfgenes4.columns)
newcollist4 = _reorder_columns(collist4)
dfgenes4.to_csv("Testbedfile4.bed", sep='\t', columns=newcollist4, index=False)

In [ ]: