In [89]:
import pandas as pd
In [90]:
# Load the first worksheet of the gene workbook, indexed by the GeneSymbol column.
# `dtype=object` (the actual keyword; `dtypes` is not a read_excel parameter) keeps
# every cell as read, so integer-looking columns are not promoted to float when
# missing values appear after the outer joins performed later.
# `sheet_name` replaces the deprecated `sheetname` spelling.
dfgenes = pd.read_excel(
    "../data/GeneKB01.xlsx",
    sheet_name=0,
    index_col="GeneSymbol",
    dtype=object,
)

# Give the other three names their own copies instead of aliasing a single object,
# so an in-place change to one table can never leak into the others.
dfgenes2 = dfgenes.copy()
dfgenes3 = dfgenes.copy()
dfgenes4 = dfgenes.copy()
In [91]:
# Disease labels; each one is used as the column-name prefix for its worksheet.
myeloidDiseaseList = [
    "Unknown",
    "AML",
    "MDS",
    "MPN",
]
# Worksheet positions paired one-to-one (by zip) with the labels above.
# Sheet 0 is read separately as the base gene table.
myeloidSheetList = list(range(1, 5))
In [92]:
# Outer-join every disease worksheet onto the base gene table, prefixing each
# sheet's columns with its disease label so the joined columns stay distinct.
for disease, sheetindex in zip(myeloidDiseaseList, myeloidSheetList):
    # Read the worksheet that belongs to *this* disease: use the loop's sheet
    # index rather than a fixed sheet, otherwise the same sheet is joined once
    # per disease under different prefixes.
    # `sheet_name` / `dtype` are the read_excel keyword spellings; dtype=object
    # matches the other reads of this workbook.
    df = pd.read_excel(
        "../data/GeneKB01.xlsx",
        sheet_name=sheetindex,
        index_col="GeneSymbol",
        dtype=object,
    )

    # NOTE(review): df2/df3/df4 are not joined into any table (those joins were
    # commented out), and each iteration overwrites them, so after the loop they
    # hold sheets 2/3/4 prefixed with the *last* disease label. They are kept
    # only because the export cell still references these names.
    df2 = pd.read_excel("../data/GeneKB01.xlsx", sheet_name=2, index_col="GeneSymbol", dtype=object)
    df3 = pd.read_excel("../data/GeneKB01.xlsx", sheet_name=3, index_col="GeneSymbol", dtype=object)
    df4 = pd.read_excel("../data/GeneKB01.xlsx", sheet_name=4, index_col="GeneSymbol", dtype=object)

    prefix = disease + "_"
    df = df.add_prefix(prefix)
    df2 = df2.add_prefix(prefix)
    df3 = df3.add_prefix(prefix)
    df4 = df4.add_prefix(prefix)

    # how="outer" keeps genes that appear only in the disease sheet or only in
    # the accumulated table; the missing side is filled with NaN.
    dfgenes = dfgenes.join(df, how="outer")
    #dfgenes2 = dfgenes2.join(df2, how="outer")
    #dfgenes3 = dfgenes3.join(df3, how="outer")
    #dfgenes4 = dfgenes4.join(df4, how="outer")
In [93]:
def _int_or_blank(value):
    """Return ``value`` as an ``int``, or ``""`` when the cell is empty/missing.

    Going through ``float`` lets values such as ``123.0`` or ``"123.0"``
    (integers that were promoted to float by missing data) become ``123``.

    Raises:
        ValueError: if a non-empty value is not numeric.
    """
    if pd.isnull(value) or value == "":
        return ""
    return int(float(value))


# Replace missing cells (introduced by the outer joins) with empty strings.
dfgenes = dfgenes.fillna("")

# Store the integer conversion back into the frame: `astype` returns a new
# Series, so calling it without assignment changes nothing. Blank cells are left
# blank instead of raising, and no `pd.np` / `np.int` alias is needed (both were
# removed from current pandas / numpy).
for coord in ("start", "stop"):
    dfgenes[coord] = dfgenes[coord].map(_int_or_blank)

# Turn the GeneSymbol index into an ordinary leading column.
dfgenes = dfgenes.reset_index()

# The other three tables are copies of the prepared frame. They must NOT be
# reset again: a second reset_index would prepend a stray "index" column and
# shift every positional column lookup done on them later.
dfgenes2 = dfgenes.copy()
dfgenes3 = dfgenes.copy()
dfgenes4 = dfgenes.copy()
In [94]:
def removespecchar(test):
    """Strip tab and double-quote characters from a string cell.

    Args:
        test: A single cell value of any type.

    Returns:
        ``test`` with every tab and ``"`` character removed when it is a string
        (including ``str`` subclasses); any other value is returned unchanged.
    """
    # isinstance also accepts str subclasses, which an exact type comparison
    # would silently skip.
    if isinstance(test, str):
        # Plain literal replacement; no regular expression is needed to delete
        # two fixed characters.
        test = test.replace('\t', '').replace('"', '')
    return test
In [95]:
# Run the cell-level cleaner over every cell of each of the four tables and
# rebind each name to its cleaned result.
dfgenes, dfgenes2, dfgenes3, dfgenes4 = [
    frame.applymap(removespecchar)
    for frame in (dfgenes, dfgenes2, dfgenes3, dfgenes4)
]
In [99]:
import csv


def _bed_column_order(columns):
    """Return ``columns`` as a list with positions 6-8 moved to the front.

    The remaining columns keep their relative order (positions 0-5, then 9+).
    NOTE(review): positions 6-8 are presumably the chromosome/start/stop columns
    a BED file expects first -- confirm against the workbook layout. The window
    is positional, so a frame carrying an extra leading column (for example
    from a repeated reset_index) would shift it by one.
    """
    columns = list(columns)
    return columns[6:9] + columns[0:6] + columns[9:]


# Write each prepared table as a tab-separated file. Every table is written
# from the same frame its column list was built from; writing a different frame
# (such as the raw df2/df3/df4 disease sheets) with these column lists would
# select columns that frame does not have.
collist = list(dfgenes.columns)
newcollist = _bed_column_order(collist)
dfgenes.to_csv("Testbedfile.bed", sep='\t', columns=newcollist, index=False)

collist2 = list(dfgenes2.columns)
newcollist2 = _bed_column_order(collist2)
dfgenes2.to_csv("Testbedfile2.bed", sep='\t', columns=newcollist2, index=False)

collist3 = list(dfgenes3.columns)
newcollist3 = _bed_column_order(collist3)
dfgenes3.to_csv("Testbedfile3.bed", sep='\t', columns=newcollist3, index=False)

collist4 = list(dfgenes4.columns)
newcollist4 = _bed_column_order(collist4)
dfgenes4.to_csv("Testbedfile4.bed", sep='\t', columns=newcollist4, index=False)
In [ ]: