In [1]:
%matplotlib inline
In [41]:
import glob
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
# Show up to 50 columns when a wide DataFrame is displayed (this option
# limits columns, not rows).
pd.set_option('display.max_columns', 50)
import os
# Every file name used below is relative to this directory. The original
# machine-specific location stays the default; set the TRITO_DATA_DIR
# environment variable to run the notebook elsewhere without editing this cell.
DATA_DIR = os.environ.get('TRITO_DATA_DIR', '/Users/evanbiederstedt/Downloads/required_binary_phylo_files')
os.chdir(DATA_DIR)
import statsmodels.api as sm
In [42]:
# List the working directory so the available input files show up in the notebook output.
%ls
In [43]:
# Collect every file in the working directory whose name starts with this prefix.
# NOTE(review): no later visible cell reads `trito_files`; the loads below use hard-coded names.
trito_files = glob.glob("binary_position_RRBS_trito_pool*")
In [44]:
# Number of files matched by the pattern above.
len(trito_files)
Out[44]:
In [ ]:
df1 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.ACAACC.bed.anno.csv")
df2 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.ACGTGG.bed.anno.csv")
df3 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.ACTCAC.bed.anno.csv")
df4 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.ATAGCG.bed.anno.csv")
df5 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.ATCGAC.bed.anno.csv")
df6 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.CAAGAG.bed.anno.csv")
df7 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.CATGAC.bed.anno.csv")
df8 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.CCTTCG.bed.anno.csv")
df9 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.CGGTAG.bed.anno.csv")
df10 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.CTATTG.bed.anno.csv")
df11 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.GACACG.bed.anno.csv")
df12 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.GCATTC.bed.anno.csv")
df13 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.GCTGCC.bed.anno.csv")
df14 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.GGCATC.bed.anno.csv")
df15 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.GTGAGG.bed.anno.csv")
df16 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.GTTGAG.bed.anno.csv")
df17 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.TAGCGG.bed.anno.csv")
df18 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.TATCTC.bed.anno.csv")
df19 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.TCTCTG.bed.anno.csv")
df20 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.TGACAG.bed.anno.csv")
df21 = pd.read_csv("binary_position_RRBS_trito_pool_1_TAAGGCGA.TGCTGC.bed.anno.csv")
In [9]:
df1 = df1.drop("Unnamed: 0", axis=1)
df2 = df2.drop("Unnamed: 0", axis=1)
df3 = df3.drop("Unnamed: 0", axis=1)
df4 = df4.drop("Unnamed: 0", axis=1)
df5 = df5.drop("Unnamed: 0", axis=1)
df6 = df6.drop("Unnamed: 0", axis=1)
df7 = df7.drop("Unnamed: 0", axis=1)
df8 = df8.drop("Unnamed: 0", axis=1)
df9 = df9.drop("Unnamed: 0", axis=1)
df10 = df10.drop("Unnamed: 0", axis=1)
df11 = df11.drop("Unnamed: 0", axis=1)
df12 = df12.drop("Unnamed: 0", axis=1)
df13 = df13.drop("Unnamed: 0", axis=1)
df14 = df14.drop("Unnamed: 0", axis=1)
df15 = df15.drop("Unnamed: 0", axis=1)
df16 = df16.drop("Unnamed: 0", axis=1)
df17 = df17.drop("Unnamed: 0", axis=1)
df18 = df18.drop("Unnamed: 0", axis=1)
df19 = df19.drop("Unnamed: 0", axis=1)
df20 = df20.drop("Unnamed: 0", axis=1)
df21 = df21.drop("Unnamed: 0", axis=1)
In [10]:
df_list = [df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11,
df12, df13, df14, df15, df16, df17, df18, df19, df20, df21]
In [ ]:
In [11]:
df1.head()
Out[11]:
In [ ]:
trito_matrix = pd.concat([df.set_index("position") for df in df_list], axis=1).reset_index().astype(object)
In [ ]:
trito_matrix.shape
In [ ]:
trito_matrix.head()
In [ ]:
trito_matrix = trito_matrix.drop("index", axis=1)
In [ ]:
trito_matrix.columns = ["RRBS_trito_pool_1_TAAGGCGA.ACAACC", "RRBS_trito_pool_1_TAAGGCGA.ACGTGG",
"RRBS_trito_pool_1_TAAGGCGA.ACTCAC", "RRBS_trito_pool_1_TAAGGCGA.ATAGCG",
"RRBS_trito_pool_1_TAAGGCGA.ATCGAC", "RRBS_trito_pool_1_TAAGGCGA.CAAGAG",
"RRBS_trito_pool_1_TAAGGCGA.CATGAC", "RRBS_trito_pool_1_TAAGGCGA.CCTTCG",
"RRBS_trito_pool_1_TAAGGCGA.CGGTAG", "RRBS_trito_pool_1_TAAGGCGA.CTATTG",
"RRBS_trito_pool_1_TAAGGCGA.GACACG", "RRBS_trito_pool_1_TAAGGCGA.GCATTC",
"RRBS_trito_pool_1_TAAGGCGA.GCTGCC", "RRBS_trito_pool_1_TAAGGCGA.GGCATC",
"RRBS_trito_pool_1_TAAGGCGA.GTGAGG", "RRBS_trito_pool_1_TAAGGCGA.GTTGAG",
"RRBS_trito_pool_1_TAAGGCGA.TAGCGG", "RRBS_trito_pool_1_TAAGGCGA.TATCTC",
"RRBS_trito_pool_1_TAAGGCGA.TCTCTG", "RRBS_trito_pool_1_TAAGGCGA.TGACAG",
"RRBS_trito_pool_1_TAAGGCGA.TGCTGC"]
In [ ]:
In [ ]:
In [ ]:
#trito_matrix = trito_matrix.T.astype(object) # don't transpose
In [ ]:
# trito_matrix.applymap(int)
In [ ]:
# Preview the first rows of the matrix at this point in the notebook.
trito_matrix.head()
In [ ]:
#trito_matrix.to_csv("trito1_binary.csv", index=False, header=False)
In [ ]:
#f = pd.read_csv("trito1_binary.csv")
In [ ]:
#f
In [ ]:
In [ ]:
In [ ]:
trito_matrix = trito_matrix.applymap(lambda x: int(x) if pd.notnull(x) else str("?"))
In [ ]:
#trito_matrix.to_csv("tritopool_binary1.phy", header=None, index=None, sep=' ')
In [ ]:
trito_matrix.shape
In [ ]:
%pwd
In [ ]:
trito_matrix
In [ ]:
#trito_matrix.applymap(lambda x: int(x) if pd.notnull(x) else str("?"))
In [ ]:
#trito_matrix.to_csv("tritopool_correct1.phy", header=None, index=None)
In [ ]:
#trito_matrix.to_csv("tritopool_correct2.phy", header=None, index=None, sep=' ')
In [ ]:
#trito_matrix.to_csv("tritopool_correct3.phy", header=None, index=None, sep='')
In [ ]:
In [ ]:
trito_matrix = trito_matrix.astype(str).apply(''.join)
In [ ]:
trito_matrix
In [ ]:
type(trito_matrix)
In [ ]:
trito_matrix.index
In [ ]:
matt = pd.Series(trito_matrix.index.astype(str).str.cat(trito_matrix.astype(str),' '))
In [ ]:
matt
In [ ]:
trito_matrix = matt
In [ ]:
trito_matrix.to_csv("tritopool_1_final.phy", header=None, index=None)
In [ ]:
trito_matrix
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
# Collect the files in the working directory whose names start with the pool-2 prefix.
trito_files2 = glob.glob("binary_position_RRBS_trito_pool_2*")
In [ ]:
# Number of files matched by the pattern above.
len(trito_files2)
In [ ]:
# Show the matched file names.
trito_files2
In [12]:
# Load the 21 pool-2 per-sample tables. Each table is read and its
# "Unnamed: 0" column is dropped in the same pass, driven by the file list,
# so this cell always starts from the files on disk and can be re-run safely
# (a separate in-place drop cell raises KeyError the second time it is run).
pool2_files = [
    "binary_position_RRBS_trito_pool_2_CGTACTAG.ACAACC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.ACGTGG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.ACTCAC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.AGGATG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.ATAGCG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.ATCGAC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.CAAGAG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.CATGAC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.CCTTCG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.CGGTAG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.CTATTG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.GACACG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.GCATTC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.GCTGCC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.GGCATC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.GTGAGG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.GTTGAG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.TAGCGG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.TATCTC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.TCTCTG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.TGACAG.bed.anno.csv",
]
# One cleaned table per file, in the same order as pool2_files.
df_list = [pd.read_csv(path).drop("Unnamed: 0", axis=1) for path in pool2_files]
# Keep the historical df1..df21 names bound for any cell that still refers to them.
(df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11,
 df12, df13, df14, df15, df16, df17, df18, df19, df20, df21) = df_list
In [15]:
trito_matrix2 = pd.concat([df.set_index("position") for df in df_list], axis=1).reset_index().astype(object)
In [16]:
trito_matrix2.shape
Out[16]:
In [17]:
trito_matrix2.head()
Out[17]:
In [18]:
trito_matrix2 = trito_matrix2.drop("index", axis=1)
In [19]:
trito_matrix2.columns = ["RRBS_trito_pool_2_CGTACTAG.ACAACC", "RRBS_trito_pool_2_CGTACTAG.ACGTGG",
"RRBS_trito_pool_2_CGTACTAG.ACTCAC", "RRBS_trito_pool_2_CGTACTAG.AGGATG",
"RRBS_trito_pool_2_CGTACTAG.ATAGCG", "RRBS_trito_pool_2_CGTACTAG.ATCGAC",
"RRBS_trito_pool_2_CGTACTAG.CAAGAG", "RRBS_trito_pool_2_CGTACTAG.CATGAC",
"RRBS_trito_pool_2_CGTACTAG.CCTTCG", "RRBS_trito_pool_2_CGTACTAG.CGGTAG",
"RRBS_trito_pool_2_CGTACTAG.CTATTG", "RRBS_trito_pool_2_CGTACTAG.GACACG",
"RRBS_trito_pool_2_CGTACTAG.GCATTC", "RRBS_trito_pool_2_CGTACTAG.GCTGCC",
"RRBS_trito_pool_2_CGTACTAG.GGCATC", "RRBS_trito_pool_2_CGTACTAG.GTGAGG",
"RRBS_trito_pool_2_CGTACTAG.GTTGAG", "RRBS_trito_pool_2_CGTACTAG.TAGCGG",
"RRBS_trito_pool_2_CGTACTAG.TATCTC", "RRBS_trito_pool_2_CGTACTAG.TCTCTG",
"RRBS_trito_pool_2_CGTACTAG.TGACAG"]
In [20]:
trito_matrix2 = trito_matrix2.applymap(lambda x: int(x) if pd.notnull(x) else str("?"))
In [21]:
trito_matrix2 = trito_matrix2.astype(str).apply(''.join)
In [22]:
trito2 = pd.Series(trito_matrix2.index.astype(str).str.cat(trito_matrix2.astype(str),' '))
In [23]:
trito_matrix2 = trito2
In [24]:
trito_matrix2.to_csv("tritopool_2_final.phy", header=None, index=None)
In [ ]:
trito_matrix2
In [ ]:
trito_matrix2.shape
In [ ]:
trito_matrix.shape
In [ ]:
type(trito_matrix)
In [ ]:
In [ ]:
In [26]:
# Load all 42 per-sample tables (the 21 pool-1 files followed by the 21 pool-2
# files). Each table is read and its "Unnamed: 0" column is dropped in the
# same pass, driven by the file list, so this cell always starts from the
# files on disk and can be re-run safely (a separate in-place drop cell raises
# KeyError the second time it is run).
all_pool_files = [
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.ACAACC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.ACGTGG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.ACTCAC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.ATAGCG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.ATCGAC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.CAAGAG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.CATGAC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.CCTTCG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.CGGTAG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.CTATTG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.GACACG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.GCATTC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.GCTGCC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.GGCATC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.GTGAGG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.GTTGAG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.TAGCGG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.TATCTC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.TCTCTG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.TGACAG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_1_TAAGGCGA.TGCTGC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.ACAACC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.ACGTGG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.ACTCAC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.AGGATG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.ATAGCG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.ATCGAC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.CAAGAG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.CATGAC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.CCTTCG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.CGGTAG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.CTATTG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.GACACG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.GCATTC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.GCTGCC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.GGCATC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.GTGAGG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.GTTGAG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.TAGCGG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.TATCTC.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.TCTCTG.bed.anno.csv",
    "binary_position_RRBS_trito_pool_2_CGTACTAG.TGACAG.bed.anno.csv",
]
# One cleaned table per file, in the same order as all_pool_files.
df_list = [pd.read_csv(path).drop("Unnamed: 0", axis=1) for path in all_pool_files]
# Keep the historical df1..df42 names bound for any cell that still refers to them.
(df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12, df13, df14,
 df15, df16, df17, df18, df19, df20, df21, df22, df23, df24, df25, df26, df27,
 df28, df29, df30, df31, df32, df33, df34, df35, df36, df37, df38, df39, df40,
 df41, df42) = df_list
In [29]:
trito_matrix3 = pd.concat([df.set_index("position") for df in df_list], axis=1).reset_index().astype(object)
In [30]:
trito_matrix3.shape
Out[30]:
In [31]:
trito_matrix3 = trito_matrix3.drop("index", axis=1)
In [32]:
trito_matrix3.columns = ["RRBS_trito_pool_1_TAAGGCGA.ACAACC", "RRBS_trito_pool_1_TAAGGCGA.ACGTGG",
"RRBS_trito_pool_1_TAAGGCGA.ACTCAC", "RRBS_trito_pool_1_TAAGGCGA.ATAGCG",
"RRBS_trito_pool_1_TAAGGCGA.ATCGAC", "RRBS_trito_pool_1_TAAGGCGA.CAAGAG",
"RRBS_trito_pool_1_TAAGGCGA.CATGAC", "RRBS_trito_pool_1_TAAGGCGA.CCTTCG",
"RRBS_trito_pool_1_TAAGGCGA.CGGTAG", "RRBS_trito_pool_1_TAAGGCGA.CTATTG",
"RRBS_trito_pool_1_TAAGGCGA.GACACG", "RRBS_trito_pool_1_TAAGGCGA.GCATTC",
"RRBS_trito_pool_1_TAAGGCGA.GCTGCC", "RRBS_trito_pool_1_TAAGGCGA.GGCATC",
"RRBS_trito_pool_1_TAAGGCGA.GTGAGG", "RRBS_trito_pool_1_TAAGGCGA.GTTGAG",
"RRBS_trito_pool_1_TAAGGCGA.TAGCGG", "RRBS_trito_pool_1_TAAGGCGA.TATCTC",
"RRBS_trito_pool_1_TAAGGCGA.TCTCTG", "RRBS_trito_pool_1_TAAGGCGA.TGACAG",
"RRBS_trito_pool_1_TAAGGCGA.TGCTGC", "RRBS_trito_pool_2_CGTACTAG.ACAACC",
"RRBS_trito_pool_2_CGTACTAG.ACGTGG", "RRBS_trito_pool_2_CGTACTAG.ACTCAC",
"RRBS_trito_pool_2_CGTACTAG.AGGATG",
"RRBS_trito_pool_2_CGTACTAG.ATAGCG", "RRBS_trito_pool_2_CGTACTAG.ATCGAC",
"RRBS_trito_pool_2_CGTACTAG.CAAGAG", "RRBS_trito_pool_2_CGTACTAG.CATGAC",
"RRBS_trito_pool_2_CGTACTAG.CCTTCG", "RRBS_trito_pool_2_CGTACTAG.CGGTAG",
"RRBS_trito_pool_2_CGTACTAG.CTATTG", "RRBS_trito_pool_2_CGTACTAG.GACACG",
"RRBS_trito_pool_2_CGTACTAG.GCATTC", "RRBS_trito_pool_2_CGTACTAG.GCTGCC",
"RRBS_trito_pool_2_CGTACTAG.GGCATC", "RRBS_trito_pool_2_CGTACTAG.GTGAGG",
"RRBS_trito_pool_2_CGTACTAG.GTTGAG", "RRBS_trito_pool_2_CGTACTAG.TAGCGG",
"RRBS_trito_pool_2_CGTACTAG.TATCTC", "RRBS_trito_pool_2_CGTACTAG.TCTCTG",
"RRBS_trito_pool_2_CGTACTAG.TGACAG"]
In [33]:
trito_matrix3 = trito_matrix3.applymap(lambda x: int(x) if pd.notnull(x) else str("?"))
In [34]:
trito_matrix3 = trito_matrix3.astype(str).apply(''.join)
In [35]:
trito3 = pd.Series(trito_matrix3.index.astype(str).str.cat(trito_matrix3.astype(str),' '))
In [36]:
trito_matrix3 = trito3
In [37]:
trito_matrix3.to_csv("tritopool_total_final.phy", header=None, index=None)
In [38]:
trito_matrix3.head()
Out[38]:
In [ ]:
In [ ]:
In [ ]: