In [1]:
import pandas as pd
In [8]:
import csv

tax_map = "../data/shogun/database/miniGWG_darth_15.tax"

# Collect the distinct values of the second tab-separated column of the
# taxonomy map, each with its last ';'-separated level removed.
# NOTE(review): presumably this trims the lineage to the rank requested from
# SHOGUN further down -- confirm against the .tax file layout.
taxonomy = set()
# newline="" is what the csv module asks for when it is given a file object.
with open(tax_map, newline="") as inf:
    csv_inf = csv.reader(inf, delimiter="\t")
    for row in csv_inf:
        # csv.reader yields [] for a blank line; skip those (and any other row
        # without a second column) instead of failing with IndexError.
        if len(row) < 2:
            continue
        taxa_ = row[1]
        taxonomy.add(';'.join(taxa_.split(';')[:-1]))
In [18]:
import numpy as np

# Sort rather than list(): iterating a set of strings can give a different
# order in every kernel session, which would make the row/column order of the
# resulting table irreproducible. sorted() also accepts an existing list, so
# re-running this cell is harmless.
taxonomy = sorted(taxonomy)
num_tax = len(taxonomy)
# Square integer matrix of zeros with one row and one column per taxonomy entry.
mat = np.zeros((num_tax, num_tax), int)
In [19]:
# Set the diagonal of `mat` to 1 (in place), then wrap the matrix in a
# DataFrame whose row and column labels are both the taxonomy entries.
np.fill_diagonal(mat, 1)
df = pd.DataFrame(data=mat, index=taxonomy, columns=taxonomy)
In [21]:
# Save the df as a tab-separated table; the index column is headed "#OTU ID".
outfile = "../data/shogun/database/miniGWG_darth.bugbase.sampleperbug.txt"
# na_rep is documented as a string, so pass "0" rather than the integer 0.
df.to_csv(outfile, sep='\t', float_format="%d", na_rep="0", index_label="#OTU ID")
In [22]:
# Run SHOGUN from the command line
!shogun function -i "../data/shogun/database/miniGWG_darth.bugbase.sampleperbug.txt" -d "../data/shogun/database" -o "../results/bugbase" -l "species"