In [20]:
import pandas as pd
In [23]:
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
# Tell Bokeh to render plots inline in this notebook for the later show() calls.
output_notebook()
In [24]:
# Load the two tab-separated, header-less logs (plain rep82 and its dusted
# counterpart); the first column of each file becomes the frame's index.
df_rep82 = pd.read_csv(
    "./../data/references/rep82/utree/rep82.gg.log",
    sep="\t",
    header=None,
    index_col=0,
)
df_rep82_dusted = pd.read_csv(
    "./../data/references/rep82_dusted/utree/rep82_dusted.gg.log",
    sep="\t",
    header=None,
    index_col=0,
)
In [25]:
# Add a "level" column to each frame: the number of ";"-separated fields in
# the row's index label (separator count + 1). The vectorised string accessor
# replaces the per-row Python comprehension and yields the same integers.
for df in (df_rep82, df_rep82_dusted):
    df["level"] = df.index.str.count(";") + 1
In [34]:
# `bokeh.charts` is no longer part of Bokeh core, so the histogram is built
# from `bokeh.plotting.figure` (imported above) and numpy's binning instead.
import numpy as np


def level_histogram(levels, title, bins=3):
    """Build a Bokeh figure showing a histogram of ``levels``.

    Parameters
    ----------
    levels : array-like of numbers
        Values to bin (here, the per-row "level" column).
    title : str
        Title displayed above the plot.
    bins : int, optional
        Number of equal-width bins; defaults to 3, matching the original charts.

    Returns
    -------
    A Bokeh figure that can be passed to ``show``.
    """
    counts, edges = np.histogram(levels, bins=bins)
    plot = figure(title=title, x_axis_label="level", y_axis_label="count")
    # One rectangle per bin: spans [left edge, right edge], height = bin count.
    plot.quad(top=counts, bottom=0, left=edges[:-1], right=edges[1:], line_color="white")
    return plot


hist = level_histogram(df_rep82["level"], title="Taxonomic Levels")
hist2 = level_histogram(df_rep82_dusted["level"], title="Taxonomic Levels")
In [32]:
# Display the level histogram built from df_rep82.
show(hist)
In [35]:
# Display the level histogram built from df_rep82_dusted.
show(hist2)
In [40]:
# Only two levels? Is the log being output correctly?
# Sanity check: list the distinct level values found in each frame
# (rep82 first, then the dusted variant).
for df in (df_rep82, df_rep82_dusted):
    print(df["level"].unique())
In [39]:
# NOTE(review): `levels` is not assigned in any visible cell — presumably a
# leftover from a deleted cell; confirm, or replace with df_rep82["level"].
levels.unique()
Out[39]: