In [37]:
import os

# Load the KBase auth token from a local file into the environment.
# The `with` block closes the file handle deterministically instead of
# leaving it open until garbage collection.
with open('/tmp/kb_auth_token.txt') as token_file:
    os.environ['KB_AUTH_TOKEN'] = token_file.read().strip()
In [42]:
import biokbase
import biokbase.data_api.tests as tests
import biokbase.data_api.object
import biokbase.data_api.assembly
import biokbase.data_api.taxon
import biokbase.data_api.genome_annotation
import pandas as pd
import numpy as np
import qgrid as qg
qg.nbinstall()
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
%matplotlib inline
In [43]:
def get_genome_summary(genome_annotation=None):
    """Collect feature-type information for a single GenomeAnnotation object.

    Args:
        genome_annotation: object exposing ``get_typestring()``,
            ``get_feature_types()`` and ``get_feature_type_counts()``.

    Returns:
        dict with two keys:
            "feature_types": the value returned by ``get_feature_types()``.
            "feature_type_counts": the value returned by
                ``get_feature_type_counts()`` for those feature types.

    Raises:
        TypeError: if no object is given, or if the part of its typestring
            before the first '-' is not listed in
            ``biokbase.data_api.genome_annotation.TYPES``.
    """
    if genome_annotation is None:
        raise TypeError("No GenomeAnnotation object given.")

    # Only the portion of the typestring before the first '-' is compared
    # against the list of recognized types.
    base_type = genome_annotation.get_typestring().split('-')[0]
    if base_type not in biokbase.data_api.genome_annotation.TYPES:
        raise TypeError("{0} is not a recognized GenomeAnnotation type.".format(type(genome_annotation)))

    annotation_details = dict()
    # "feature_types" must be populated before it is used as the argument
    # below; previously it was read from the empty dict and raised KeyError.
    annotation_details["feature_types"] = genome_annotation.get_feature_types()
    annotation_details["feature_type_counts"] = genome_annotation.get_feature_type_counts(annotation_details["feature_types"])
    print(annotation_details["feature_type_counts"])
    return annotation_details
In [44]:
import pprint
import datetime
def parse_all_existing_annotations(objects=None):
    """Index the KBaseGenomes.Genome objects of a listing by name.

    Args:
        objects: iterable of entries exposing ``.name`` and ``.type``.
            When omitted, the module-level ``object_list`` is used, which
            keeps the original zero-argument call working.

    Returns:
        dict mapping each entry's ``name`` to the entry itself, restricted to
        entries whose ``type`` starts with "KBaseGenomes.Genome". If two
        entries share a name, the later one wins.
    """
    if objects is None:
        # Fall back to the notebook-level listing; it must already be
        # defined by the time this function is called.
        objects = object_list
    return dict(
        (entry.name, entry)
        for entry in objects
        if entry.type.startswith("KBaseGenomes.Genome")
    )
# Identifiers used below, named so they are easy to spot and change.
# NOTE(review): 1011 is presumably a workspace ID - confirm against the
# biokbase.data_api.browse documentation.
WORKSPACE_ID = 1011
MELAMPSORA_REF = "kb|g.3157"
ARABIDOPSIS_REF = "kb|g.3899"

# Open the browser, list what it contains, and look up the two genomes.
b = biokbase.data_api.browse(WORKSPACE_ID)
object_list = b.ls()
melampsora = b[MELAMPSORA_REF]
arabidopsis = b[ARABIDOPSIS_REF]
In [45]:
def _print_banner():
    """Print the blank-line / double '#' rule separator shown around each summary."""
    print('\n')
    print('#' * 80)
    print('#' * 80)
    print('\n')


# NOTE(review): this cell originally built the dict from `test.name` /
# `test.object`, but `test` is not defined anywhere in the visible notebook,
# so it raised NameError on a fresh kernel. The two genomes fetched above are
# used instead - confirm this is the intended set.
annotations = dict(
    (entry.name, entry.object) for entry in (melampsora, arabidopsis)
)

start = datetime.datetime.utcnow()

for n in annotations:
    _print_banner()
    print(n)

    overview = get_genome_summary(annotations[n])
    pprint.pprint(overview)

    _print_banner()

end = datetime.datetime.utcnow()

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Total time to summarize existing Genome Annotations, Taxons, Assemblies : {0}".format(end - start))
In [ ]: