In [20]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from bokeh.plotting import figure, show, output_notebook
from sklearn.decomposition import PCA

from custom import load_sequence_and_metadata, seq2chararray, encode_array, compute_seq_lengths, get_density_interval
# Apply matplotlib's 'fivethirtyeight' style sheet to the figures drawn below.
plt.style.use('fivethirtyeight')
# Switch seaborn to its 'talk' plotting context.
sns.set_context('talk')
# Load the autoreload extension; mode 2 re-imports modules before code runs,
# so edits to imported modules (e.g. `custom`) are picked up without a restart.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Request the 'retina' figure format from the inline backend.
%config InlineBackend.figure_format = 'retina'
In [2]:
# Load data: `load_sequence_and_metadata` (imported from `custom`) returns two
# values, unpacked here as `sequences` and `metadata`.
sequences, metadata = load_sequence_and_metadata()
In [13]:
# Annotate each record with the year taken from its 'Collection Date' value.
metadata['Year'] = metadata['Collection Date'].apply(lambda stamp: stamp.year)

# Count the non-null 'Name' entries within each year and plot the counts.
yearly_counts = metadata.groupby('Year').count()['Name']
ax = yearly_counts.plot()
ax.set(title='Number of Sequences Per Year', ylabel='Number of Sequences')
plt.show()
In [27]:
# Read the metadata table that carries the embedding coordinates.
meta_coords = pd.read_csv('data/metadata_with_embeddings.csv')
# Keep only the three coordinate columns; note that `array` is still a
# DataFrame despite its name.
array = meta_coords.loc[:, ['coords0', 'coords1', 'coords2']]
# 99 is passed as the first argument of `get_density_interval`
# (presumably a percentage -- TODO confirm against `custom`).
lowp, highp = get_density_interval(99, array)
# Show both bounds as the cell output.
(lowp, highp)
Out[27]:
In [30]:
# Preview only the first rows of the coordinate columns instead of rendering
# the whole frame as cell output.
array.head()
Out[30]:
In [ ]:
# Send Bokeh output to the notebook. `output_notebook` is imported from
# `bokeh.plotting` together with the other dependencies in the first cell.
output_notebook()
In [ ]:
# Create a Bokeh figure with default settings (`figure` comes from
# `bokeh.plotting`); nothing is added to it in this cell.
p = figure()
In [ ]: