In [1]:
import pandas as pd
In [3]:
# Load the two tab-separated input tables from the seminar 5 data directory.
# `sep='\t'` is what `read_table` uses by default; it is spelled out here so
# the expected file format is visible at the call site.
family_classification_metadata = pd.read_table(
    '../seminar_5/data/family_classification_metadata.tab',
    sep='\t',
)
family_classification_sequences = pd.read_table(
    '../seminar_5/data/family_classification_sequences.tab',
    sep='\t',
)
In [4]:
# Preview the first five rows of the metadata table (5 is the default for `head`).
family_classification_metadata.head(n=5)
Out[4]:
In [5]:
# Preview the first five rows of the sequences table (5 is the default for `head`).
family_classification_sequences.head(n=5)
Out[5]:
In [7]:
# Summary statistics for the metadata table, using pandas' default column
# selection (`include=None`).
# NOTE(review): which columns end up summarised depends on the table's dtypes,
# which are not visible here; confirm against the rendered output.
family_classification_metadata.describe(include=None)
Out[7]:
Use your ProtVec embedding from homework 5 to perform protein family classification using an RNN.
The article describing the original research can be found here: http://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0141287&type=printable