In [ ]:
from __future__ import print_function
!pip install -q papermill
!pip install -q matplotlib
!pip install -q networkx
import os
import tfx_utils
import tensorflow as tf
# Select matplotlib's interactive `notebook` backend for figures drawn in this notebook.
%matplotlib notebook
# Stop records from TensorFlow's logger being passed on to ancestor loggers.
# NOTE(review): presumably done to avoid duplicated log lines in cell output — confirm.
tf.get_logger().propagate = False
def _make_default_sqlite_uri(pipeline_name):
return os.path.join(os.environ['HOME'], 'airflow/tfx/metadata', pipeline_name, 'metadata.db')
def get_metadata_store(pipeline_name):
    """Build a ``TFXReadonlyMetadataStore`` for the pipeline ``pipeline_name``.

    The store is created with ``from_sqlite_db`` from the pipeline's default
    SQLite database path.
    """
    sqlite_path = _make_default_sqlite_uri(pipeline_name)
    return tfx_utils.TFXReadonlyMetadataStore.from_sqlite_db(sqlite_path)
# Name of the pipeline whose metadata database is opened below.
pipeline_name = 'taxi'
# Print the resolved database path so a wrong location is easy to spot.
pipeline_db_path = _make_default_sqlite_uri(pipeline_name)
print('Pipeline DB:\n{}'.format(pipeline_db_path))
# `store` is the metadata-store handle queried by the following cells.
store = get_metadata_store(pipeline_name)
Now print out the data artifacts:
In [ ]:
# Query the store for artifacts of type EXAMPLES. The call is the cell's last
# expression, so the notebook renders its result as the cell output.
# NOTE(review): the `_df` suffix suggests a DataFrame is returned — confirm in tfx_utils.
store.get_artifacts_of_type_df(tfx_utils.TFXArtifactTypes.EXAMPLES)
Now visualize the dataset features.
Hint: try ID 2 or 3
In [ ]:
# Visualize stats for data
# ID of the examples artifact to inspect. 2 follows the hint above this cell;
# change it (e.g. to 3) to look at a different artifact.
examples_artifact_id = 2
store.display_stats_for_examples(examples_artifact_id)
Now plot the artifact lineage:
In [ ]:
# Try different IDs here. Click stop in the plot when changing IDs.
# Replace the `<insert artifact ID here>` placeholder with an artifact ID
# before running; as written the cell is not valid Python.
# The backend magic is issued again here, ahead of the plot call.
%matplotlib notebook
store.plot_artifact_lineage(<insert artifact ID here>)