In [0]:
import json
import great_expectations as ge
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
In [0]:
# Read the Titanic sample data through great_expectations (not plain pandas)
# so that the expect_* calls in the following cells are available on the frame.
titanic_csv_path = "tests/examples/titanic.csv"
titanic_df = ge.read_csv(titanic_csv_path)
In [0]:
# Preview the top of the loaded frame to sanity-check the columns and values.
titanic_df.head()
In [0]:
# Plot the distribution of passenger ages to eyeball sensible expectation bounds.
age_column = titanic_df["Age"]
age_column.hist()
In [0]:
# Expectation: the mean of the Age column lies between 20 and 40.
titanic_df.expect_column_mean_to_be_between(
    "Age",
    min_value=20,
    max_value=40,
)
In [0]:
# Expectation: every individual Age value lies between 0 and 80.
titanic_df.expect_column_values_to_be_between(
    "Age",
    min_value=0,
    max_value=80,
)
In [0]:
# Expectation: at least 95% of Name values match the pattern
# "Capitalized word, optional ' (Capitalized word)', then ', '".
# The pattern is a raw string so that \( and \) reach the regex engine verbatim
# instead of being treated as (invalid) string escape sequences.
titanic_df.expect_column_values_to_match_regex(
    'Name',
    r'[A-Z][a-z]+(?: \([A-Z][a-z]+\))?, ',
    mostly=0.95,
)
In [0]:
# Expectation: Sex only ever takes one of the two listed labels.
allowed_sex_values = ['male', 'female']
titanic_df.expect_column_values_to_be_in_set('Sex', value_set=allowed_sex_values)
In [0]:
# Expectation: Survived is a 0/1 flag and nothing else.
allowed_survived_values = [1, 0]
titanic_df.expect_column_values_to_be_in_set('Survived', value_set=allowed_survived_values)
In [0]:
# Expectation: PClass is restricted to the three listed class labels.
allowed_pclass_values = ['1st', '2nd', '3rd']
titanic_df.expect_column_values_to_be_in_set('PClass', value_set=allowed_pclass_values)
In [0]:
#!!! Would be good to add an expectation that verifies the relationship between Sex and Sex Code
In [0]:
# Show the expectations accumulated on titanic_df as indented JSON.
# print(...) is called as a function: with a single argument this is valid on
# both Python 2 and Python 3, whereas the bare print statement fails on Python 3.
print(json.dumps(titanic_df.get_expectation_suite(), indent=2))
# titanic_df.save_expectation_suite('titanic_expectations.json')
In [0]: