Using Great Expectations


In [0]:
import great_expectations as ge
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline

In [0]:
# Read the hospital charge CSV and wrap the resulting frame in a Great
# Expectations PandasDataset in a single step, so that `df` exposes the
# expect_* methods called on it further down.
# NOTE: '/PATHTO/' looks like a placeholder directory -- point it at the real
# location of the file before running.
df = ge.dataset.PandasDataset(
    pd.read_csv('/PATHTO/hospital_charge_data.csv')
)

# Print the column labels exactly as they were loaded.
print(df.columns)

In [0]:
# Preview the first rows of the dataset as a quick sanity check of the load;
# as the cell's last expression, the result is rendered as the cell output.
df.head()

In [0]:
# Expect every 'Provider State' value to have a length of exactly 2
# (presumably two-letter state abbreviations -- confirm against the data).
df.expect_column_value_lengths_to_equal(
    column='Provider State',
    value=2
)

In [0]:
# Expect every ' Total Discharges ' value to lie between 0 and 200.
# The column label is written with a leading and a trailing space; keep it
# byte-for-byte (presumably it mirrors the CSV header -- compare with the
# printed column list).
df.expect_column_values_to_be_between(
    column=' Total Discharges ',
    min_value=0,
    max_value=200
)

In [0]:
# Expect every ' Average Covered Charges ' value to be of type 'string'.
# NOTE(review): a charge column checked as string suggests the values are
# stored as formatted text rather than numbers -- confirm against the data.
# The accepted type names depend on the installed great_expectations version.
df.expect_column_values_to_be_of_type(
    column=' Average Covered Charges ',
    type_='string'
)

In [0]:
# Expect every 'Provider Street Address' value to start with an optional run
# of digits followed by at least one word character.
# `\b` is deliberately not used inside the character class: within [...] it
# denotes the backspace character (0x08), not a word boundary, so a class such
# as [\b\w] would only add an unintended control character to the match set.
df.expect_column_values_to_match_regex(
    column='Provider Street Address',
    regex=r'^[0-9]*\w+'
)

In [0]:
# Expect the number of distinct 'Provider Id' values to fall between
# 3000 and 4000.
df.expect_column_unique_value_count_to_be_between(
    column='Provider Id',
    min_value=3000,
    max_value=4000
)

In [0]: