In [0]:
import great_expectations as ge
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
In [0]:
# Read the hospital charge CSV and wrap the resulting frame in a Great
# Expectations PandasDataset; the expect_* calls in later cells run against `df`.
# NOTE(review): '/PATHTO/' looks like a placeholder — point it at the real file.
charges_csv = '/PATHTO/hospital_charge_data.csv'
df = ge.dataset.PandasDataset(pd.read_csv(charges_csv))
print(df.columns)
In [0]:
# Preview the first rows of the wrapped dataset (row count spelled out explicitly).
df.head(n=5)
In [0]:
# Expect every 'Provider State' value to have a length of exactly 2
# (presumably two-letter state abbreviations — confirm against the data).
df.expect_column_value_lengths_to_equal(
    column='Provider State',
    value=2,
)
In [0]:
# Expect ' Total Discharges ' values to lie between 0 and 200.
# The column name is written with a leading and trailing space here; keep it
# exactly as-is so it continues to match the header this cell targets.
df.expect_column_values_to_be_between(
    column=' Total Discharges ',
    min_value=0,
    max_value=200,
)
In [0]:
# Expect ' Average Covered Charges ' values to be of type 'string'.
# The column name is written with a leading and trailing space here.
# NOTE(review): which type names are accepted for `type_` depends on the
# installed Great Expectations version — confirm 'string' is valid there.
df.expect_column_values_to_be_of_type(
    column=' Average Covered Charges ',
    type_='string',
)
In [0]:
# Expect each 'Provider Street Address' to begin with optional digits followed
# by at least one word character.
# The previous pattern used the class `[\b\w]`. Inside a character class,
# Python's `re` treats `\b` as the backspace character (not a word boundary),
# so it only added "or a literal backspace" to the match. It is dropped here;
# for any address that does not contain a backspace the result is unchanged.
df.expect_column_values_to_match_regex(
    column='Provider Street Address',
    regex=r'^[0-9]*\w+',
)
In [0]:
# Expect the number of distinct 'Provider Id' values to be between 3000 and 4000.
df.expect_column_unique_value_count_to_be_between(
    column='Provider Id',
    min_value=3000,
    max_value=4000,
)
In [0]: