This is a dataset of assisted living, nursing, and residential care facilities in Oregon that were open as of September 2016. For each, we have:
Data were munged here.
In [91]:
import pandas as pd
import numpy as np
# `IPython.display` is the public location of these helpers; importing them
# from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Inject CSS that sets the `.container` element's width to 100%.
display(HTML("<style>.container { width:100% !important; }</style>"))
In [92]:
# Read the processed facilities scrape into the notebook-wide DataFrame `df`.
facilities_csv = '../../data/processed/facilities-3-29-scrape.csv'
df = pd.read_csv(facilities_csv)
In [93]:
# Number of rows in the dataset. `len` counts rows directly, instead of
# counting the non-null values of whichever column happens to come first.
len(df)
Out[93]:
Facilities that have no offline records.
In [94]:
# Rows where `offline` is null. Summing the boolean mask counts the True
# entries without depending on the contents of the first column.
df['offline'].isnull().sum()
Out[94]:
Facilities that have offline records.
In [95]:
# Rows where `offline` is not null. Summing the boolean mask counts the True
# entries without depending on the contents of the first column.
df['offline'].notnull().sum()
Out[95]:
In [96]:
# Rows where `offline` exceeds `online` and `online` is present.
# The mask is summed directly rather than counting the first column of the
# filtered frame.
((df['offline'] > df['online']) & df['online'].notnull()).sum()
Out[96]:
In [97]:
# Rows with an `offline` value but no `online` value.
# The mask is summed directly rather than counting the first column of the
# filtered frame.
(df['online'].isnull() & df['offline'].notnull()).sum()
Out[97]:
In [98]:
# Rows with an `online` value but no `offline` value.
# The mask is summed directly rather than counting the first column of the
# filtered frame.
(df['online'].notnull() & df['offline'].isnull()).sum()
Out[98]:
In [99]:
# Rows that have at least one of `online` / `offline` present.
# The mask is summed directly rather than counting the first column of the
# filtered frame.
(df['online'].notnull() | df['offline'].notnull()).sum()
Out[99]:
In [100]:
# Percentage of rows with no `offline` value. The mean of a boolean mask is
# the share of True entries, so this no longer depends on the first column
# being fully populated.
df['offline'].isnull().mean() * 100
Out[100]:
In [101]:
# Sum of `fac_capacity` over rows that have an `offline` value.
# Select the column before summing so the other (possibly non-numeric)
# columns are never aggregated.
df.loc[df['offline'].notnull(), 'fac_capacity'].sum()
Out[101]:
In [102]:
# Rows where `online` is null. Summing the boolean mask counts the True
# entries without depending on the contents of the first column.
df['online'].isnull().sum()
Out[102]:
In [114]:
# Rows whose `offline` + `online` sum exceeds 50. A row with either value
# missing has a NaN sum, which fails the comparison and is excluded.
# `.copy()` makes `over_50` an independent frame so that adding columns to it
# later does not raise SettingWithCopyWarning.
over_50 = df[(df['offline'] + df['online']) > 50].copy()
In [115]:
# Add `total` = `online` + `offline`. `.assign` returns a new frame, so this
# never writes into a slice of `df` and the cell can be re-run safely.
over_50 = over_50.assign(total=over_50['online'] + over_50['offline'])
In [116]:
# Add `pct_offline`: `offline` as a percentage of `total`. `.assign` returns
# a new frame, so this never writes into a slice of `df`.
over_50 = over_50.assign(pct_offline=over_50['offline'] / over_50['total'] * 100)
In [117]:
# Show the row(s) of `over_50` for one specific facility name.
over_50.loc[over_50['facility_name'].eq('Avamere Health Services of Rogue Valley')]
Out[117]:
In [118]:
# First ten rows of `over_50` when ordered by `pct_offline`, highest first.
over_50.sort_values(by='pct_offline', ascending=False).iloc[:10]
Out[118]:
In [ ]: