Data were munged here.
In [3]:
import pandas as pd
import numpy as np
# Pull the display helpers from the public IPython.display module; importing
# them from IPython.core.display has been deprecated since IPython 7.14.
from IPython.display import display, HTML

# Inject a CSS rule so elements with the `.container` class use the full
# browser width.
# NOTE(review): `.container` looks like the classic Notebook layout class --
# confirm it still has an effect in the front end this notebook is run in.
display(HTML("<style>.container { width:100% !important; }</style>"))
In [4]:
# Input files, given relative to the notebook's working directory.
COMPLAINTS_CSV = '../../data/processed/complaints-3-29-scrape.csv'
OWNER_HISTORY_CSV = '../../data/raw/APD_HistOwner.csv'

# df holds the processed complaints scrape; owners holds the raw owner table.
# Both are read with pandas defaults, so no column is parsed as a date here.
df = pd.read_csv(COMPLAINTS_CSV)
owners = pd.read_csv(OWNER_HISTORY_CSV)
In [5]:
# Map the raw HOW_* column names onto the short names used in the rest of the
# notebook, then convert the activation date from text to a datetime column.
OWNER_COLUMN_NAMES = {
    'HOW_IdNumber': 'owner_id',
    'HOW_CcmuNumber': 'fac_ccmunumber',
    'HOW_DateActive': 'license_date',
}
owners = owners.rename(columns=OWNER_COLUMN_NAMES)
owners['license_date'] = pd.to_datetime(owners['license_date'])
In [6]:
# Keep only the three renamed columns, in this order; everything else in the
# owner table is dropped from here on.
owner_columns = ['fac_ccmunumber', 'license_date', 'owner_id']
owners = owners.loc[:, owner_columns]
In [7]:
# Last ownership change: show every owner record for facility 50M132 so the
# most recent license_date can be read off the output.
is_target_facility = owners['fac_ccmunumber'].eq('50M132')
owners.loc[is_target_facility]
Out[7]:
In [8]:
# Slice of public River Grove complaints: rows for facility 50M132 whose
# `public` column is 'online'.
is_river_grove = df['facility_id'].eq('50M132')
is_public_online = df['public'].eq('online')
rg = df.loc[is_river_grove & is_public_online]
In [9]:
# Number of public River Grove complaints with incident_date on or after
# 2015-04-01 (presumably the ownership-change date found above -- confirm).
# len() counts rows directly: it does not depend on nulls in the first column
# and avoids positional `[0]` lookup on a label-indexed Series, which pandas
# has deprecated.
# NOTE(review): incident_date is not parsed as a date in the visible cells, so
# this compares strings; that is only correct if the values are ISO
# YYYY-MM-DD text -- confirm against the CSV.
len(rg[rg['incident_date'] >= '2015-04-01'])
Out[9]:
In [10]:
# Number of public River Grove complaints with incident_date strictly before
# 2015-04-01 (presumably the ownership-change date found above -- confirm).
# len() counts rows directly: it does not depend on nulls in the first column
# and avoids positional `[0]` lookup on a label-indexed Series, which pandas
# has deprecated.
# NOTE(review): incident_date is not parsed as a date in the visible cells, so
# this compares strings; that is only correct if the values are ISO
# YYYY-MM-DD text -- confirm against the CSV.
len(rg[rg['incident_date'] < '2015-04-01'])
Out[10]:
In [11]:
# Number of public River Grove complaints with incident_date between
# 2013-04-01 and 2015-04-01, i.e. the two years leading up to the cutoff.
# len() counts rows directly: it does not depend on nulls in the first column
# and avoids positional `[0]` lookup on a label-indexed Series, which pandas
# has deprecated.
# NOTE(review): the lower bound is strict, so incidents dated exactly
# 2013-04-01 are excluded -- confirm that is intended.
# NOTE(review): incident_date is not parsed as a date in the visible cells, so
# these are string comparisons; that is only correct if the values are ISO
# YYYY-MM-DD text -- confirm against the CSV.
before_cutoff = rg['incident_date'] < '2015-04-01'
after_window_start = rg['incident_date'] > '2013-04-01'
len(rg[before_cutoff & after_window_start])
Out[11]:
In [12]:
# One row per distinct online facility name, keeping the first occurrence in
# rg's existing row order, then listed by incident_date in descending order.
# NOTE(review): "first" follows row order, not date order, and the sort is on
# whatever dtype incident_date has (not parsed in the visible cells).
name_by_incident = rg[['incident_date', 'online_fac_name']]
first_row_per_name = name_by_incident.drop_duplicates(subset='online_fac_name', keep='first')
first_row_per_name.sort_values('incident_date', ascending=False)
Out[12]: