In [1]:
import pandas as pd
import numpy as np
import pickle
import statsmodels.api as sm
from sklearn import cluster
import matplotlib.pyplot as plt
%matplotlib inline
from bs4 import BeautifulSoup as bs
import requests
import time
# from ggplot import *
In [10]:
asthma_data = pd.read_csv('asthma-emergency-department-visit-rates-by-zip-code.csv')
asthma_data.head()
Out[10]:
In [12]:
asthma_data[['zip','coordinates']] = asthma_data.loc[:,'ZIP code'].str.split(
    pat='\n', expand=True)
asthma_data.drop('ZIP code', axis=1,inplace=True)
asthma_data.head(2)
Out[12]:
In [5]:
asthma_grouped = asthma_data.groupby(by=['Year','zip']).sum()
asthma_grouped.head(4)
Out[5]:
In [43]:
asthma_grouped.drop('County Fips code', axis=1, inplace=True)
temp_grp = asthma_data.groupby(by=['Year','zip']).first()
asthma_grouped[['fips','county','coordinates']] = temp_grp.loc[:, ['County Fips code',
                                                                   'County',
                                                                   'coordinates']]
# 'All Ages' already includes the Adults and Children rows, so the group sum
# counts every visit twice; halve it to recover the true total
asthma_grouped.loc[:,'Number of Visits'] = asthma_grouped.loc[:,'Number of Visits']/2
asthma_grouped.head(2)
Out[43]:
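Since each 'All Ages' row should equal the matching Adults plus Children rows, the halved group sums can be sanity-checked against the 'All Ages' rows directly. A quick hedged check (it assumes that relationship holds throughout the data):
In [ ]:
# difference should be ~0 everywhere if 'All Ages' = Adults + Children
all_ages = asthma_data.loc[asthma_data['Age Group'] == 'All Ages'].groupby(
    ['Year', 'zip'])['Number of Visits'].sum()
(asthma_grouped['Number of Visits'] - all_ages).abs().max()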
In [60]:
A = [5]
B = A          # B is another name for the same list object
A.append(6)
print(B)       # [5, 6] -- mutating A shows up through B
In [61]:
import copy    # needed below for copy.deepcopy on nested objects
A = [5]
B = A.copy()   # shallow copy: B is a new list
A.append(6)
print(B)       # [5] -- appending to A no longer affects B
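list.copy() only makes a shallow copy; for nested objects the copy module's deepcopy is what actually severs all sharing. A minimal sketch:
In [ ]:
A = [[5]]
B = copy.copy(A)       # shallow: the inner list is still shared
C = copy.deepcopy(A)   # deep: the inner list is duplicated too
A[0].append(6)
print(B)               # [[5, 6]] -- the shallow copy sees the mutation
print(C)               # [[5]]    -- the deep copy does not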
In [62]:
asthma_grouped[['fips','county','coordinates']] = temp_grp.loc[:, ['County Fips code',
                                                                   'County',
                                                                   'coordinates']].copy()
In [48]:
asthma_unstacked = asthma_data.pivot_table(index=['Year',
                                                  'zip',
                                                  'County',
                                                  'coordinates',
                                                  'County Fips code'],
                                           columns='Age Group',
                                           values='Number of Visits')
asthma_unstacked.reset_index(drop=False,inplace=True)
asthma_unstacked.head(2)
Out[48]:
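pivot_table reshapes the data from long to wide: each distinct 'Age Group' value becomes its own column. The same reshape on a tiny hypothetical frame:
In [ ]:
toy = pd.DataFrame({'Year': [2015, 2015],
                    'zip': ['90001', '90001'],
                    'Age Group': ['All Ages', 'Children (0-17)'],
                    'Number of Visits': [120, 45]})
toy.pivot_table(index=['Year', 'zip'], columns='Age Group',
                values='Number of Visits')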
In [49]:
asthma_unstacked.rename(columns={'zip': 'Zip',
                                 'coordinates': 'Coordinates',
                                 'County Fips code': 'Fips',
                                 'Adults (18+)': 'Adults',
                                 'All Ages': 'Incidents',
                                 'Children (0-17)': 'Children'},
                        inplace=True)
asthma_2015 = asthma_unstacked.loc[asthma_unstacked.Year==2015,:]
asthma_2015.head(2)
Out[49]:
In [287]:
pickle.dump(asthma_unstacked, open('asthma_unstacked.p', 'wb'))  # pickle preserves dtypes exactly
asthma_unstacked.to_csv('asthma_unstacked.csv')                  # CSV copy for portability
In [288]:
asthma_unstacked = pickle.load(open('asthma_unstacked.p','rb'))
The base URL, or endpoint, is:
http://www.airnowapi.org/aq/observation/zipCode/historical/
A ? after the endpoint marks the start of the query string, and & separates the name=value pairs within it. Five name=value pairs are sent with the GET request below.
In [19]:
base_url = "http://www.airnowapi.org/aq/observation/zipCode/historical/"
attributes = ["format=application/json",
              "zipCode=20007",
              "date=2017-09-05T00-0000",
              "distance=25",
              "API_KEY=39DC3727-09BD-48C4-BBD8-XXXXXXXXXXXX"]
post_url = '&'.join(attributes)
print(post_url)
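requests can also build the query string for you from a dict passed to params, which handles the joining and URL encoding. An equivalent sketch (same masked key placeholder as above):
In [ ]:
params = {'format': 'application/json',
          'zipCode': '20007',
          'date': '2017-09-05T00-0000',
          'distance': '25',
          'API_KEY': '39DC3727-09BD-48C4-BBD8-XXXXXXXXXXXX'}
ingredients = requests.get(base_url, params=params)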
In [17]:
ingredients = requests.get(base_url, params=post_url)  # pass the query string explicitly as params
ingredients = ingredients.json()
print(ingredients[0])
In [18]:
for item in ingredients:
    AQIType = item['ParameterName']
    City = item['ReportingArea']
    AQIValue = item['AQI']
    print("For Location ", City, " the AQI for ", AQIType, "is ", AQIValue)
In [ ]:
time.sleep(1)  # pause between calls to respect the API's rate limit
In [ ]:
base_url = "http://www.airnowapi.org/aq/observation/zipCode/historical/"
zips = asthma_2015.Zip.unique()
zips = zips[:450]  # cap the number of zip codes queried
date = "date=2015-09-01T00-0000"
api_key = "API_KEY=39DC3727-09BD-48C4-BBD8-XXXXXXXXXXXX"
return_format = "format=application/json"
zip_str = "zipCode="  # the zip value itself is appended inside the loop
post_url = "&".join([date, api_key, return_format, zip_str])
data_dict = {}
for zipcode in zips:
    time.sleep(1)  # throttle requests
    zip_post = post_url + str(zipcode)
    ingredients = requests.get(base_url, params=zip_post)
    ingredients = ingredients.json()
    zip_data = {}
    for data_point in ingredients:
        AQIType = data_point['ParameterName']
        AQIVal = data_point['AQI']
        zip_data[AQIType] = AQIVal
    data_dict[zipcode] = zip_data
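The loop above assumes every call succeeds. A hedged variant of the same loop that skips zip codes whose response is an error or is not valid JSON:
In [ ]:
for zipcode in zips:
    time.sleep(1)
    response = requests.get(base_url, params=post_url + str(zipcode))
    if response.status_code != 200:
        continue                     # skip zips the API rejects
    try:
        ingredients = response.json()
    except ValueError:
        continue                     # skip malformed responses
    data_dict[zipcode] = {d['ParameterName']: d['AQI'] for d in ingredients}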
In [21]:
ingredients = requests.get("https://en.wikipedia.org/wiki/Data_science")
soup = bs(ingredients.text)
print(soup.body.p)
In [23]:
parser_div = soup.find("div", class_="mw-parser-output")
wiki_content = parser_div.find_all('p')
print(wiki_content[0])
print('*****************************************')
print(wiki_content[0].text)
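To pull more of the article than a single paragraph, the same tags can be joined. A minimal sketch:
In [ ]:
intro = '\n'.join(p.text for p in wiki_content[:3])  # first few paragraphs
print(intro[:300])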
In [306]:
pickle.dump(data_dict,open('AQI_data_raw.p','wb'))
In [307]:
collected = list(data_dict.keys())
asthma_2015_sub = asthma_2015.loc[asthma_2015.Zip.isin(collected),:]
In [308]:
aqi_data = pd.DataFrame.from_dict(data_dict, orient='index')
aqi_data.reset_index(drop=False,inplace=True)
aqi_data.rename(columns={'index':'Zip'},inplace=True)
aqi_data.head()
Out[308]:
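With orient='index', each outer dictionary key (a zip code) becomes a row and each inner key (an AQI parameter) becomes a column. A toy illustration with made-up values:
In [ ]:
pd.DataFrame.from_dict({'90001': {'OZONE': 34, 'PM2.5': 12}}, orient='index')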
https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.merge.html
In [309]:
asthma_aqi = asthma_2015_sub.merge(aqi_data, how='outer', on='Zip')  # keep zips present in either table
asthma_aqi.head(2)
Out[309]:
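Because the join is outer, zip codes present in only one of the two tables survive with NaNs in the other table's columns. A quick check:
In [ ]:
asthma_aqi.isna().sum()  # missing values per column after the outer merge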
In [310]:
asthma_aqi.Incidents.plot.hist(bins=20)
Out[310]:
In [311]:
asthma_aqi.loc[:,['Incidents','OZONE']].plot.density()
Out[311]:
In [312]:
asthma_aqi.loc[:,['PM2.5','PM10']].plot.hist()
Out[312]:
In [327]:
asthma_aqi.plot.scatter('OZONE','PM2.5')
Out[327]:
In [320]:
y = asthma_aqi.loc[:, 'Incidents']
x = asthma_aqi.loc[:, ['OZONE','PM2.5']].copy()  # copy so adding a column does not warn
x['c'] = 1  # constant column for the intercept
ols_model1 = sm.OLS(y, x, missing='drop')
results = ols_model1.fit()
print(results.summary())
pickle.dump([results, ols_model1], open('ols_model_results.p', 'wb'))
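statsmodels also ships add_constant for the intercept term; an equivalent sketch of the same regression:
In [ ]:
x2 = sm.add_constant(asthma_aqi.loc[:, ['OZONE', 'PM2.5']])
results2 = sm.OLS(y, x2, missing='drop').fit()
print(results2.params)  # same fit; the intercept is simply named 'const'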
In [325]:
model_df = asthma_aqi.loc[:, ['OZONE', 'PM2.5', 'Incidents']]
model_df.dropna(axis=0, inplace=True)
# mean-normalize so no single feature dominates the Euclidean distances
model_df = (model_df - model_df.mean()) / (model_df.max() - model_df.min())
asthma_air_clusters = cluster.KMeans(n_clusters=3)
asthma_air_clusters.fit(model_df)
model_df['clusters3'] = asthma_air_clusters.labels_
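Choosing k = 3 is a judgment call. A common check is to fit KMeans for a range of k and look for an elbow in the within-cluster inertia; a sketch, reusing the scaled model_df from above:
In [ ]:
inertias = []
for k in range(1, 8):
    km = cluster.KMeans(n_clusters=k).fit(model_df[['OZONE', 'PM2.5', 'Incidents']])
    inertias.append(km.inertia_)
plt.plot(range(1, 8), inertias, marker='o')
plt.xlabel('k')
plt.ylabel('within-cluster inertia')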
In [326]:
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(figsize=(4, 3))
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
labels = asthma_air_clusters.labels_
ax.scatter(model_df.loc[:, 'PM2.5'], model_df.loc[:, 'OZONE'], model_df.loc[:, 'Incidents'],
           c=labels.astype(float), edgecolor='k')  # np.float was removed from NumPy; use float
ax.set_xlabel('Particulates')
ax.set_ylabel('Ozone')
ax.set_zlabel('Incidents')
Out[326]: