indicators_clustered_completed.csv -- contains all features, with missing values imputed, together with the cluster assignment
In [70]:
import warnings
import pandas as pd
import os
import csv
import xlrd
import numpy as np
import matplotlib
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.tools.plotting import scatter_matrix
import numpy as np
from sklearn.preprocessing import Imputer
from sklearn.preprocessing import MinMaxScaler, StandardScaler,RobustScaler, Normalizer
import matplotlib
matplotlib.style.use('ggplot')
%matplotlib inline
In [71]:
# Read the clustered indicator table from the CSV in the working directory.
df = pd.read_csv(filepath_or_buffer='indicators_clustered_completed.csv')
In [72]:
# Preview the first five rows (5 is head()'s default, spelled out here).
df.head(n=5)
Out[72]:
In [73]:
# Columns kept for plotting: identifiers, the cluster label, and the
# indicator columns. Order is preserved from the original selection.
INDICATOR_COLUMNS = [
    'cbsa', 'msa_name', 'cluster',
    'violent_crime_rate', 'murder_manslaughter', 'rape', 'robbery',
    'aggravated_assault', 'property_crime_rate', 'burglary',
    'larceny_theft', 'motor_vehicle_theft', 'total_crime_rate',
    'median_gross_rent', 'median_monthly_mortgage', 'rent_burden',
    'mortgage_burden', 'income_change_2012_to_2014',
    'median_age_of_men', 'median_age_of_women', 'median_age',
    'median_household_income',
    'single_men_population', 'single_women_population',
    'ratio_of_single_men_to_single_women',
    'population_percent_of_single_men',
    'population_percent_of_single_women',
    'population', 'edu_average_scale_score',
    'pct_laccess_pop10', 'pct_laccess_lowi10', 'pct_laccess_child10',
    'pct_laccess_seniors10', 'pct_laccess_hhnv10',
    'event_mpmt', 'fatalities_mpmt', 'injuries_mpmt',
    'walk_score', 'transit_score', 'bike_score',
    'unemploymentrate', 'employment', 'laborforce',
]

# .copy() makes the selection an independent frame, so later column
# assignments on `df` do not raise SettingWithCopyWarning.
df = df[INDICATOR_COLUMNS].copy()
In [74]:
# Show the column labels that remain in df, as a NumPy array.
column_labels = df.columns
column_labels.values
Out[74]:
In [ ]:
In [ ]:
In [30]:
import os

import plotly.plotly as py
import cufflinks as cf
import pandas as pd

# Plotly credentials are read from the environment so that no secret is
# stored in the notebook. Set PLOTLY_USERNAME and PLOTLY_API_KEY before
# starting the kernel; a missing variable raises KeyError naming it.
# NOTE(review): the API key that was previously hard-coded in this cell
# has been exposed and should be rotated.
py.sign_in(os.environ['PLOTLY_USERNAME'], os.environ['PLOTLY_API_KEY'])

# cufflinks settings used by df.iplot below.
cf.set_config_file(offline=False, world_readable=True, theme='pearl')

# Bubble chart: total crime rate (x) against rent burden (y), with
# bubbles sized by population; saved under the plotly filename 'clusters'.
df.iplot(kind='bubble',
         x='total_crime_rate',
         y='rent_burden',
         size='population',
         xTitle='total crime rate',
         yTitle='rent burden',
         filename='clusters')
Out[30]:
In [77]:
# Summary statistics of the population column.
df.loc[:, 'population'].describe()
Out[77]:
In [79]:
# Express population in units of 100,000.
#
# The unscaled values are stashed once in 'population_raw' and the scaled
# column is always derived from that stash, so re-running this cell gives
# the same result instead of dividing by 100000 again each time.
# assign() returns a new frame, which also avoids writing into a frame
# that pandas may consider a slice of another one.
if 'population_raw' not in df.columns:
    df = df.assign(population_raw=df['population'])
df = df.assign(population=df['population_raw'] / 100000)
In [80]:
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd
import math

# Cluster ids plotted are 0 .. N_CLUSTERS - 1, one trace per id.
N_CLUSTERS = 16
# Value passed as marker.sizeref on every trace (plotly's scale factor
# for marker.size).
MARKER_SIZEREF = 0.85


def _cluster_trace(frame, cluster_id):
    """Build the bubble trace for a single cluster.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must provide the columns 'cluster', 'employment', 'walk_score',
        'msa_name' and 'population'.
    cluster_id : int
        Value of the 'cluster' column whose rows go into this trace.

    Returns
    -------
    plotly.graph_objs.Scatter
        Marker-only trace named 'Cluster <cluster_id>': employment on x,
        walk_score on y, msa_name as hover text, and population as the
        marker diameter.
    """
    members = frame[frame['cluster'] == cluster_id]
    return go.Scatter(
        x=members['employment'],
        y=members['walk_score'],
        mode='markers',
        name='Cluster {}'.format(cluster_id),
        text=members['msa_name'],
        marker=dict(symbol='circle',
                    sizemode='diameter',
                    sizeref=MARKER_SIZEREF,
                    size=members['population'],
                    line=dict(width=2)))


# One trace per cluster so that each cluster gets its own legend entry.
data = [_cluster_trace(df, cluster_id) for cluster_id in range(N_CLUSTERS)]

layout = go.Layout(
    title='Indicators and Clusters',
    # Employment is drawn on a logarithmic axis.
    xaxis=dict(title='Employment',
               gridcolor='rgb(255, 255, 255)',
               type='log',
               zerolinewidth=1,
               ticklen=5,
               gridwidth=2),
    yaxis=dict(title='Walkability',
               gridcolor='rgb(255, 255, 255)',
               zerolinewidth=1,
               ticklen=5,
               gridwidth=2),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)')

fig = go.Figure(data=data, layout=layout)
# Uploads the figure under the plotly filename below and embeds it in the
# notebook output.
py.iplot(fig, filename='INDICATORS_CLUSTERS')
Out[80]:
https://plot.ly/~karen.belita/2/indicators-and-clusters/
https://plot.ly/~karen.belita/2.embed
In [ ]:
In [ ]:
In [ ]: