In [1]:
# You might have a data analyst create a report to see if there's any connection between a customer’s address and the types of shoes they buy. You might find that people in warmer areas are more likely to buy brightly colored shoes.
# Build a synthetic shoe-purchase table: one row per purchase holding an id,
# a random date, a shoe type, an integer colour code and a state code.
shoes_type = [
    'lightweight trainer', 'cross-trainer', 'trail', 'stability',
    'cushioned stability', 'neutral', 'motion control', 'walking', 'racing',
]
colors = np.arange(10)  # integer colour codes 0..9
# NOTE(review): 'NA' is not a USPS abbreviation, and pandas.read_csv parses the
# literal string "NA" as a missing value by default -- confirm it is intended.
states = [
    'NA', 'MS', 'GA', 'AL', 'MT', 'VT', 'OK', 'UT', 'IA', 'FL',
    'MP', 'PR', 'WY', 'RI', 'MO', 'MN', 'WA', 'IN', 'MA', 'MI',
    'PA', 'KS', 'WV', 'LA', 'CO', 'NJ', 'GU', 'VA', 'WI', 'AK',
    'OH', 'MD', 'CA', 'TN', 'NC', 'DC', 'HI', 'AZ', 'AR', 'ID',
    'NV', 'NE', 'ND', 'CT', 'DE', 'VI', 'TX', 'ME', 'OR', 'NH',
    'SC', 'SD', 'AS', 'IL', 'KY', 'NM', 'NY',
]
n_records = 1000
data = []
for i in range(n_records):
    # The three index draws happen before random_date() is called; keep that
    # order if the output must stay reproducible under a fixed seed.
    # NOTE(review): the `len(...) - 1` upper bound is inclusive for the stdlib
    # random.randint but exclusive for numpy.random.randint -- confirm which
    # module `rd` refers to.
    rd_state = rd.randint(0, len(states) - 1)
    rd_type = rd.randint(0, len(shoes_type) - 1)
    rd_color = rd.randint(0, len(colors) - 1)
    record = [
        i,
        random_date(),
        shoes_type[rd_type],
        colors[rd_color],
        states[rd_state],
    ]
    data.append(record)
df = pd.DataFrame(
    data,
    columns=['id', 'timestamp', 'shoe-type', 'color', 'state'],
)
# df.to_csv('csv_output/ch2_fig1.csv', index=False)
# The freshly generated frame is replaced by the copy stored on disk, so the
# rest of the notebook works from the saved CSV.
df = pd.read_csv('csv_output/ch2_fig1.csv')
df.head()
Out[1]:
In [8]:
# Reload the saved purchases and average the colour code within each state.
df = pd.read_csv('csv_output/ch2_fig1.csv')
df2 = (
    df.groupby(['state'])['color']
    .mean()
    .reset_index()  # back to a flat frame with 'state' and 'color' columns
)
# Not the last expression of the cell, so this preview is not rendered.
df2.head()
import plotly
import plotly.plotly as py
# to publish online
# plotly.tools.set_credentials_file(username='your user name', api_key='your api key')
# off line
# plotly.offline.init_notebook_mode()
# Cast every column of the per-state frame to str in one step; the choropleth
# trace converts 'color' back to float when it builds `z`.
df2 = df2.astype(str)
# Light-to-dark purple colorscale for the choropleth.
# Plotly colorscale stops are normalized positions: the first must be 0 and
# the last must be 1, and they are mapped onto the trace's z range.
# NOTE(review): if the stops were meant as absolute values on a 0-10 scale,
# the trace would also need explicit zmin/zmax -- confirm the intent.
scl = [
    [0.0, 'rgb(242,240,247)'],
    [0.2, 'rgb(218,218,235)'],
    [0.4, 'rgb(188,189,220)'],
    [0.6, 'rgb(158,154,200)'],
    [0.8, 'rgb(117,107,177)'],
    [1.0, 'rgb(84,39,143)'],
]
# Single choropleth trace: one value (the mean colour code) per state.
data = [{
    'type': 'choropleth',
    'colorscale': scl,
    'autocolorscale': False,  # use `scl` rather than an automatic palette
    'locations': df2['state'],
    'z': df2['color'].astype(float),
    'locationmode': 'USA-states',
    # 'text': df['text'],
    'marker': {
        # white 2-unit-wide outline on the regions
        'line': {
            'color': 'rgb(255,255,255)',
            'width': 2,
        },
    },
    'colorbar': {
        'title': "Color Brightness",
    },
}]
# Figure layout: title plus a geo configuration restricted to the USA.
layout = {
    'title': 'Color Brightness Average by State',
    'geo': {
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',  # lakes drawn in white
    },
}
# Bundle the trace list and the layout into one figure specification.
fig = {'data': data, 'layout': layout}
# Rendering and export are left disabled; uncomment one of the options below.
# publish online
# py.iplot( fig, filename='d3-cloropleth-map' )
# offline
# plotly.offline.iplot(fig)
# py.image.save_as(fig, filename='svg_output/ch2_fig1.png')
In [6]:
%%html
<!--
  Embed of the hosted "Color Brightness Average by State" chart: a linked PNG
  with a 404 placeholder fallback, followed by the plot.ly embed script
  (presumably upgrades the image to the interactive chart; confirm).
  rel="noopener noreferrer" stops the page opened in the new tab from
  reaching back to this notebook through window.opener.
-->
<div>
<a href="https://plot.ly/~l1990790120271c/29/"
   target="_blank"
   rel="noopener noreferrer"
   title="Color Brightness Average by State"
   style="display: block; text-align: center;"><img
     src="https://plot.ly/~l1990790120271c/29.png"
     alt="Color Brightness Average by State"
     style="max-width: 100%;width: 600px;"
     width="600"
     onerror="this.onerror=null;this.src='https://plot.ly/404.png';" /></a>
<script data-plotly="l1990790120271c:29" src="https://plot.ly/embed.js" async></script>
</div>