Ch2 Figure1



In [1]:

    
# Scenario: a data analyst builds a report to check whether a customer's
# address is connected to the types of shoes they buy -- for example, people
# in warmer areas might be more likely to buy brightly colored shoes.
#
# The figure is built from the CSV snapshot at `csv_path`. Synthetic records
# are generated (and saved) only when that snapshot is missing, so re-running
# this cell does not build a frame that is immediately thrown away, and the
# data behind the figure stays the same between runs.
import os

csv_path = 'csv_output/ch2_fig1.csv'

# Value pools the synthetic records are drawn from.
shoes_type = ['lightweight trainer', 'cross-trainer', 'trail', 'stability', 'cushioned stability', 'neutral', 'motion control', 'walking', 'racing']
colors = np.arange(10)
# NOTE(review): 'NA' looks like a placeholder rather than a state code, and
# pd.read_csv parses the literal string NA as a missing value by default, so
# such rows come back without a state -- confirm this entry is intended.
states = ['NA', 'MS', 'GA', 'AL', 'MT', 'VT', 'OK', 'UT', 'IA', 'FL', 'MP', 'PR', 'WY', 'RI', 'MO', 'MN', 'WA', 'IN', 'MA', 'MI', 'PA', 'KS', 'WV', 'LA', 'CO', 'NJ', 'GU', 'VA', 'WI', 'AK', 'OH', 'MD', 'CA', 'TN', 'NC', 'DC', 'HI', 'AZ', 'AR', 'ID', 'NV', 'NE', 'ND', 'CT', 'DE', 'VI', 'TX', 'ME', 'OR', 'NH', 'SC', 'SD', 'AS', 'IL', 'KY', 'NM', 'NY']
n_records = 1000


def generate_shoe_orders(n):
    """Build `n` synthetic shoe orders as a DataFrame.

    Each row holds a running id, a timestamp from `random_date()`, and a
    shoe type, color code and state, each picked by its own `rd.randint`
    call from the pools defined above.

    Columns: id, timestamp, shoe-type, color, state.
    """
    data = []
    for i in range(n):
        # NOTE(review): the `- 1` assumes rd.randint includes its upper bound
        # (as the stdlib random.randint does) -- confirm which module `rd` is.
        rd_state = rd.randint(0, len(states)-1)
        rd_type = rd.randint(0, len(shoes_type)-1)
        rd_color = rd.randint(0, len(colors)-1)
        data.append([i, random_date(), shoes_type[rd_type], colors[rd_color], states[rd_state]])
    return pd.DataFrame(data, columns=['id', 'timestamp', 'shoe-type', 'color', 'state'])


# Create the snapshot on first use only (assumes the csv_output/ directory
# already exists). No random seed is set, so a regenerated snapshot will
# differ from any earlier one.
if not os.path.isfile(csv_path):
    generate_shoe_orders(n_records).to_csv(csv_path, index=False)

# Always load from the CSV so `df` is the same kind of frame whether or not
# the snapshot was just written.
df = pd.read_csv(csv_path)
df.head()









    Out[1]:






  
    
      
      id
      timestamp
      shoe-type
      color
      state
    
  
  
    
      0
      0
      2009-03-25 05:05:40
      oxfords
      6
      UT
    
    
      1
      1
      2009-03-14 18:04:11
      loafers
      4
      AS
    
    
      2
      2
      2009-03-11 17:06:29
      oxfords
      8
      FL
    
    
      3
      3
      2009-03-15 07:18:42
      oxfords
      9
      MS
    
    
      4
      4
      2009-03-20 06:56:17
      slippers
      3
      DC



In [8]:

    
# NOTE(review): both plotly imports are only needed by the commented-out
# publish/render lines below. `plotly.plotly` was moved to the separate
# chart_studio package in plotly 4 -- confirm the installed version.
import plotly
import plotly.plotly as py

# Average color code per state, computed from the saved shoe-order snapshot.
df = pd.read_csv('csv_output/ch2_fig1.csv')
df2 = df.groupby(['state']).color.mean().reset_index()

# to publish online
# plotly.tools.set_credentials_file(username='your user name', api_key='your api key')

# off line
# plotly.offline.init_notebook_mode()

# Colorscale stops are positions on the normalized 0-1 range of `z`, not data
# values: the first stop must be 0 and the last must be 1.
scl = [[0.0, 'rgb(242,240,247)'], [0.2, 'rgb(218,218,235)'], [0.4, 'rgb(188,189,220)'],
       [0.6, 'rgb(158,154,200)'], [0.8, 'rgb(117,107,177)'], [1.0, 'rgb(84,39,143)']]

# One choropleth trace: each state is shaded by its mean color code.
data = [ dict(
        type='choropleth',
        colorscale = scl,
        autocolorscale = False,
        locations = df2['state'],
        z = df2['color'],  # the groupby mean is already numeric; no casting needed
        locationmode = 'USA-states',
        marker = dict(
            # white outlines between states
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            )
        ),
        colorbar = dict(
            title = "Color Brightness"
        )
    ) ]

# Restrict the map to the USA with an Albers projection; lakes drawn in white.
layout = dict(
        title = 'Color Brightness Average by State',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)',
        ),
    )

# Plain-dict figure spec; nothing is rendered in this cell unless one of the
# options below is uncommented.
fig = dict( data=data, layout=layout )

# publish online
# py.iplot( fig, filename='d3-cloropleth-map' )

# offline
# plotly.offline.iplot(fig)
# py.image.save_as(fig, filename='svg_output/ch2_fig1.png')



In [6]:

    
%%html
<!-- Embed of the published Plotly chart "Color Brightness Average by State".
     The PNG links to the chart page and opens it in a new tab; rel="noopener"
     keeps that tab from getting a window.opener handle back to this page.
     If the PNG fails to load, the onerror handler swaps in Plotly's 404 image.
     embed.js is loaded asynchronously (presumably to upgrade the image to the
     interactive chart; confirm against Plotly's embed documentation). -->
<div>
    <a href="https://plot.ly/~l1990790120271c/29/"
       target="_blank"
       rel="noopener"
       title="Color Brightness Average by State"
       style="display: block; text-align: center;"><img src="https://plot.ly/~l1990790120271c/29.png"
         alt="Color Brightness Average by State"
         style="max-width: 100%;width: 600px;"
         width="600"
         onerror="this.onerror=null;this.src='https://plot.ly/404.png';" /></a>
    <script data-plotly="l1990790120271c:29" src="https://plot.ly/embed.js" async></script>
</div>

	id	timestamp	shoe-type	color	state
0	0	2009-03-25 05:05:40	oxfords	6	UT
1	1	2009-03-14 18:04:11	loafers	4	AS
2	2	2009-03-11 17:06:29	oxfords	8	FL
3	3	2009-03-15 07:18:42	oxfords	9	MS
4	4	2009-03-20 06:56:17	slippers	3	DC