notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



In [2]:

    
import plotly
import plotly.plotly as py
from plotly.graph_objs import *
import pandas as pd



In [20]:

    
# Register Plotly Cloud credentials without embedding the secret in the notebook.
# The API key is read from the PLOTLY_API_KEY environment variable; if it is
# missing, the KeyError raised here makes the problem explicit instead of
# surfacing later as an authentication failure.
# NOTE: the key that used to be hard-coded in this cell has been published and
# should be revoked / regenerated in the Plotly account settings.
import os

plotly.tools.set_credentials_file(
    username=os.environ.get('PLOTLY_USERNAME', 'lis590dv'),
    api_key=os.environ['PLOTLY_API_KEY'],
)



In [4]:

    
import os



In [5]:

    
# Show the current working directory; the relative CSV paths passed to
# pd.read_csv in later cells are resolved against it.
os.getcwd()









    Out[5]:





'/home/xiaoliangjiang/work'



In [6]:

    
# Load the tab-separated, Latin-1 encoded grants export and preview the
# first rows.
df2 = pd.read_csv(
    'DiscGrants96to13-2017_04_10_19_27_08.csv',
    sep='\t',
    encoding='iso-8859-1',
)
df2.head()









    Out[6]:






  
    
      
      LogNumber
      Institution
      Program
      ProgramType
      ProjectTitle
      ProjectType
      AwardDate
      InstAddr1
      InstAddr2
      InstAddr3
      ...
      FIPSState
      FIPSCounty
      CensusTract
      CensusBlock
      FIPSMCDCode
      FIPSPlaceCode
      CBSACode
      MetroDivisionCode
      Description
      Level
    
  
  
    
      0
      IA-00-00-0001-00
      Museum of the Aleutians
      Conservation Assessment Prog.
      IA
      NaN
      NaN
      3/30/2000
      P.O. Box 648
      NaN
      NaN
      ...
      2.0
      16.0
      200.0
      2014.0
      1615.0
      80770.0
      NaN
      NaN
      NaN
      1.0
    
    
      1
      IA-00-00-0002-00
      Depot Museum, Inc.
      Conservation Assessment Prog.
      IA
      NaN
      NaN
      3/30/2000
      P.O. Box 681420
      NaN
      NaN
      ...
      1.0
      49.0
      961000.0
      2013.0
      91206.0
      27616.0
      NaN
      NaN
      NaN
      1.0
    
    
      2
      IA-00-00-0003-00
      National Voting Rights Museum and Institute
      Conservation Assessment Prog.
      IA
      NaN
      NaN
      3/30/2000
      6 Highway 80 East
      NaN
      NaN
      ...
      1.0
      47.0
      957200.0
      1005.0
      92883.0
      NaN
      42820.0
      NaN
      NaN
      1.0
    
    
      3
      IA-00-00-0004-00
      Bob Jones Museum
      Conservation Assessment Prog.
      IA
      NaN
      NaN
      3/30/2000
      P.O. Box 613
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      1.0
    
    
      4
      IA-00-00-0005-00
      Coronado Museum of History and Art
      Conservation Assessment Prog.
      IA
      NaN
      NaN
      3/30/2000
      1100 Orange Avenue
      NaN
      NaN
      ...
      6.0
      73.0
      10900.0
      1021.0
      92780.0
      16378.0
      41740.0
      NaN
      NaN
      1.0
    
  

5 rows × 44 columns



In [7]:

    
# Rows east of 95°W and south of 24°N (kept under its original name; it is not
# referenced by the later cells visible in this notebook).
df3 = df2[(df2["Longitude"] > -95) & (df2["Latitude"] < 24)]

# Some points (e.g. Saint Thomas, U.S. Virgin Islands) cannot be shown on the
# US map, so rows with these state codes, or an empty state code, are removed.
# Rows whose InstState is missing (NaN) are kept, as before.
excluded_states = ["GU", "PW", "VI", "PR", "MP", "FM", "MH", "AS", ""]

# .copy() makes df2 an independent frame, so the later `df2['Text'] = ...`
# assignments do not write into a slice of the original frame
# (SettingWithCopyWarning).
df2 = df2[~df2["InstState"].isin(excluded_states)].copy()



In [8]:

    
#df2.columns



In [9]:

    
#df2.describe()



In [10]:

    
# Build one scattergeo trace per award-size bin.

# Hover text: institution, program, program type and award in thousands.
df2['Text'] = (
    df2['Institution'] + '<br>' + df2['Program'] + '<br>'
    + 'Program Type: ' + df2['ProgramType'] + '<br>'
    + 'Total Award: ' + (df2['AwardTotal']/1e3).astype(str) + ' thousand'
)

# Bin edges in thousands. Each bin is (low, high]: awards that sit exactly on
# an edge (e.g. exactly 10 or 100 thousand) now land in a bin instead of being
# dropped by strict comparisons on both sides. Zero awards stay excluded.
# NOTE: awards above 3000 thousand fall outside every bin.
limits = [(0,10),(10,100),(100,200),(200,500),(500,1000),(1000,3000)]

# One colour per bin; the first entry previously lacked its leading '#'.
colors = ["#e0e0e0","#66b2ff","#66ff66","#ffff66","#ffb266","#ff6666"]
institutions = []
scale = 6000  # divisor turning AwardTotal into a marker area

award_thousands = df2['AwardTotal']/1e3
for i, (low, high) in enumerate(limits):
    subdf2 = df2[(award_thousands > low) & (award_thousands <= high)]
    institution = dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = subdf2['Longitude'],
        lat = subdf2['Latitude'],
        text = subdf2['Text'],
        marker = dict(
            size = subdf2['AwardTotal']/scale,
            color = colors[i],
            line = dict(width=0.5, color='rgb(40,40,40)'),
            sizemode = 'area'
        ),
        name = '{0} - {1}'.format(low, high) + '  thousand dollar')
    institutions.append(institution)



In [11]:

    
# Dropdown entries: "All" shows every trace, then one entry per award bin
# that shows only that bin's trace. Labels are listed in trace order.
bin_labels = [
    '0-10 thousand dollar',
    '10-100 thousand dollar',
    '100-200 thousand dollar',
    '200-500 thousand dollar',
    '500-1000 thousand dollar',
    '1000+ thousand dollar',
]
n_bins = len(bin_labels)

buttons = [dict(args=['visible', [True] * n_bins], label='All', method='restyle')]
for position, label in enumerate(bin_labels):
    # Visibility mask with a single True at this bin's trace position.
    only_this_bin = [other == position for other in range(n_bins)]
    buttons.append(dict(args=['visible', only_this_bin], label=label, method='restyle'))

# Map styling: US-only Albers projection with grey land and white borders.
geo = dict(
    scope='usa',
    projection=dict(type='albers usa'),
    showland=True,
    landcolor='rgb(217, 217, 217)',
    subunitwidth=1,
    countrywidth=1,
    subunitcolor="rgb(255, 255, 255)",
    countrycolor="rgb(255, 255, 255)",
)

layout = dict(
    title='Administrative Discretionary Grants<br>(Click legend to toggle traces)',
    showlegend=True,
    geo=geo,
    updatemenus=[dict(x=-0.05, y=1, yanchor='top', buttons=buttons)],
)

# Combine the traces built in the previous cell with the layout and plot.
fig = dict(data=institutions, layout=layout)
py.iplot(fig, validate=False, filename='q2testworldmap')









    Out[11]:



In [12]:

    
# One scattergeo trace per program type, added in reverse list order.

# Hover text: institution, program, program type and award in thousands.
df2['Text'] = (
    df2['Institution'] + '<br>' + df2['Program'] + '<br>'
    + 'Program Type: ' + df2['ProgramType'] + '<br>'
    + 'Total Award: ' + (df2['AwardTotal']/1e3).astype(str) + ' thousand'
)

# colors[k] is the marker colour for program type pt[k].
colors = ["#ff6666","#ffb266","#ffff66","#b2ff66","#66ff66","#66ffb2","#66ffff","#66b2ff","#6666ff","#ff66ff","#b266ff","#ff66b2","#000000","#404040","#808080","#c0c0c0","#ffffff"]
# NOTE(review): "IG" appears twice (positions 13 and 16), so its points are
# plotted as two traces; one of the entries is presumably meant to be a
# different program type -- confirm against the data.
pt=["IL","ST","ML","RE","IC","IM","IS","MN","LE","MP","MH","LT","LI","IG","MA","IA","IG"]
institutions = []
scale = 5000  # divisor turning AwardTotal into a marker area

# Walk both lists from the end. The trace name now uses the same program type
# that selects the rows; previously the rows came from pt[16-i] while the
# legend showed pt[i], so legend labels did not match the plotted data.
for program_type, color in zip(reversed(pt), reversed(colors)):
    subdf2 = df2[df2["ProgramType"] == program_type]
    institution = dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = subdf2['Longitude'],
        lat = subdf2['Latitude'],
        text = subdf2['Text'],
        marker = dict(
            size = subdf2['AwardTotal']/scale,
            color = color,
            line = dict(width=0.5, color='rgb(40,40,40)'),
            sizemode = 'area'
        ),
        name = program_type)
    institutions.append(institution)

layout = dict(
        title = 'Administrative Discretionary Grants<br>(Click legend to toggle traces)',
        showlegend = True,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = 'rgb(217, 217, 217)',
            subunitwidth=1,
            countrywidth=1,
            subunitcolor="rgb(255, 255, 255)",
            countrycolor="rgb(255, 255, 255)"
        )
    )

fig = dict( data=institutions, layout=layout )
py.iplot( fig, validate=False, filename='q2testworldmap' )









    Out[12]:



In [13]:

    
# One scattergeo trace per program type, in list order; the colour list is
# consumed from its end, so pt[k] is drawn with colors[16-k].

# Hover text: institution, program, program type and award in thousands.
award_text = (df2['AwardTotal']/1e3).astype(str)
df2['Text'] = df2['Institution'] + '<br>' + df2['Program'] + '<br>' + 'Program Type: ' + df2['ProgramType'] + '<br>' + 'Total Award: ' + award_text + ' thousand'

colors = [
    "#ff6666", "#ffb266", "#ffff66", "#b2ff66", "#66ff66", "#66ffb2",
    "#66ffff", "#66b2ff", "#6666ff", "#ff66ff", "#b266ff", "#ff66b2",
    "#000000", "#404040", "#808080", "#c0c0c0", "#ffffff",
]
# NOTE(review): "IG" is listed twice, so it yields two traces -- confirm
# whether one entry should be a different program type.
pt = [
    "IL", "ST", "ML", "RE", "IC", "IM", "IS", "MN", "LE",
    "MP", "MH", "LT", "LI", "IG", "MA", "IA", "IG",
]
institutions = []
scale = 6000  # divisor turning AwardTotal into a marker area

border = dict(width=0.5, color='rgb(40,40,40)')
for program_type, color in zip(pt, reversed(colors)):
    subdf2 = df2[df2["ProgramType"] == program_type]
    marker = dict(
        size=subdf2['AwardTotal']/scale,
        color=color,
        line=dict(border),
        sizemode='area',
    )
    institutions.append(dict(
        type='scattergeo',
        locationmode='USA-states',
        lon=subdf2['Longitude'],
        lat=subdf2['Latitude'],
        text=subdf2['Text'],
        marker=marker,
        name=program_type,
    ))

# US-only Albers map with grey land and white borders.
geo = dict(
    scope='usa',
    projection=dict(type='albers usa'),
    showland=True,
    landcolor='rgb(217, 217, 217)',
    subunitwidth=1,
    countrywidth=1,
    subunitcolor="rgb(255, 255, 255)",
    countrycolor="rgb(255, 255, 255)",
)
layout = dict(
    title='Administrative Discretionary Grants<br>(Click legend to toggle traces)',
    showlegend=True,
    geo=geo,
)

fig = dict(data=institutions, layout=layout)
py.iplot(fig, validate=False, filename='q2testworldmap')









    Out[13]:



In [14]:

    
# Scatter plot of the 50 largest awards within each award-size bin.

# Hover text: institution, program, program type and award in thousands.
df2['Text'] = (
    df2['Institution'] + '<br>' + df2['Program'] + '<br>'
    + 'Program Type: ' + df2['ProgramType'] + '<br>'
    + 'Total Award: ' + (df2['AwardTotal']/1e3).astype(str) + ' thousand'
)

# Bin edges in thousands. Each bin is (low, high], so awards exactly on an
# edge are no longer dropped by strict comparisons on both sides.
limits = [(0,10),(10,100),(100,200),(200,500),(500,1000),(1000,3000)]
# One colour per bin; the first entry previously lacked its leading '#'.
colors = ["#e0e0e0","#66b2ff","#66ff66","#ffff66","#ffb266","#ff6666"]
institutions = []
scale = 6000  # divisor turning AwardTotal into a marker area
top_n = 50

award_thousands = df2['AwardTotal']/1e3
for i, (low, high) in enumerate(limits):
    subdf2 = df2[(award_thousands > low) & (award_thousands <= high)]
    # Keep the top_n largest awards of the bin. The previous `subdf2[1:50]`
    # skipped the first row and took 49 rows in file order, which is not the
    # "Top 50" the title announces.
    subdf2 = subdf2.nlargest(top_n, 'AwardTotal')
    institution = dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = subdf2['Longitude'],
        lat = subdf2['Latitude'],
        text = subdf2['Text'],
        marker = dict(
            size = subdf2['AwardTotal']/scale,
            color = colors[i],
            line = dict(width=0.5, color='rgb(40,40,40)'),
            sizemode = 'area'
        ),
        name = '{0} - {1}'.format(low, high) + '  thousand dollar')
    institutions.append(institution)

layout = dict(
        title = 'Temp Scatter Plot of Top 50 for Administrative Discretionary Grants<br>(Click legend to toggle traces)',
        showlegend = True,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = 'rgb(217, 217, 217)',
            subunitwidth=1,
            countrywidth=1,
            subunitcolor="rgb(255, 255, 255)",
            countrycolor="rgb(255, 255, 255)"
        )
    )

fig = dict( data=institutions, layout=layout )
py.iplot( fig, validate=False, filename='q2testworldmap' )









    Out[14]:



In [15]:

    
# One scattergeo trace per award-size bin (variant using locationmode 'ISO-3').

# Hover text: institution, program, program type and award in thousands.
df2['Text'] = (
    df2['Institution'] + '<br>' + df2['Program'] + '<br>'
    + 'Program Type: ' + df2['ProgramType'] + '<br>'
    + 'Total Award: ' + (df2['AwardTotal']/1e3).astype(str) + ' thousand'
)

# Bin edges in thousands. Each bin is (low, high], so awards exactly on an
# edge are no longer dropped by strict comparisons on both sides.
# NOTE: awards above 3000 thousand fall outside every bin.
limits = [(0,10),(10,100),(100,200),(200,500),(500,1000),(1000,3000)]
# One colour per bin; the first entry previously lacked its leading '#'.
colors = ["#e0e0e0","#66b2ff","#66ff66","#ffff66","#ffb266","#ff6666"]
institutions = []
scale = 6000  # divisor turning AwardTotal into a marker area

award_thousands = df2['AwardTotal']/1e3
for i, (low, high) in enumerate(limits):
    subdf2 = df2[(award_thousands > low) & (award_thousands <= high)]
    institution = dict(
        type = 'scattergeo',
        locationmode = 'ISO-3',
        lon = subdf2['Longitude'],
        lat = subdf2['Latitude'],
        text = subdf2['Text'],
        marker = dict(
            size = subdf2['AwardTotal']/scale,
            color = colors[i],
            line = dict(width=0.5, color='rgb(40,40,40)'),
            sizemode = 'area'
        ),
        name = '{0} - {1}'.format(low, high) + '  thousand dollar')
    institutions.append(institution)

layout = dict(
        title = 'Administrative Discretionary Grants<br>(Click legend to toggle traces)',
        showlegend = True,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = 'rgb(217, 217, 217)',
            subunitwidth=1,
            countrywidth=1,
            subunitcolor="rgb(255, 255, 255)",
            countrycolor="rgb(255, 255, 255)"
        )
    )

fig = dict( data=institutions, layout=layout )
py.iplot( fig, validate=False, filename='q2testworldmap' )









    Out[15]:



In [21]:

    
# Load the Latin-1 encoded table that the following cells read the 'states'
# and 'a' columns from.
df4 = pd.read_csv(
    'dftemp 2017_04_14_01_20_50.csv',
    encoding='iso-8859-1',
)



In [22]:

    
# Convert every column to strings so the values can be concatenated into the
# hover text below.
df4 = df4.astype(str)

# Colour scale for the choropleth: (position, colour) stops from 0.0 to 1.0.
scl = [
    [0.0, 'rgb(242,240,247)'],
    [0.2, 'rgb(218,218,235)'],
    [0.4, 'rgb(188,189,220)'],
    [0.6, 'rgb(158,154,200)'],
    [0.8, 'rgb(117,107,177)'],
    [1.0, 'rgb(84,39,143)'],
]

# Hover text per row: the state followed by its total amount.
state_part = 'State: ' + df4['states']
amount_part = '<br>Total Amount: ' + df4['a']
df4['Text'] = state_part + amount_part



In [23]:

    
# Choropleth of the total amount per state held in df4.
data = [ dict(
        type='choropleth',
        colorscale = scl,
        autocolorscale = False,
        locations = df4['states'],
        # df4 columns were converted to strings earlier; z needs numbers.
        z = df4['a'].astype(float),
        locationmode = 'USA-states',
        text = df4['Text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        # NOTE(review): confirm the unit of df4['a'] matches this label.
        colorbar = dict(
            title = "Millions USD")
        ) ]

layout = dict(
        # The previous title ('2011 US Agriculture Exports by State') was left
        # over from an unrelated example and did not describe this data.
        title = 'US Administrative Discretionary Grants by State<br>(Hover for breakdown)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )

fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )









    



High five! You successfuly sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~lis590dv/0 or inside your plot.ly account where it is named 'd3-cloropleth-map'






    Out[23]:



In [24]:

    
# Load the Latin-1 encoded per-year table; later cells use its column labels
# as state locations and select one row per year.
dfby = pd.read_csv(
    'dfperyear 2017_04_14_16_22_04.csv',
    encoding='iso-8859-1',
)



In [25]:

    
def AmountbyYear(input):
    """Plot a US choropleth of the per-state amounts for one year.

    Parameters
    ----------
    input : int
        Calendar year to plot. The row of the global ``dfby`` frame is
        selected by the label ``input - 1996``; the column labels of ``dfby``
        are used as the state locations.

    Returns
    -------
    The value returned by ``py.iplot``, so that calling this function as the
    last expression of a cell displays the figure. (Previously nothing was
    returned and the cell produced no output.)

    Raises
    ------
    KeyError
        If ``dfby`` has no row labelled ``input - 1996``.
    """
    row_label = input - 1996

    # Work on a string copy for the hover text instead of converting the
    # global dfby in place on every call.
    dfby_str = dfby.astype(str)
    amounts = dfby_str.loc[row_label]

    scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],[0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]
    dftext = 'State: ' + dfby.columns + '<br>Total Amount: ' + amounts + ' dollar'
    data = [ dict(
        type='choropleth',
        colorscale = scl,
        autocolorscale = False,
        locations = dfby.columns,
        # Numeric values for the colour scale (the hover text keeps strings).
        z = amounts.astype(float),
        locationmode = 'USA-states',
        text = dftext,
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        # NOTE(review): the hover text labels the same values ' dollar';
        # confirm which unit is correct.
        colorbar = dict(
            title = "Millions USD")
        ) ]

    layout = dict(
        # Show the requested calendar year; the title used to show the row
        # offset (e.g. "1" for 1997).
        title = '%s US Administrative Discretionary Grants<br>(Hover for breakdown)' % input,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )

    fig = dict( data=data, layout=layout )
    return py.iplot( fig, filename='d3-cloropleth-map' )



In [26]:

    
# Plot the per-state amounts for 1997 (row label 1997 - 1996 = 1 of dfby).
AmountbyYear(1997)









    



High five! You successfuly sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~lis590dv/0 or inside your plot.ly account where it is named 'd3-cloropleth-map'



In [27]:

    
# Calendar year plotted by the next cell, which converts it to a dfby row
# label by subtracting 1996.
year1=2012



In [28]:

    
# Inline choropleth of the per-state amounts for the year held in `year1`.

# Row label of dfby for the requested year.
year = year1 - 1996

# Convert every column to strings so the values can be concatenated into the
# hover text.
dfby = dfby.astype(str)
selected_row = dfby.loc[year,]

# Colour scale: (position, colour) stops from 0.0 to 1.0.
scl = [
    [0.0, 'rgb(242,240,247)'],
    [0.2, 'rgb(218,218,235)'],
    [0.4, 'rgb(188,189,220)'],
    [0.6, 'rgb(158,154,200)'],
    [0.8, 'rgb(117,107,177)'],
    [1.0, 'rgb(84,39,143)'],
]

# Hover text per state: the column label followed by the selected row's value.
dftext = 'State: ' + dfby.columns + '<br>Total Amount: ' + selected_row + ' dollar'

state_outline = dict(line=dict(color='rgb(255,255,255)', width=2))
# NOTE(review): the colour bar says "Millions USD" while the hover text labels
# the same values ' dollar' -- confirm which unit is correct.
trace = dict(
    type='choropleth',
    colorscale=scl,
    autocolorscale=False,
    locations=dfby.columns,
    z=selected_row,
    locationmode='USA-states',
    text=dftext,
    marker=state_outline,
    colorbar=dict(title="Millions USD"),
)
data = [trace]

geo = dict(
    scope='usa',
    projection=dict(type='albers usa'),
    showlakes=True,
    lakecolor='rgb(255, 255, 255)',
)
layout = dict(
    title='%s US Administrative Discretionary Grants<br>(Hover for breakdown)' % year1,
    geo=geo,
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='d3-cloropleth-map')









    



High five! You successfuly sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~lis590dv/0 or inside your plot.ly account where it is named 'd3-cloropleth-map'






    Out[28]:



In [ ]:

	LogNumber	Institution	Program	ProgramType	ProjectTitle	ProjectType	AwardDate	InstAddr1	InstAddr2	InstAddr3	...	FIPSState	FIPSCounty	CensusTract	CensusBlock	FIPSMCDCode	FIPSPlaceCode	CBSACode	MetroDivisionCode	Description	Level
0	IA-00-00-0001-00	Museum of the Aleutians	Conservation Assessment Prog.	IA	NaN	NaN	3/30/2000	P.O. Box 648	NaN	NaN	...	2.0	16.0	200.0	2014.0	1615.0	80770.0	NaN	NaN	NaN	1.0
1	IA-00-00-0002-00	Depot Museum, Inc.	Conservation Assessment Prog.	IA	NaN	NaN	3/30/2000	P.O. Box 681420	NaN	NaN	...	1.0	49.0	961000.0	2013.0	91206.0	27616.0	NaN	NaN	NaN	1.0
2	IA-00-00-0003-00	National Voting Rights Museum and Institute	Conservation Assessment Prog.	IA	NaN	NaN	3/30/2000	6 Highway 80 East	NaN	NaN	...	1.0	47.0	957200.0	1005.0	92883.0	NaN	42820.0	NaN	NaN	1.0
3	IA-00-00-0004-00	Bob Jones Museum	Conservation Assessment Prog.	IA	NaN	NaN	3/30/2000	P.O. Box 613	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1.0
4	IA-00-00-0005-00	Coronado Museum of History and Art	Conservation Assessment Prog.	IA	NaN	NaN	3/30/2000	1100 Orange Avenue	NaN	NaN	...	6.0	73.0	10900.0	1021.0	92780.0	16378.0	41740.0	NaN	NaN	1.0