In [1]:
from __future__ import division,unicode_literals
%matplotlib inline
import numpy as np
import pandas as pd
import json
import runProcs
from urllib.request import urlopen

import matplotlib.pyplot as plt

In [2]:
# 0. State abbreviations

# 0.1 dictionary:
stateAbbr = {
u'Alabama':u'AL',
u'Alaska':u'AK',
u'Arizona':u'AZ',
u'Arkansas':u'AR',
u'California':u'CA',
u'Colorado':u'CO',
u'Connecticut':u'CT',
u'Delaware':u'DE',
u'District of Columbia':u'DC',
u'Florida':u'FL',
u'Georgia':u'GA',
u'Hawaii':u'HI',
u'Idaho':u'ID',
u'Illinois':u'IL',
u'Indiana':u'IN',
u'Iowa':u'IA',
u'Kansas':u'KS',
u'Kentucky':u'KY',
u'Louisiana':u'LA',
u'Maine':u'ME',
u'Maryland':u'MD',
u'Massachusetts':u'MA',
u'Michigan':u'MI',
u'Minnesota':u'MN',
u'Mississippi':u'MS',
u'Missouri':u'MO',
u'Montana':u'MT',
u'Nebraska':u'NE',
u'Nevada':u'NV',
u'New Hampshire':u'NH',
u'New Jersey':u'NJ',
u'New Mexico':u'NM',
u'New York':u'NY',
u'North Carolina':u'NC',
u'North Dakota':u'ND',
u'Ohio':u'OH',
u'Oklahoma':u'OK',
u'Oregon':u'OR',
u'Pennsylvania':u'PA',
u'Rhode Island':u'RI',
u'South Carolina':u'SC',
u'South Dakota':u'SD',
u'Tennessee':u'TN',
u'Texas':u'TX',
u'Utah':u'UT',
u'Vermont':u'VT',
u'Virginia':u'VA',
u'Washington':u'WA',
u'West Virginia':u'WV',
u'Wisconsin':u'WI',
u'Wyoming':u'WY'
}

# 0.2 List of states in the US
stateList = [s for s in stateAbbr]

In [3]:
# 1. Construct series for price deflator

# 1.1 Obtain data from BEA
gdpDeflator = urlopen('http://bea.gov/api/data/?UserID=3EDEAA66-4B2B-4926-83C9-FD2089747A5B&method=GetData&datasetname=NIPA&TableID=13&Frequency=A&Year=X&ResultFormat=JSON&')

# result = gdpDeflator.readall().decode('utf-8')
result = gdpDeflator.read().decode('utf-8')
jsonResponse = json.loads(result)

In [4]:
# 1.2 Construct the data frame for the deflator series
values = []
years = []
for element in jsonResponse['BEAAPI']['Results']['Data']:
#     if element['LineDescription'] == 'Personal consumption expenditures':
    if element['LineDescription'] == 'Gross domestic product':
        years.append(element['TimePeriod'])
        values.append(float(element['DataValue'])/100)

values = np.array([values]).T
dataP = pd.DataFrame(values,index = years,columns = ['price level'])

# 1.3 Display the data
print(dataP)


      price level
1929      0.09896
1930      0.09535
1931      0.08555
1932      0.07553
1933      0.07345
1934      0.07749
1935      0.07908
1936      0.08001
1937      0.08347
1938      0.08109
1939      0.08033
1940      0.08131
1941      0.08680
1942      0.09369
1943      0.09795
1944      0.10027
1945      0.10288
1946      0.11618
1947      0.12887
1948      0.13605
1949      0.13581
1950      0.13745
1951      0.14716
1952      0.14972
1953      0.15157
1954      0.15298
1955      0.15559
1956      0.16091
1957      0.16625
1958      0.17001
...           ...
1987      0.59885
1988      0.61982
1989      0.64392
1990      0.66773
1991      0.68996
1992      0.70569
1993      0.72248
1994      0.73785
1995      0.75324
1996      0.76699
1997      0.78012
1998      0.78859
1999      0.80065
2000      0.81887
2001      0.83754
2002      0.85039
2003      0.86735
2004      0.89120
2005      0.91988
2006      0.94814
2007      0.97337
2008      0.99246
2009      1.00000
2010      1.01221
2011      1.03311
2012      1.05214
2013      1.06913
2014      1.08828
2015      1.09998
2016      1.11445

[88 rows x 1 columns]

In [5]:
# 2. Construct series for per capita income by state, region, and the entire us

# 2.1 Obtain data from BEA
stateYpc = urlopen('http://bea.gov/api/data/?UserID=3EDEAA66-4B2B-4926-83C9-FD2089747A5B&method=GetData&datasetname=RegionalData&KeyCode=PCPI_SI&Year=ALL&GeoFips=STATE&ResultFormat=JSON&')
# result = stateYpc.readall().decode('utf-8')
result = stateYpc.read().decode('utf-8')
jsonResponse = json.loads(result)
# jsonResponse['BEAAPI']['Results']['Data'][0]['GeoName']

In [6]:
# 2.2 Construct the data frame for the per capita income series


# 2.2.1 Initialize the dataframe
regions = []
years = []
for element in jsonResponse['BEAAPI']['Results']['Data']:
    if element['GeoName'] not in regions:
        regions.append(element['GeoName'])
    if element['TimePeriod'] not in years:
        years.append(element['TimePeriod'])

df = np.zeros([len(years),len(regions)])
dataY = pd.DataFrame(df,index = years,columns = regions)
# 2.2.2 Populate the dataframe with values
for element in jsonResponse['BEAAPI']['Results']['Data']:
    try:
        dataY[element['GeoName']][element['TimePeriod']] = np.round(float(element[u'DataValue'])/float(dataP.loc[element['TimePeriod']]),2)# real
    except:
        dataY[element['GeoName']][element['TimePeriod']] = np.nan
        
# 2.2.3 Replace the state names in the index with abbreviations
columns=[]
for r in regions:
    if r in stateList:
        columns.append(stateAbbr[r])
    else:
        columns.append(r)
        
dataY.columns=columns

# 2.2.4 Display the data obtained from the BEA
dataY


Out[6]:
United States AL AK AZ AR CA CO CT DE DC ... WI WY New England Mideast Great Lakes Plains Southeast Southwest Rocky Mountain Far West
1929 7063.46 3223.52 NaN 6042.85 3051.74 10034.36 6366.21 10398.14 10418.35 13369.04 ... 6770.41 6790.62 8852.06 9822.15 8043.65 5709.38 3668.15 4749.39 5951.90 9145.11
1930 6512.85 2758.26 NaN 5432.62 2349.24 9323.54 6019.93 9711.59 8998.43 13780.81 ... 6124.80 6103.83 8442.58 9334.03 7110.64 5285.79 3251.18 4174.10 5579.44 8515.99
1931 6171.83 2571.60 NaN 5026.30 2419.64 8801.87 5517.24 9421.39 9059.03 14646.41 ... 5482.17 5575.69 8392.75 8930.45 6592.64 4862.65 3167.74 3892.46 4909.41 7983.64
1932 5322.39 2105.12 NaN 4249.97 2012.45 7732.03 4713.36 8301.34 7824.71 14656.43 ... 4792.80 4964.91 7586.39 7851.18 5428.31 4130.81 2687.67 3296.70 4369.12 6964.12
1933 5105.51 2232.81 NaN 4206.94 2083.05 7474.47 4805.99 8032.68 7705.92 12865.90 ... 4533.70 5064.67 7270.25 7420.01 5132.74 3771.27 2791.01 3349.22 4261.40 6739.28
1934 5510.39 2671.31 NaN 4671.57 2348.69 7807.46 4787.71 8530.13 8349.46 12427.41 ... 4903.86 5342.62 7575.17 7768.74 5807.20 3987.61 3174.60 3639.18 4723.19 7175.12
1935 6031.87 2731.41 NaN 5260.50 2579.67 8409.21 5627.21 9041.48 8915.02 13037.43 ... 5829.54 6297.42 7903.39 8168.94 6562.97 5096.11 3388.97 4046.54 5526.05 7713.71
1936 6736.66 3112.11 NaN 5774.28 3037.12 9711.29 6761.65 10186.23 10886.14 14285.71 ... 6474.19 6899.14 8736.41 9148.86 7424.07 5174.35 3837.02 4524.43 6286.71 8923.88
1937 6936.62 3186.77 NaN 6050.08 3031.03 9608.24 6397.51 10410.93 11441.24 14496.23 ... 6601.17 7260.09 8673.78 9188.93 7883.07 5702.65 3965.50 4899.96 6002.16 8841.50
1938 6548.28 2984.34 NaN 5907.02 2811.69 9618.94 6264.64 9606.61 9865.58 14070.79 ... 6264.64 6905.91 8176.10 8755.70 7078.55 5401.41 3736.59 4809.47 6005.67 8842.03
1939 6983.69 3124.61 NaN 6112.29 3062.37 9846.88 6460.85 10544.01 11290.92 14490.23 ... 6411.05 7294.91 8888.34 9336.49 7755.51 5676.58 3996.02 4991.91 6236.77 9162.21
1940 7379.17 3455.91 NaN 6223.10 3160.74 10527.61 6739.64 11437.71 12753.66 15004.30 ... 6739.64 7403.76 9445.33 9863.49 8215.47 5903.33 4243.02 5202.31 6505.96 9789.69
1941 8410.14 4366.36 NaN 7476.96 3963.13 11923.96 7546.08 13352.53 13571.43 14642.86 ... 7753.46 9124.42 10633.64 10748.85 9435.48 6877.88 5149.77 5956.22 7546.08 11359.45
1954 12138.84 7661.13 19185.51 11491.70 7216.63 15119.62 12138.84 15590.27 15302.65 17106.81 ... 11642.04 12570.27 12988.63 13727.28 13204.34 11465.55 8746.24 10772.65 11550.53 14773.17
1955 12558.65 8425.99 18606.59 11716.69 7751.14 15862.20 12552.22 16151.42 16427.79 16813.42 ... 12115.17 12790.03 13638.41 14159.01 13747.67 11298.93 9184.39 11041.84 11928.79 15341.60
1956 12814.62 8688.09 18842.83 11975.64 7867.75 16071.10 12740.04 16947.36 17463.18 17593.69 ... 12435.52 13050.77 14014.05 14560.93 13982.97 11360.39 9433.85 11217.45 12131.00 15486.92
1957 12878.20 8824.06 17497.74 11867.67 7729.32 16072.18 13221.05 17172.93 16306.77 17618.05 ... 12451.13 13329.32 14177.44 14742.86 13864.66 11717.29 9431.58 11314.29 12409.02 15446.62
1958 12722.78 8846.54 17740.13 11628.73 7905.42 15852.01 13210.99 15699.08 16128.46 17746.01 ... 12240.46 13169.81 13752.13 14481.50 13305.10 12128.70 9505.32 11340.51 12387.51 15246.16
1959 13122.93 9056.10 18210.83 11962.64 8365.72 16441.38 13633.46 16041.07 16319.55 17926.55 ... 12919.88 13511.63 14161.40 14927.19 13842.32 12107.68 9856.70 11620.35 12629.81 15832.22
1960 13292.52 9115.36 20170.52 12273.98 8182.65 16708.63 13916.23 16342.41 16599.91 17795.83 ... 12932.02 13504.23 14425.50 15209.43 14042.11 12359.81 9870.68 11575.88 12800.41 16113.53
1961 13503.88 9196.90 18942.78 12417.23 8715.83 16860.04 14222.65 16837.40 16509.14 18201.37 ... 13051.11 13809.50 14794.27 15450.79 14103.80 12564.38 10125.08 11811.65 12983.19 16322.37
1962 13971.82 9515.82 18852.73 12624.40 8967.91 17337.58 14385.55 17359.95 17013.31 19093.15 ... 13491.00 14284.92 15308.06 15962.21 14648.33 13172.31 10505.42 11964.67 13574.86 16823.21
1963 14313.36 9896.06 19394.07 12699.03 9287.93 17713.40 14611.90 17586.25 17492.26 19819.77 ... 13583.59 14329.94 15535.16 16275.98 14987.84 13500.66 10929.90 12184.87 13705.22 17182.66
1964 14897.09 10459.54 20815.64 13111.18 9811.61 18387.24 14962.43 18131.33 18109.55 20706.74 ... 14232.82 14423.39 16084.07 16971.58 15648.48 13797.23 11455.95 12708.27 13993.25 17826.42
1965 15607.96 10993.48 21404.13 13426.37 10180.73 18826.86 15570.53 18752.01 19147.68 21933.48 ... 14902.15 14928.88 16725.48 17618.44 16543.69 14843.33 12078.92 13271.31 14666.88 18361.67
1942 9926.35 5678.30 NaN 10171.84 5240.69 14131.71 9776.92 15359.16 13992.96 15732.74 ... 9307.29 10395.99 12082.40 11997.01 10769.56 8666.88 6521.51 7748.96 9787.60 13790.16
1943 11567.13 6901.48 NaN 10668.71 5839.71 16222.56 10893.31 16539.05 15211.84 16641.14 ... 10872.89 11955.08 13415.01 13721.29 12812.66 10127.62 7667.18 9504.85 11281.27 15885.66
1944 12147.20 7509.72 NaN 10731.03 6961.20 16116.49 10830.76 16176.32 15238.86 16655.03 ... 11129.95 12386.56 13393.84 14600.58 13234.27 10591.40 8367.41 10362.02 11139.92 15797.35
1945 12257.00 7766.33 NaN 11168.35 7319.21 15756.22 11790.44 15474.34 15075.82 17039.27 ... 11576.59 12383.36 13287.33 14861.98 13238.72 11012.83 8563.37 10312.99 11605.75 15250.78
1946 11008.78 6576.00 NaN 9743.50 6541.57 14692.72 10604.23 13788.95 13590.98 15725.60 ... 10457.91 11749.01 12119.13 13281.12 11757.62 10225.51 7548.63 8943.02 10578.41 14029.95
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1987 27160.39 21412.71 32295.23 25373.63 20300.58 30877.52 27380.81 36302.91 29544.96 33509.23 ... 25714.29 23646.99 32031.39 30789.01 26652.75 25954.75 23990.98 24037.74 24074.48 29929.03
1988 28114.61 22208.06 32373.92 25678.42 21036.75 31631.76 27979.09 38388.56 30687.94 35936.24 ... 26143.07 23903.71 33719.47 32380.37 27501.53 26135.01 24947.57 24529.70 24584.56 30721.82
1989 28967.88 23240.46 33886.20 25933.35 21693.69 31954.28 29017.58 39978.57 32361.16 37248.42 ... 27268.92 25580.82 34504.29 33552.30 28385.51 27032.86 25863.46 25038.82 25588.58 31270.97
1990 29339.70 23692.21 34239.89 25764.90 21917.54 32189.66 29369.66 39563.90 32215.12 38535.04 ... 27524.60 26940.53 34091.62 34179.98 28764.62 27454.21 26160.27 25552.24 26107.86 31605.59
1991 28965.45 23923.13 33551.22 25469.59 22157.81 31630.82 29401.70 38439.91 32332.31 39250.10 ... 27336.37 27029.10 33416.43 33470.06 28242.22 27272.60 26056.58 25372.49 26195.72 31213.40
1992 29843.13 24850.86 33921.41 25689.75 23311.94 32087.74 30292.34 40468.19 32478.85 40760.11 ... 28533.78 27571.60 34519.41 34417.38 29378.34 28325.47 26928.25 26141.79 26976.43 31774.58
1993 30032.67 25027.68 34261.16 26021.48 23510.69 31784.96 31007.09 40787.29 32424.43 41642.68 ... 28973.81 28028.46 34813.42 34449.40 29725.39 28301.13 27346.09 26433.95 27650.59 31625.79
1994 30545.50 25666.46 34429.76 26773.73 24072.64 31896.73 31740.87 40897.20 32490.34 42137.29 ... 29836.69 28282.17 35362.20 34707.60 30649.86 29297.28 27925.73 26848.28 28176.46 31823.54
1995 31288.83 26374.06 34571.98 27340.56 24665.45 32652.28 32895.23 42114.07 33152.78 42157.88 ... 30413.95 28710.64 36368.22 35618.13 31401.68 29809.89 28664.17 27510.49 29091.66 32522.17
1996 32240.32 26817.82 34635.39 28125.53 25351.05 33748.81 34016.09 43211.78 34302.92 43724.17 ... 31326.35 29257.23 37501.14 36658.89 32328.97 31240.30 29408.47 28401.93 30018.64 33578.01
1997 33264.11 27534.23 35650.93 29072.45 25828.08 34798.49 35107.42 45139.21 34734.40 45628.88 ... 32442.44 30722.20 38969.65 37906.99 33376.92 32036.10 30201.76 29628.78 30924.73 34613.91
1998 34885.05 28722.15 36484.10 30525.37 26869.48 36813.81 37624.11 48022.42 37589.88 48136.55 ... 34210.43 32223.34 41077.11 39561.75 34938.31 33530.73 31648.89 31150.53 32755.93 36515.81
1999 35754.70 29068.88 36842.57 30956.10 27267.84 38061.57 39047.02 49740.84 39338.04 48937.74 ... 34960.34 33908.70 42631.61 40790.61 35686.01 34114.78 32278.77 31633.05 33791.29 37700.62
2000 37371.01 29623.75 38652.04 32034.39 27823.71 40415.45 41552.38 52861.87 42045.75 52392.93 ... 36247.51 35373.14 45627.51 42863.95 37009.54 35451.29 33381.37 33060.19 35492.81 39775.54
2001 37657.90 29850.51 39530.05 31957.88 28481.03 40202.26 41706.66 54053.54 44674.88 53432.67 ... 36905.70 36632.28 46445.54 43396.14 37075.24 35699.79 33763.16 33724.96 35766.65 39549.16
2002 37412.25 29968.60 40300.33 31931.23 28501.04 39865.24 40928.28 52936.89 44508.99 53564.83 ... 37086.51 36943.05 45861.31 43153.14 36839.57 35696.56 33729.23 33218.88 35414.34 39234.94
2003 37691.82 30567.82 41034.18 32473.63 29361.85 40622.59 40504.99 52421.74 44027.21 53556.23 ... 36986.22 38351.30 45954.92 43321.61 36805.21 36337.12 34088.89 33395.98 35397.48 39892.78
2004 38505.39 31834.61 41282.54 33637.79 30079.67 42135.32 40335.50 53534.56 43761.22 57293.54 ... 37304.76 39243.72 46945.69 44346.95 37094.93 37074.73 35006.73 33735.41 35749.55 41319.57
2005 39031.18 32397.70 42262.03 35100.23 30386.57 42963.21 41336.91 54600.60 42906.68 58561.99 ... 37149.41 41561.94 47445.32 44711.27 36954.82 36950.47 35740.53 35111.10 36662.39 42027.22
2006 40230.35 33027.82 43079.08 36603.24 30911.05 44649.52 42338.68 57155.06 43604.32 60144.07 ... 38109.35 45571.33 49420.97 46127.15 37679.03 37795.05 36786.76 36485.12 37997.55 43555.80
2007 40910.45 33472.37 44919.20 36729.10 31819.35 44887.35 43144.95 59503.58 42978.52 61975.41 ... 38600.94 45942.45 50547.07 47441.36 38204.38 38932.78 37250.99 37036.28 38793.06 43963.75
2008 41394.11 33683.98 48154.08 35988.35 32385.18 44497.51 42987.12 61697.20 42149.81 62574.81 ... 39168.33 48909.78 51667.57 47894.12 38642.36 40379.46 37353.65 38748.16 39011.14 43780.10
2009 39376.00 32685.00 46834.00 33746.00 31372.00 42224.00 39838.00 60428.00 41317.00 59998.00 ... 38012.00 43549.00 50537.00 46227.00 36826.00 38560.00 35595.00 35779.00 36244.00 41414.00
2010 39791.15 33290.52 48026.60 33153.20 31414.43 42794.48 39444.38 61376.59 40592.37 61113.80 ... 38131.42 44901.75 51432.02 46786.73 37229.43 38911.89 35953.01 36246.43 36100.22 41804.57
2011 41100.17 33595.65 49757.53 33801.82 32697.39 44379.59 41578.34 61802.71 42311.08 63666.99 ... 39443.04 47896.16 52323.57 48141.05 38613.51 40682.99 36847.00 37993.05 37840.11 43189.01
2012 42087.55 33723.65 50038.97 34278.71 34357.59 45972.02 42854.56 61809.26 41411.79 63294.81 ... 40429.03 50153.02 52966.34 49319.48 39442.47 41981.11 37272.61 39294.20 39067.99 44719.33
2013 41616.08 33464.59 48127.92 34194.16 33658.21 45429.46 43796.36 59984.29 41001.56 62237.52 ... 39965.21 49309.25 51927.27 48850.00 39334.79 41510.39 36539.99 39009.29 39428.32 44203.23
2014 42694.89 33956.33 50177.34 34968.02 34532.47 46986.07 45781.42 61353.70 41655.64 64751.72 ... 40811.19 51519.83 53116.84 49899.84 40160.62 42157.35 37429.71 40387.58 40828.65 45706.07
2015 43809.89 34609.72 51093.66 35652.47 34779.72 49045.44 46338.12 62566.59 43388.97 66823.94 ... 41766.21 50944.56 54792.81 51227.30 41235.30 42784.41 38375.24 40994.38 41538.03 47473.59
2016 44480.24 35202.12 49627.17 36110.19 35304.41 50237.34 46712.73 63738.17 43695.99 67832.56 ... 42420.03 49541.93 56053.66 52085.78 42030.60 43195.30 38987.84 41044.46 41845.75 48506.44

88 rows × 60 columns


In [7]:
# 3. State income data for 1840, 1880, and 1900

# 3.1.1 Import Easterlin's income data
easterlin_data = pd.read_csv('Historical Statistics of the US - Easterlin State Income Data.csv',index_col=0)

# 3.1.2 Import historic CPI data
historic_cpi_data=pd.read_csv('Historical Statistics of the US - cpi.csv',index_col=0)
historic_cpi_data = historic_cpi_data/historic_cpi_data.loc[1929]*float(dataP.loc['1929'])

In [8]:
# 3.2 Append to data beginning in 1929

# 3.2.1 Construct series for real incomes in 1840, 1880, and 1900
df_1840 = easterlin_data['Income per capita - 1840 - A [cur dollars]']/float(historic_cpi_data.loc[1840])
df_1880 = easterlin_data['Income per capita - 1880 [cur dollars]']/float(historic_cpi_data.loc[1890])
df_1900 = easterlin_data['Income per capita - 1900 [cur dollars]']/float(historic_cpi_data.loc[1900])

# 3.2.2 Put into a DataFrame and concatenate with previous
df = pd.DataFrame({'1840':df_1840,'1880':df_1880,'1900':df_1900}).transpose()
df = pd.concat([dataY,df]).sort_index()

In [9]:
# 4. Export data to csv
# series = dataY.sort_index()
series = df.sort_index()
dropCols = [u'AK', u'HI', u'New England', u'Mideast', u'Great Lakes', u'Plains', u'Southeast', u'Southwest', u'Rocky Mountain', u'Far West']
for c in dropCols:
    series = series.drop([c],axis=1)

series.to_csv('stateIncomeData.csv',na_rep='NaN')

In [10]:
# 4. Export notebook to .py
runProcs.exportNb('stateIncomeData')