In [1]:
import pandas as pd
import numpy as np
statelist=["AK","AL","AR","AZ","CA","CO","CT","DC","DE","FL","GA","HI","IA","ID","IL","IN","KS","KY","LA","MA","MD","ME","MI","MN","MO","MS","MT","NC","ND","NE","NH","NJ","NM","NV","NY","OH","OK","OR","PA","RI","SC","SD","TN","TX","US","UT","VA","VT","WA","WI","WV","WY"]
len(statelist)
statelist[51]


Out[1]:
'WY'

In [2]:
df = pd.read_csv('%s.csv' % (statelist[0]))
print(df.columns)
df.rename(columns={'Unnamed: 0':'Year','Unnamed: 5':'GDP'}, inplace = True)
df.head()


Index(['Year', 'HYTCP', 'WYTCP', 'SOEGP', 'NUETP', 'GDP', 'CLPRB'], dtype='object')
Out[2]:
Year HYTCP WYTCP SOEGP NUETP GDP CLPRB
0 1960.0 290.0 0.0 0.0 0.0 NaN 11263
1 1961.0 297.0 0.0 0.0 0.0 NaN 11497
2 1962.0 304.0 0.0 0.0 0.0 NaN 13587
3 1963.0 325.0 0.0 0.0 0.0 1083.0 13306
4 1964.0 322.0 0.0 0.0 0.0 1230.0 11622

In [4]:
#last line
df.iloc[54]


Out[4]:
Year      2014.0
HYTCP     1539.0
WYTCP        0.0
SOEGP        0.0
NUETP      152.0
GDP      58253.0
CLPRB        0.0
Name: 54, dtype: float64

In [58]:
#inplace true means operate on original data
df.drop(df.index[55],inplace=True)

In [22]:
CLPRB=pd.read_excel("..\CLPRB.xlsx")

In [41]:
#CLPRB
import xlrd
data = xlrd.open_workbook('..\CLPRB.xlsx') # 打开xls文件
table = data.sheets()[0] # 打开第一张表

In [121]:
df['CLPRB']=1  
df.head()
df['CLPRB'][0]=2
df.head()


C:\Users\RicardoZhang\Miniconda3\lib\site-packages\ipykernel\__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
Out[121]:
Year HYTCP WYTCP SOEGP NUETP GDP CLPRB
0 1960.0 6239.0 0.0 0.0 0.0 NaN 2
1 1961.0 6747.0 0.0 0.0 0.0 NaN 1
2 1962.0 7540.0 0.0 0.0 0.0 NaN 1
3 1963.0 6384.0 0.0 0.0 0.0 7343.0 1
4 1964.0 8760.0 0.0 0.0 0.0 8220.0 1

In [111]:
nrows


Out[111]:
51

In [103]:
for i in range(nrows): # 循环逐行打印
    df['CLPRB'][i] = table.row_values(i+1)[3] # columns 4 
    #print(table.row_values(i)[3:4]) # 取前十三列


C:\Users\RicardoZhang\Miniconda3\lib\site-packages\ipykernel\__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-103-96306522342f> in <module>()
      1 for i in range(nrows): # 循环逐行打印
----> 2     df['CLPRB'][i] = table.row_values(i+1)[3] # columns 4
      3     #print(table.row_values(i)[3:4]) # 取前十三列

C:\Users\RicardoZhang\Miniconda3\lib\site-packages\xlrd\sheet.py in row_values(self, rowx, start_colx, end_colx)
    478     def row_values(self, rowx, start_colx=0, end_colx=None):
    479         if end_colx is None:
--> 480             return self._cell_values[rowx][start_colx:]
    481         return self._cell_values[rowx][start_colx:end_colx]
    482 

IndexError: list index out of range

In [126]:
nrows = table.nrows # 获取表的行数
for i in range(nrows): # 循环逐行打印
    print(table.row_values(i)[:13]) # 取前十三列 
table.row_values(1)[3]


['MSN', 'StateCode', 'Year', 'Data']
['CLPRB', 'AL', 1960.0, 318818.42136]
['CLPRB', 'AL', 1961.0, 316466.0604]
['CLPRB', 'AL', 1962.0, 315608.4288]
['CLPRB', 'AL', 1963.0, 302841.96984]
['CLPRB', 'AL', 1964.0, 353711.7756]
['CLPRB', 'AL', 1965.0, 363439.76832]
['CLPRB', 'AL', 1966.0, 348418.96344]
['CLPRB', 'AL', 1967.0, 379465.22736]
['CLPRB', 'AL', 1968.0, 402841.8144]
['CLPRB', 'AL', 1969.0, 427737.63456]
['CLPRB', 'AL', 1970.0, 503797.3056]
['CLPRB', 'AL', 1971.0, 439719.9732]
['CLPRB', 'AL', 1972.0, 510021.26064]
['CLPRB', 'AL', 1973.0, 453403.4016]
['CLPRB', 'AL', 1974.0, 463397.29968]
['CLPRB', 'AL', 1975.0, 534692.772]
['CLPRB', 'AL', 1976.0, 508453.46469]
['CLPRB', 'AL', 1977.0, 505587.897]
['CLPRB', 'AL', 1978.0, 492161.72694]
['CLPRB', 'AL', 1979.0, 579943.31664]
['CLPRB', 'AL', 1980.0, 633365.46117]
['CLPRB', 'AL', 1981.0, 592488.95728]
['CLPRB', 'AL', 1982.0, 645392.32692]
['CLPRB', 'AL', 1983.0, 577138.5876]
['CLPRB', 'AL', 1984.0, 657109.10128]
['CLPRB', 'AL', 1985.0, 676254.31104]
['CLPRB', 'AL', 1986.0, 634516.4114]
['CLPRB', 'AL', 1987.0, 627489.706]
['CLPRB', 'AL', 1988.0, 649790.70768]
['CLPRB', 'AL', 1989.0, 680322.32664]
['CLPRB', 'AL', 1990.0, 707761.5605]
['CLPRB', 'AL', 1991.0, 662160.58326]
['CLPRB', 'AL', 1992.0, 623931.97936]
['CLPRB', 'AL', 1993.0, 600868.70784]
['CLPRB', 'AL', 1994.0, 568576.60194]
['CLPRB', 'AL', 1995.0, 607149.8048]
['CLPRB', 'AL', 1996.0, 607031.28937]
['CLPRB', 'AL', 1997.0, 600683.283]
['CLPRB', 'AL', 1998.0, 568307.87604]
['CLPRB', 'AL', 1999.0, 477573.57872]
['CLPRB', 'AL', 2000.0, 472698.857]
['CLPRB', 'AL', 2001.0, 469961.62902]
['CLPRB', 'AL', 2002.0, 460200.77838]
['CLPRB', 'AL', 2003.0, 486363.09128]
['CLPRB', 'AL', 2004.0, 531179.44533]
['CLPRB', 'AL', 2005.0, 518423.32962]
['CLPRB', 'AL', 2006.0, 443032.27144]
['CLPRB', 'AL', 2007.0, 469035.572]
['CLPRB', 'AL', 2008.0, 506837.092]
['CLPRB', 'AL', 2009.0, 459528.87]
Out[126]:
318818.42136

In [114]:
df
df.to_csv('%s.csv'% (statelist[0]), encoding='utf-8', index=False)

In [125]:
print(range(1,nrows)[0+2])


3

In [127]:
import xlrd
CLPRB=pd.read_excel("..\CLPRB.xlsx")
for j in range(51):
    df = pd.read_csv('%s.csv' % (statelist[j]))
    df.rename(columns={'Unnamed: 0':'Year','Unnamed: 5':'GDP'}, inplace = True)
    df['CLPRB']=0
    #df.drop(df.index[55],inplace=True)#delete last line of GDP
    data = xlrd.open_workbook('..\CLPRB.xlsx') # 打开xls文件
    table = data.sheets()[j] # 打开第j张表
    nrows = table.nrows # 获取表的行数
    for i in range(1,nrows): # 循环逐行打印
        df['CLPRB'][i-1] = table.row_values(i)[3] # columns 4 
    df.to_csv('%s.csv'% (statelist[j]), encoding='utf-8', index=False)


C:\Users\RicardoZhang\Miniconda3\lib\site-packages\ipykernel\__main__.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [6]:
import pandas as pd
import numpy as np
statelist=["AK","AL","AR","AZ","CA","CO","CT","DC","DE","FL","GA","HI","IA","ID","IL","IN","KS","KY","LA","MA","MD","ME","MI","MN","MO","MS","MT","NC","ND","NE","NH","NJ","NM","NV","NY","OH","OK","OR","PA","RI","SC","SD","TN","TX","US","UT","VA","VT","WA","WI","WV","WY"]
len(statelist)
statelist[51]
EMFDB=pd.read_excel("..\EMFDB.xlsx")

In [8]:
tempdf=EMFDB[EMFDB.StateCode=="AK"]
del tempdf['MSN']
del tempdf['StateCode']
tempdf.T


Out[8]:
0 1 2 3 4 5 6 7 8 9 ... 40 41 42 43 44 45 46 47 48 49
Year 1960.0 1961.0 1962.0 1963.0 1964.0 1965.0 1966.0 1967.0 1968.0 1969.0 ... 2000.0 2001.0 2002.0 2003.0 2004.0 2005.0 2006.0 2007.0 2008.0 2009.0
Data 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

2 rows × 50 columns


In [9]:
df = pd.read_csv('%s.csv' % (statelist[0]))
df


Out[9]:
Year HYTCP WYTCP SOEGP NUETP GDP CLPRB
0 1960.0 290.0 0.0 0.0 0.0 NaN 11263
1 1961.0 297.0 0.0 0.0 0.0 NaN 11497
2 1962.0 304.0 0.0 0.0 0.0 NaN 13587
3 1963.0 325.0 0.0 0.0 0.0 1083.0 13306
4 1964.0 322.0 0.0 0.0 0.0 1230.0 11622
5 1965.0 350.0 0.0 0.0 0.0 1374.0 13930
6 1966.0 316.0 0.0 0.0 0.0 1494.0 14461
7 1967.0 363.0 0.0 0.0 0.0 1643.0 14430
8 1968.0 364.0 0.0 0.0 0.0 1899.0 11700
9 1969.0 341.0 0.0 0.0 0.0 2073.0 10405
10 1970.0 363.0 0.0 0.0 0.0 2359.0 8564
11 1971.0 363.0 0.0 0.0 0.0 2539.0 10888
12 1972.0 346.0 0.0 0.0 0.0 2743.0 10420
13 1973.0 286.0 0.0 0.0 0.0 3041.0 10826
14 1974.0 326.0 0.0 0.0 0.0 3999.0 10920
15 1975.0 357.0 0.0 0.0 0.0 6220.0 11949
16 1976.0 383.0 0.0 0.0 0.0 7470.0 11013
17 1977.0 512.0 0.0 0.0 0.0 7544.0 10998
18 1978.0 472.0 0.0 0.0 0.0 9130.0 11403
19 1979.0 459.0 0.0 0.0 0.0 10891.0 12308
20 1980.0 539.0 0.0 0.0 0.0 15282.0 12339
21 1981.0 590.0 0.0 0.0 0.0 21824.0 12604
22 1982.0 561.0 0.0 0.0 0.0 23461.0 12994
23 1983.0 593.0 0.0 0.0 0.0 22591.0 12261
24 1984.0 693.0 0.0 0.0 0.0 23787.0 13400
25 1985.0 748.0 0.0 0.0 0.0 26142.0 22354
26 1986.0 809.0 0.0 0.0 0.0 19181.0 24492
27 1987.0 872.0 0.0 0.0 0.0 22925.0 23275
28 1988.0 935.0 0.0 0.0 0.0 21270.0 27222
29 1989.0 873.0 0.0 0.0 0.0 23453.0 24679
30 1990.0 975.0 0.0 0.0 0.0 25040.0 26613
31 1991.0 896.0 0.0 0.0 0.0 22283.0 22401
32 1992.0 918.0 0.0 0.0 0.0 22753.0 23930
33 1993.0 1303.0 0.0 0.0 0.0 23284.0 24975
34 1994.0 1345.0 0.0 0.0 0.0 23605.0 24445
35 1995.0 1372.0 0.0 0.0 0.0 25449.0 26488
36 1996.0 1266.0 0.0 0.0 0.0 26737.0 23103
37 1997.0 1099.0 0.0 0.0 0.0 27581.0 22620
38 1998.0 1113.0 0.0 0.0 0.0 24030.0 20966
39 1999.0 817.0 0.0 0.0 0.0 24684.0 24414
40 2000.0 1002.0 0.0 0.0 0.0 26853.0 25599
41 2001.0 1346.0 0.0 0.0 1.0 28553.0 23618
42 2002.0 1439.0 0.0 0.0 0.0 29763.0 17877
43 2003.0 1583.0 0.0 0.0 0.0 32039.0 16863
44 2004.0 1498.0 0.0 0.0 0.0 35195.0 23587
45 2005.0 1464.0 0.0 0.0 1.0 40063.0 22682
46 2006.0 1224.0 0.0 0.0 1.0 44679.0 22230
47 2007.0 1291.0 0.0 0.0 1.0 49197.0 20654
48 2008.0 1172.0 0.0 0.0 0.0 55461.0 23041
49 2009.0 1324.0 0.0 0.0 7.0 50463.0 29016
50 2010.0 1433.0 0.0 0.0 13.0 54134.0 0
51 2011.0 1345.0 0.0 0.0 12.0 58759.0 0
52 2012.0 1575.0 0.0 0.0 37.0 60890.0 0
53 2013.0 1435.0 0.0 0.0 145.0 59762.0 0
54 2014.0 1539.0 0.0 0.0 152.0 58253.0 0

In [10]:
df_r=pd.merge(df, tempdf,on='Year',how='outer')
df_r.rename(columns={'Data':'EMFDB'}, inplace = True)
df_r


Out[10]:
Year HYTCP WYTCP SOEGP NUETP GDP CLPRB EMFDB
0 1960.0 290.0 0.0 0.0 0.0 NaN 11263 0.0
1 1961.0 297.0 0.0 0.0 0.0 NaN 11497 0.0
2 1962.0 304.0 0.0 0.0 0.0 NaN 13587 0.0
3 1963.0 325.0 0.0 0.0 0.0 1083.0 13306 0.0
4 1964.0 322.0 0.0 0.0 0.0 1230.0 11622 0.0
5 1965.0 350.0 0.0 0.0 0.0 1374.0 13930 0.0
6 1966.0 316.0 0.0 0.0 0.0 1494.0 14461 0.0
7 1967.0 363.0 0.0 0.0 0.0 1643.0 14430 0.0
8 1968.0 364.0 0.0 0.0 0.0 1899.0 11700 0.0
9 1969.0 341.0 0.0 0.0 0.0 2073.0 10405 0.0
10 1970.0 363.0 0.0 0.0 0.0 2359.0 8564 0.0
11 1971.0 363.0 0.0 0.0 0.0 2539.0 10888 0.0
12 1972.0 346.0 0.0 0.0 0.0 2743.0 10420 0.0
13 1973.0 286.0 0.0 0.0 0.0 3041.0 10826 0.0
14 1974.0 326.0 0.0 0.0 0.0 3999.0 10920 0.0
15 1975.0 357.0 0.0 0.0 0.0 6220.0 11949 0.0
16 1976.0 383.0 0.0 0.0 0.0 7470.0 11013 0.0
17 1977.0 512.0 0.0 0.0 0.0 7544.0 10998 0.0
18 1978.0 472.0 0.0 0.0 0.0 9130.0 11403 0.0
19 1979.0 459.0 0.0 0.0 0.0 10891.0 12308 0.0
20 1980.0 539.0 0.0 0.0 0.0 15282.0 12339 0.0
21 1981.0 590.0 0.0 0.0 0.0 21824.0 12604 0.0
22 1982.0 561.0 0.0 0.0 0.0 23461.0 12994 0.0
23 1983.0 593.0 0.0 0.0 0.0 22591.0 12261 0.0
24 1984.0 693.0 0.0 0.0 0.0 23787.0 13400 0.0
25 1985.0 748.0 0.0 0.0 0.0 26142.0 22354 0.0
26 1986.0 809.0 0.0 0.0 0.0 19181.0 24492 0.0
27 1987.0 872.0 0.0 0.0 0.0 22925.0 23275 0.0
28 1988.0 935.0 0.0 0.0 0.0 21270.0 27222 0.0
29 1989.0 873.0 0.0 0.0 0.0 23453.0 24679 0.0
30 1990.0 975.0 0.0 0.0 0.0 25040.0 26613 0.0
31 1991.0 896.0 0.0 0.0 0.0 22283.0 22401 0.0
32 1992.0 918.0 0.0 0.0 0.0 22753.0 23930 0.0
33 1993.0 1303.0 0.0 0.0 0.0 23284.0 24975 0.0
34 1994.0 1345.0 0.0 0.0 0.0 23605.0 24445 0.0
35 1995.0 1372.0 0.0 0.0 0.0 25449.0 26488 0.0
36 1996.0 1266.0 0.0 0.0 0.0 26737.0 23103 0.0
37 1997.0 1099.0 0.0 0.0 0.0 27581.0 22620 0.0
38 1998.0 1113.0 0.0 0.0 0.0 24030.0 20966 0.0
39 1999.0 817.0 0.0 0.0 0.0 24684.0 24414 0.0
40 2000.0 1002.0 0.0 0.0 0.0 26853.0 25599 0.0
41 2001.0 1346.0 0.0 0.0 1.0 28553.0 23618 0.0
42 2002.0 1439.0 0.0 0.0 0.0 29763.0 17877 0.0
43 2003.0 1583.0 0.0 0.0 0.0 32039.0 16863 0.0
44 2004.0 1498.0 0.0 0.0 0.0 35195.0 23587 0.0
45 2005.0 1464.0 0.0 0.0 1.0 40063.0 22682 0.0
46 2006.0 1224.0 0.0 0.0 1.0 44679.0 22230 0.0
47 2007.0 1291.0 0.0 0.0 1.0 49197.0 20654 0.0
48 2008.0 1172.0 0.0 0.0 0.0 55461.0 23041 0.0
49 2009.0 1324.0 0.0 0.0 7.0 50463.0 29016 0.0
50 2010.0 1433.0 0.0 0.0 13.0 54134.0 0 NaN
51 2011.0 1345.0 0.0 0.0 12.0 58759.0 0 NaN
52 2012.0 1575.0 0.0 0.0 37.0 60890.0 0 NaN
53 2013.0 1435.0 0.0 0.0 145.0 59762.0 0 NaN
54 2014.0 1539.0 0.0 0.0 152.0 58253.0 0 NaN

In [ ]: