In [1]:
import pandas as pd
import numpy as np
df = pd.read_excel("Data/use_all.xlsx")
df.head()


Out[1]:
Data_Status State MSN 1960 1961 1962 1963 1964 1965 1966 ... 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
0 2014F AK ABICP 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 38 3 8 0 -3 -1 0 0 -1 0
1 2014F AK ARICP 47.0 84.0 74.0 89.0 119.0 132.0 248.0 ... 186 50 139 82 1537 1905 2250 2161 1923 1859
2 2014F AK ARTCP 47.0 84.0 74.0 89.0 119.0 132.0 248.0 ... 186 50 139 82 1537 1905 2250 2161 1923 1859
3 2014F AK ARTXP 47.0 84.0 74.0 89.0 119.0 132.0 248.0 ... 186 50 139 82 1537 1905 2250 2161 1923 1859
4 2014F AK AVACP 1032.0 1276.0 1030.0 648.0 459.0 293.0 215.0 ... 277 250 248 200 217 169 159 154 139 131

5 rows × 58 columns


In [7]:
#df.loc[0].T

In [13]:
#df[df.MSN=="HYTCP"]

In [2]:
df[(df.MSN=="HYTCP")&(df.State=="AK")]


Out[2]:
Data_Status State MSN 1960 1961 1962 1963 1964 1965 1966 ... 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
46 2014F AK HYTCP 290.0 297.0 304.0 325.0 322.0 350.0 316.0 ... 1464 1224 1291 1172 1324 1433 1345 1575 1435 1539

1 rows × 58 columns


In [3]:
alist=["HYTCP","WYTCP","SOEGP","NUETP"]
dftemp=df[df['MSN'].isin(alist)&(df.State=="AK")]
del dftemp['Data_Status']
#del dftemp['State']
del dftemp['MSN']
dftemp=dftemp.T
#print(dftemp)

In [35]:
#df.drop(0)
#statelist[0]=pd.DataFrame(index=range(0,4),columns=["HYTCP","WYTCP","SOEGP","NUETP"])
#print(statelist[0])
#datlist=[range(0,51)]
#datlist[0]==1
#datlist

In [4]:
statelist=["AK","AL","AR","AZ","CA","CO","CT","DC","DE","FL","GA","HI","IA","ID","IL","IN","KS","KY","LA","MA","MD","ME","MI","MN","MO","MS","MT","NC","ND","NE","NH","NJ","NM","NV","NY","OH","OK","OR","PA","RI","SC","SD","TN","TX","US","UT","VA","VT","WA","WI","WV","WY"]
len(statelist)
statelist[51]


Out[4]:
'WY'

In [5]:
alist=["HYTCP","WYTCP","SOEGP","NUETP"]
dftemp1=df[df['MSN'].isin(alist)&(df.State==statelist[0])]
del dftemp1['Data_Status']
del dftemp1['State']
del dftemp1['MSN']
dftemp1=dftemp1.T
print(dftemp1)


         46   89   117    126
1960   290.0  0.0  0.0    0.0
1961   297.0  0.0  0.0    0.0
1962   304.0  0.0  0.0    0.0
1963   325.0  0.0  0.0    0.0
1964   322.0  0.0  0.0    0.0
1965   350.0  0.0  0.0    0.0
1966   316.0  0.0  0.0    0.0
1967   363.0  0.0  0.0    0.0
1968   364.0  0.0  0.0    0.0
1969   341.0  0.0  0.0    0.0
1970   363.0  0.0  0.0    0.0
1971   363.0  0.0  0.0    0.0
1972   346.0  0.0  0.0    0.0
1973   286.0  0.0  0.0    0.0
1974   326.0  0.0  0.0    0.0
1975   357.0  0.0  0.0    0.0
1976   383.0  0.0  0.0    0.0
1977   512.0  0.0  0.0    0.0
1978   472.0  0.0  0.0    0.0
1979   459.0  0.0  0.0    0.0
1980   539.0  0.0  0.0    0.0
1981   590.0  0.0  0.0    0.0
1982   561.0  0.0  0.0    0.0
1983   593.0  0.0  0.0    0.0
1984   693.0  0.0  0.0    0.0
1985   748.0  0.0  0.0    0.0
1986   809.0  0.0  0.0    0.0
1987   872.0  0.0  0.0    0.0
1988   935.0  0.0  0.0    0.0
1989   873.0  0.0  0.0    0.0
1990   975.0  0.0  0.0    0.0
1991   896.0  0.0  0.0    0.0
1992   918.0  0.0  0.0    0.0
1993  1303.0  0.0  0.0    0.0
1994  1345.0  0.0  0.0    0.0
1995  1372.0  0.0  0.0    0.0
1996  1266.0  0.0  0.0    0.0
1997  1099.0  0.0  0.0    0.0
1998  1113.0  0.0  0.0    0.0
1999   817.0  0.0  0.0    0.0
2000  1002.0  0.0  0.0    0.0
2001  1346.0  0.0  0.0    1.0
2002  1439.0  0.0  0.0    0.0
2003  1583.0  0.0  0.0    0.0
2004  1498.0  0.0  0.0    0.0
2005  1464.0  0.0  0.0    1.0
2006  1224.0  0.0  0.0    1.0
2007  1291.0  0.0  0.0    1.0
2008  1172.0  0.0  0.0    0.0
2009  1324.0  0.0  0.0    7.0
2010  1433.0  0.0  0.0   13.0
2011  1345.0  0.0  0.0   12.0
2012  1575.0  0.0  0.0   37.0
2013  1435.0  0.0  0.0  145.0
2014  1539.0  0.0  0.0  152.0

In [7]:
import numpy as np
datalist=[]
for i in range(0,52):
    alist=["Year","HYTCP","WYTCP","SOEGP","NUETP"]
    dftemp=df[df['MSN'].isin(alist)&(df.State==statelist[i])]
    del dftemp['Data_Status']
    del dftemp['State']
    del dftemp['MSN']
    dftemp=dftemp.T
    #print(dftemp)
    dftemp.columns=['HYTCP','WYTCP', 'SOEGP', 'NUETP']
    #print(dftemp)
    datalist.append(dftemp)
    #datalist.append(pd.DataFrame(dftemp,index=range(1960,2015),columns=["HYTCP","WYTCP","SOEGP","NUETP"]))
    datalist[i].to_csv('Data/Data_States/%s.csv'% (statelist[i]), encoding='utf-8', index=True)
#print(datalist)

In [38]:
#dataframe可以使用to_csv方法方便地导出到csv文件中,如果数据中含有中文,一般encoding指定为”utf-8″,否则导出时程序会因为不能识别相应的字符串而抛出异常,index指定为False表示不用导出dataframe的index数据。
#datalist[0].to_csv('%s.csv'% (statelist[0]), encoding='utf-8', index=True)

In [ ]: