In [ ]:
%reset
In [81]:
import os
import pandas as pd
In [2]:
os.chdir('/Users/yanlinli/Dropbox/141GroupPrj/OriginalData')
In [3]:
cdl_data = pd.read_excel('cdl_ca_gw_basins.xlsx')
In [4]:
cdl_data.shape
Out[4]:
In [5]:
cdl_data.head()
Out[5]:
In [6]:
import collections
collections.Counter(cdl_data.Basin_Name).most_common(3)
Out[6]:
In [7]:
cdl_data.ix[cdl_data.Basin_Name== u'SAN JOAQUIN VALLEY',].head()
Out[7]:
In [8]:
SanJQ = cdl_data.ix[cdl_data.Basin_Name== u'SAN JOAQUIN VALLEY',].reset_index(drop=True)
In [9]:
SanJQ.head()
Out[9]:
In [10]:
collections.Counter(SanJQ.Subbasin_N)
Out[10]:
In [11]:
subBasin_County_dict = {'CHOWCHILLA':"Madera","COSUMNES":"San Joaquin",
'DELTA-MENDOTA':'Stanislaus','EASTERN SAN JOAQUIN':'San Joaquin',
'KAWEAH':'Tulare','KERN COUNTY':'Kern','KETTLEMAN PLAIN':'Kings',
'KINGS':'Kings','MADERA':'Madera','MERCED':'Merced',
'MODESTO':'Stanislaus','PLEASANT VALLEY':'Fresno',
'TRACY':'San Joaquin','TULARE LAKE':'Kings','TULE':'Tulare',
'TURLOCK':'Stanislaus','WESTSIDE':'Fresno','WHITE WOLF':'Kern'}
In [12]:
SanJQ["county"] = SanJQ["Subbasin_N"].map(subBasin_County_dict)
In [13]:
SanJQ.head()
Out[13]:
In [14]:
dbl_crop = SanJQ.ix[SanJQ.cdl_class_name.str.startswith('Dbl'),]
In [15]:
dbl_crop.shape
Out[15]:
In [16]:
type(dbl_crop)
Out[16]:
In [17]:
dbl_crop.iloc[0:3]
Out[17]:
In [18]:
collections.Counter(dbl_crop.cdl_class_name)
Out[18]:
In [19]:
dbl_crop.acres = dbl_crop.acres/2
In [20]:
dbl_crop.head()
Out[20]:
In [21]:
dcl_cdl_class_name_0=[]
dcl_cdl_class_name_1=[]
for i in (dbl_crop.cdl_class_name):
dcl_cdl_class_name_0.append(str(i)[9:].split('/')[0])
dcl_cdl_class_name_1.append(str(i)[9:].split('/')[1])
In [22]:
dbl_crop.cdl_class_name= pd.DataFrame(dcl_cdl_class_name_0).values
dbl_crop_B = dbl_crop.copy()
dbl_crop_B.cdl_class_name= pd.DataFrame(dcl_cdl_class_name_1).values
dbl_crop_B.head()
Out[22]:
In [23]:
dbl_crop.head()
Out[23]:
In [24]:
collections.Counter(dbl_crop.cdl_class_name)
Out[24]:
In [25]:
#pd.DataFrame(dcl_cdl_class_name_0).values
type(pd.DataFrame(dcl_cdl_class_name_0).values)
pd.DataFrame(dcl_cdl_class_name_0).values.dtype
SanJQ.cdl_class_name.values.dtype
Out[25]:
In [26]:
double_full_df = pd.concat([dbl_crop, dbl_crop_B],ignore_index=True)
print double_full_df.shape
double_full_df.head()
Out[26]:
In [27]:
SanJQ.ix[~SanJQ.cdl_class_name.str.startswith('Dbl'),].shape
Out[27]:
In [28]:
SanJQ.shape
Out[28]:
In [29]:
SanJQ.ix[SanJQ.cdl_class_name.str.startswith('Dbl'),].shape
Out[29]:
In [30]:
SanJQ.ix[~SanJQ.cdl_class_name.str.startswith('Dbl'),].head()
Out[30]:
In [31]:
double_full_df.head()
Out[31]:
In [32]:
pd.concat([SanJQ.ix[~SanJQ.cdl_class_name.str.startswith('Dbl'),], double_full_df],ignore_index=True).shape
Out[32]:
In [33]:
6211+422*2
Out[33]:
In [34]:
SanJQ_single = pd.concat([SanJQ.ix[~SanJQ.cdl_class_name.str.startswith('Dbl'),], double_full_df],ignore_index=True)
In [35]:
SanJQ_single.shape
Out[35]:
In [36]:
SanJQ_single.head()
Out[36]:
In [37]:
collections.Counter(SanJQ_single.cdl_class_name)
type(SanJQ_single.cdl_class_name)
Out[37]:
In [38]:
len(SanJQ_single.cdl_class_name)
Out[38]:
In [39]:
SanJQ_single.ix[SanJQ_single.cdl_class_name =='Cantaloupe']
Out[39]:
In [40]:
SanJQ_single.set_value([6833,6862],'cdl_class_name',u'Cantaloupes')
SanJQ_single.iloc[[6833,6862],]
Out[40]:
In [41]:
#SanJQ_single.ix[SanJQ_single.cdl_class_name =='WinWht'] #Winter Wheat
#SanJQ_single.ix[SanJQ_single.cdl_class_name =='Durum Wht'] #Durum Wheat
In [42]:
ind = SanJQ_single.ix[SanJQ_single.cdl_class_name =='WinWht'].index.values
In [43]:
SanJQ_single.set_value(ind,'cdl_class_name',u'Winter Wheat')
SanJQ_single.ix[6212]
Out[43]:
In [44]:
indDW = SanJQ_single.ix[SanJQ_single.cdl_class_name =='Durum Wht'].index.values #Durum Wheat
indDW
Out[44]:
In [45]:
SanJQ_single.set_value(indDW,'cdl_class_name',u'Durum Wheat')
SanJQ_single.ix[indDW]
Out[45]:
In [46]:
#collections.Counter(SanJQ_single.cdl_class_name)
In [47]:
u'd'=='d'
Out[47]:
In [48]:
cropN_mainCrop_dict = {u'Alfalfa':'Alfalfa',
u'Almonds':'Al Pist',
u'Apples': 'Oth Dec',
u'Apricots':'Oth Dec',
u'Asparagus':'Oth Trk',
u'Barley':'Grain',
u'Broccoli':'Oth Trk',
u'Cabbage':'Oth Trk',
u'Camelina': 'Oth Fld',
u'Caneberries':'Oth Trk',
u'Blueberries':'Oth Trk',
u'Canola':'Oth Fld',
u'Cantaloupes': 'Cucurb',
u'Carrots':'Oth Trk',
u'Cauliflower':'Oth Trk',
u'Cherries': 'Oth Dec',
u'Christmas Trees': 'Oth Trk',
u'Citrus':'Subtrop',
u'Clover/Wildflowers':'Pasture',
u'Corn':'Corn',
u'Cotton':'Cotton',
u'Cranberries':'Oth Trk',
u'Cucumbers':'Cucurb',
u'Dry Beans': 'DryBean',
u'Durum Wheat': 'Grain',
u'Garlic':'On Gar',
u'Grapes':'Vine',
u'Grass/Pasture': "Pasture",
u'Greens': "Oth Trk",
u'Herbs': 'Oth Trk',
u'Honeydew Melons': 'Cucurb', u'Lettuce': 'Oth Trk',
u'Millet': 'Oth Fld',
u'Misc Vegs & Fruits': "Oth Dec",
u'Mustard': 'Oth Fld',
u'Nectarines': 'Oth Dec',
u'Oats': "Grain",
u'Olives': 'Subtrop',
u'Onions': 'On Gar',
u'Oranges': 'Subtrop',
u'Other Crops': 'Oth Fld',
u'Other Hay/Non Alfalfa': 'Grain',
u'Other Tree Crops':'Oth Trk',
u'Peaches': 'Oth Dec',
u'Pears': 'Oth Dec',
u'Peas': 'Oth Trk',
u'Pecans': 'Oth Dec',
u'Peppers': 'Oth Trk',
u'Pistachios': 'Al Pist',
u'Plums': 'Oth Dec',
u'Pomegranates': 'Oth Dec',
u'Pop or Orn Corn': 'Corn',
u'Potatoes': 'Potato',
u'Prunes': 'Oth Dec',
u'Pumpkins': 'Cucurb',
u'Radishes': "SgrBeet",
u'Rice': 'Rice',
u'Rye': 'Grain',
u'Safflower': 'Safflwr',
u'Sod/Grass Seed': 'Pasture',
u'Sorghum': 'Oth Fld',
u'Soybeans': 'DryBean',
u'Spring Wheat': 'Grain',
u'Squash': 'Cucurb',
u'Strawberries': 'Oth Trk',
u'Sugarbeets': 'SgrBeet',
u'Sunflower': 'Oth Fld',
u'Sweet Corn': 'Corn',
u'Sweet Potatoes': 'Potato',
u'Tomatoes': 'Pr Tom',
u'Triticale': 'Grain',
u'Turnips': 'SgrBeet',
u'Vetch': 'Oth Trk',
u'Walnuts': 'Oth Dec',
u'Watermelons': 'Cucurb',
u'Winter Wheat': 'Grain'}
In [49]:
my_map = cropN_mainCrop_dict.copy()
inv_map = {}
for k, v in my_map.iteritems():
inv_map[v] = inv_map.get(v, [])
inv_map[v].append(k)
print inv_map.keys()
inv_map
Out[49]:
In [50]:
SanJQ_single["Crop_Name"] = SanJQ_single["cdl_class_name"].map(cropN_mainCrop_dict)
In [51]:
##
cdl_null_df = SanJQ_single[pd.isnull(SanJQ_single).any(axis=1)]
print SanJQ_single.shape
print cdl_null_df.shape
collections.Counter(cdl_null_df.cdl_class_name)
Out[51]:
In [52]:
print 7055 - 1357
SanJQ_single = SanJQ_single.dropna()
SanJQ_single.shape
Out[52]:
In [53]:
SanJQ_single.head()
Out[53]:
In [54]:
pwd
Out[54]:
In [55]:
path='/Users/yanlinli/Dropbox/141groupprj/OriginalData'
os.listdir(path)
Out[55]:
In [56]:
Ag_2010 = pd.read_excel('Ag_2010.xlsx')
In [57]:
Ag_2010 = Ag_2010.drop('Year', 1)
Ag_2010.index = Ag_2010['Co_Name']
Ag_2010.head()
Out[57]:
In [58]:
Ag_2010 = Ag_2010.drop('Co_Name', 1)
Ag_2010.head()
Out[58]:
In [59]:
Ag_2010.rename(columns=lambda x: x.replace('AW', '').replace('WA','').strip(), inplace=True)
In [60]:
Ag_2010.head()
Out[60]:
In [61]:
#but i don't know how to creat 2d df from here
unstack_df = pd.DataFrame(Ag_2010.unstack())
unstack_df.head()
#unstack_df.index.levels
Out[61]:
In [62]:
from itertools import chain
county_list = []
for i in Ag_2010.index:
county_list.append([i]*20)
county_list = list(chain(*county_list))
print 'county list len', len(county_list)
crop_list = list(chain(*([Ag_2010.columns.values]*57)))
len(crop_list )
Out[62]:
In [63]:
Ag_2010_df = pd.DataFrame(list(chain(*Ag_2010.values)))
Ag_2010_df = Ag_2010_df.rename(columns={0:'WaterUsagePerAcre'})
Ag_2010_df['County'] = pd.DataFrame(county_list)
Ag_2010_df['Crop'] = pd.DataFrame(crop_list)
Ag_2010_df.head()
Out[63]:
In [64]:
print set(collections.Counter(SanJQ_single.Crop_Name))
print len(set(collections.Counter(SanJQ_single.Crop_Name)))
print set(collections.Counter(Ag_2010_df.Crop))
print len(set(collections.Counter(Ag_2010_df.Crop)))
print set(collections.Counter(Ag_2010_df.Crop))& set(collections.Counter(SanJQ_single.Crop_Name))
print len(set(collections.Counter(Ag_2010_df.Crop))&set(collections.Counter(SanJQ_single.Crop_Name)))
In [65]:
print set(collections.Counter(SanJQ_single.county))
set(collections.Counter(Ag_2010_df.County))& set(collections.Counter(SanJQ_single.county))
Out[65]:
In [66]:
SanJQ_single = SanJQ_single.rename(columns={'county':'County','Crop_Name':'Crop'})
print SanJQ_single.shape
print SanJQ_single.shape
SanJQ_single.head()
Out[66]:
In [67]:
ALL = pd.merge(SanJQ_single, Ag_2010_df, on=['County', 'Crop'])
ALL.shape
ALL = ALL.drop('Basin_Name',1)
In [68]:
print sum(ALL.isnull().any(axis=1))
ALL.head()
Out[68]:
In [69]:
collections.Counter(ALL.Subbasin_N)
print collections.Counter(ALL.County)
list(ALL)
Out[69]:
In [70]:
ALL[(ALL.Subbasin_N =='CHOWCHILLA') & (ALL.year ==2010) & (ALL.County=='Madera') & (ALL.Crop =='Al Pist')]
Out[70]:
In [71]:
32844.160041+4114.304601 #match the fisrt number in the below df
Out[71]:
In [72]:
f = {'acres':['sum'] }
ALL_merge = ALL.groupby([u'Subbasin_N', u'year',
u'County', u'Crop', u'WaterUsagePerAcre']).agg(f)
In [73]:
ALL_merge.head()
Out[73]:
In [74]:
print ALL_merge.shape
ALL.shape
Out[74]:
In [75]:
Names = pd.DataFrame([ALL_merge.ix[i,].name for i in range(0,1879)])
Values = pd.DataFrame([ALL_merge.ix[i,].values for i in range(0,1879)])
ALL2 = pd.concat([Names, Values], axis=1,ignore_index=True)
In [76]:
ALL2.shape
Out[76]:
In [77]:
Values.shape
Out[77]:
In [78]:
ALL2 = ALL2.rename(columns = {0:u'Subbasin_N', 1:u'Year', 2:'County',
3:u'Crop',4:u'WaterUsagePerAcre', 5:u'Acre'})
print ALL2.shape
ALL2.head()
Out[78]:
In [3]:
path='/Users/yanlinli/Dropbox/141groupprj/OriginalData'
os.listdir(path)
Out[3]:
In [80]:
precipitation = pd.read_excel('precipitation.xlsx')
In [81]:
collections.Counter(ALL2.County)
Out[81]:
In [82]:
collections.Counter(precipitation.County_N)
Out[82]:
In [83]:
precipitation = precipitation.rename(columns = {'County_N':u'County','year':u'Year'})
In [84]:
precipitation.head()
Out[84]:
In [85]:
ALL3 = pd.merge(ALL2, precipitation, on=['County', 'Year'])
In [86]:
ALL3.shape
Out[86]:
In [87]:
ALL3.head()
Out[87]:
In [89]:
#ALL3.to_excel('SubB_Yr_Cnty_Crp_Wtr_Acr_Zip_Prcip.xlsx', sheet_name='sheet1', index=False)
In [90]:
pwd
Out[90]:
In [91]:
#cropN_mainCrop_dict
In [90]:
CropGroup = pd.DataFrame(cropN_mainCrop_dict.items())
CropGroup = CropGroup.rename(columns = {0:u'SubCrop',1:u'Crop'})
CropGroup.head()
Out[90]:
In [91]:
collections.Counter(ALL3.Subbasin_N)
##u'KETTLEMAN PLAIN': 84,
##u'WHITE WOLF': 88})
#Sq.to_excel('Sq_long_La_16Sub.xlsx', sheet_name='sheet1', index=False)
Out[91]:
In [92]:
collections.Counter(ALL3.County)
Out[92]:
In [94]:
SqLL = pd.read_excel('Sq_long_La_16Sub.xlsx')
In [98]:
#SqLL.head()
#collections.Counter(SqLL.Subbasin_N)
In [102]:
collections.Counter(ALL3.Crop).keys()
Out[102]:
In [107]:
# data from price value in 2013
# econ value is dollar per acers
def avg(l):
return sum(l, 0.0) / len(l)
Econ_dict = { "Al Pist":2360*3.21,
"Alfalfa":7.0*206.00,
"Corn": 26.50*48.23,
"Cotton":1628*140.00,
"Cucurb":avg([260*20.20, 180*35.40, 200*25.90,580*13.00,300*16.00,330*15.60]),
#Honeydew Melons 260 2,730,000 20.20 Cwt. Cwt. $/Cwt.
#"Squash" 180 1,224,000 35.40 Cwt. Cwt. $/Cwt.
#"Cucumbers" 200 760,000 25.90 Cwt. Cwt. $/Cwt.
#"Watermelons" 580 5,800,000 13.00 Cwt. Cwt. $/Cwt.
#"Cantaloupes" 300 12,750,000 16.00 Cwt. Cwt. $/Cwt.
#"Pumpkins 330 1,947,000 15.60 Cwt. Cwt. $/Cwt.
"DryBean": 2320*56.80,
"Grain":5.35*190.36,
"On Gar":avg([ 400*13.20,165*60.30 ]),
#"Onions" spring 400 2,720,000 13.20 summer 490 3,822,000 6.40 Onions, Summer Storage 399 11,700,000 9.11
# "Garlic" 165 3,795,000 60.30
"Oth Dec":avg([ 8.88*466 ,5.73*682 ,2.48*3390 ,19.00*391,8.33*780,14.10*429 ,5.30*664 , 1.76 *3710,1750*2.06 ]),
#"Apples" 8.88 135,000 466 Tons Tons $/Ton
#"Apricots" 5.73 54,400 682 Tons Tons $/Ton
#"Cherries", 2.48 82,000 3,390 Tons Tons $/Ton
#"Pears", 19.00 220,000 391 Tons Tons $/Ton
#"Nectarines" 8.33 150,000 780 Tons Tons $/Ton
#"Peaches", 14.10 648,000 429 Tons Tons $/Ton
#"Plums", 5.30 95,400 664 Tons Tons $/Ton
#"Walnuts" 1.76 492,000 3,710 #tones Tons $/Ton
#"Pecans" 1,750 5,000 2.06 Pounds 1000pounds $/Pound
"Oth Fld":avg([1296.00* 27.1, 17.00*37.56]),
# sunflowers 1,296.00 751,500 27.1 Tons Tons $/Ton
# Sorghum2009 17.00 646,000 37.56 Tons Tons $/Ton
"Oth Trk":avg([320*29.60, 350*24.90, 32*152.00, 180*42.70, 107*248.00,425*41.70,385* 38.70 ,165*42.10,405*21.70 ]),
#"Carrots" 320 20,000,000 29.60 Cwt. Cwt. $/Cwt.
#"Lettuce" 350 33,600,000 24.90 Cwt. Cwt. $/Cwt.
#"Asparagus" 32 368,000 152.00 Cwt. Cwt. $/Cwt.
#"Cauliflower" 180 5,868,000 42.70 Cwt. Cwt. $/Cwt.
# berries 107 514,000 248.00 Cwt. Cwt. $/Cwt.
# "Peppers Bell", 425 8,465,000 41.70 Cwt. Cwt. $/Cwt.
# pepers Chile 385 2,640,000 38.70 Cwt. Cwt. $/Cwt.
# "Broccoli", 165 20,460,000 42.10 8 Cwt. Cwt. $/Cwt.
# "Cabbage", 405 5,670,000 21.70 Cwt. Cwt. $/Cwt.
"Pasture":0,
"Potato":425*17.1, # Cwt. Cwt. $/Cwt.
"Pro Tom":300*36.20, # Cwt. Cwt. $/Cwt
"Rice":84.80*20.9, # Cwt. Cwt. $/Cwt
"Safflwr": 2000.00*26.5, # Pounds Cwt. $/Cwt.
"SgrBeet": 43.40*52.1, # Tons Tons $/Ton
"Subtrop":avg([622*6.52,4.15*813 ]),
# orange 622 109000000 6.52
# Olives 4.15 166000 813 Tons Tons $/Ton
"Vine":900*5.07}# Cartons 3/ Cartons $/Carton
Econ_dict
Out[107]:
In [109]:
ALL3["EcnValue"] = ALL3["Crop"].map(Econ_dict)
ALL3.to_excel('SubB_Yr_Cnty_Crp_Wtr_Acr_Zip_Prcip_Ecn.xlsx', sheet_name='sheet1', index=False)
In [110]:
ALL3.head()
Out[110]:
In [ ]:
In [ ]:
In [82]:
SqLL = pd.read_excel('Sq_long_La_16Sub.xlsx')
print SqLL.shape
SqLL.head()
Out[82]:
In [85]:
type(SqLL)
print SqLL.latitude.dtype
print SqLL.longitude.dtype
Sub_Lon_Lat = SqLL.groupby(['Subbasin_N']).mean()
Sub_Lon_Lat = Sub_Lon_Lat.reset_index(True)
In [86]:
Sub_Lon_Lat
Out[86]:
In [87]:
Sub_Lon_Lat.to_excel('18Sub_Lon_Lat.xlsx', sheet_name='sheet1', index=False)