In [ ]:
%reset

In [81]:
import os
import pandas as pd

Part I: Data Clean


In [2]:
os.chdir('/Users/yanlinli/Dropbox/141GroupPrj/OriginalData')

In [3]:
cdl_data = pd.read_excel('cdl_ca_gw_basins.xlsx')

In [4]:
cdl_data.shape


Out[4]:
(58865, 5)

In [5]:
cdl_data.head()


Out[5]:
Basin_Name Subbasin_N year cdl_class_name acres
0 SMITH RIVER PLAIN NaN 2010 Grass/Pasture 15879.436603
1 SMITH RIVER PLAIN NaN 2010 Woody Wetlands 4380.066439
2 SMITH RIVER PLAIN NaN 2010 Developed/Open Space 4112.525443
3 SMITH RIVER PLAIN NaN 2010 Open Water 3371.728220
4 SMITH RIVER PLAIN NaN 2010 Evergreen Forest 3339.480967

In [6]:
import collections
collections.Counter(cdl_data.Basin_Name).most_common(3)


Out[6]:
[(u'SAN JOAQUIN VALLEY', 6633),
 (u'SACRAMENTO VALLEY', 5485),
 (u'SALINAS VALLEY', 2454)]

In [7]:
cdl_data.ix[cdl_data.Basin_Name== u'SAN JOAQUIN VALLEY',].head()


Out[7]:
Basin_Name Subbasin_N year cdl_class_name acres
32782 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Grass/Pasture 238527.587650
32783 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Walnuts 73249.299965
32784 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Grapes 62451.585532
32785 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Corn 55180.608525
32786 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Almonds 48995.140747

In [8]:
SanJQ = cdl_data.ix[cdl_data.Basin_Name== u'SAN JOAQUIN VALLEY',].reset_index(drop=True)

In [9]:
SanJQ.head()


Out[9]:
Basin_Name Subbasin_N year cdl_class_name acres
0 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Grass/Pasture 238527.587650
1 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Walnuts 73249.299965
2 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Grapes 62451.585532
3 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Corn 55180.608525
4 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Almonds 48995.140747

In [10]:
collections.Counter(SanJQ.Subbasin_N)


Out[10]:
Counter({u'CHOWCHILLA': 335,
         u'COSUMNES': 365,
         u'DELTA-MENDOTA': 442,
         u'EASTERN SAN JOAQUIN': 440,
         u'KAWEAH': 384,
         u'KERN COUNTY': 409,
         u'KETTLEMAN PLAIN': 210,
         u'KINGS': 434,
         u'MADERA': 379,
         u'MERCED': 388,
         u'MODESTO': 401,
         u'PLEASANT VALLEY': 211,
         u'TRACY': 408,
         u'TULARE LAKE': 377,
         u'TULE': 393,
         u'TURLOCK': 390,
         u'WESTSIDE': 391,
         u'WHITE WOLF': 276})

In [11]:
subBasin_County_dict = {'CHOWCHILLA':"Madera","COSUMNES":"San Joaquin",
                    'DELTA-MENDOTA':'Stanislaus','EASTERN SAN JOAQUIN':'San Joaquin',
                   'KAWEAH':'Tulare','KERN COUNTY':'Kern','KETTLEMAN PLAIN':'Kings',
                    'KINGS':'Kings','MADERA':'Madera','MERCED':'Merced',
                    'MODESTO':'Stanislaus','PLEASANT VALLEY':'Fresno',
                    'TRACY':'San Joaquin','TULARE LAKE':'Kings','TULE':'Tulare',
                    'TURLOCK':'Stanislaus','WESTSIDE':'Fresno','WHITE WOLF':'Kern'}

In [12]:
SanJQ["county"] = SanJQ["Subbasin_N"].map(subBasin_County_dict)

In [13]:
SanJQ.head()


Out[13]:
Basin_Name Subbasin_N year cdl_class_name acres county
0 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Grass/Pasture 238527.587650 San Joaquin
1 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Walnuts 73249.299965 San Joaquin
2 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Grapes 62451.585532 San Joaquin
3 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Corn 55180.608525 San Joaquin
4 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Almonds 48995.140747 San Joaquin

In [14]:
dbl_crop = SanJQ.ix[SanJQ.cdl_class_name.str.startswith('Dbl'),]

In [15]:
dbl_crop.shape


Out[15]:
(422, 6)

In [16]:
type(dbl_crop)


Out[16]:
pandas.core.frame.DataFrame

In [17]:
dbl_crop.iloc[0:3]


Out[17]:
Basin_Name Subbasin_N year cdl_class_name acres county
20 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Dbl Crop Oats/Corn 3479.367324 San Joaquin
26 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Dbl Crop WinWht/Corn 1360.166862 San Joaquin
44 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Dbl Crop WinWht/Sorghum 54.486737 San Joaquin

In [18]:
collections.Counter(dbl_crop.cdl_class_name)


Out[18]:
Counter({u'Dbl Crop Barley/Corn': 63,
         u'Dbl Crop Barley/Sorghum': 39,
         u'Dbl Crop Durum Wht/Sorghum': 6,
         u'Dbl Crop Lettuce/Cantaloupe': 2,
         u'Dbl Crop Lettuce/Cotton': 8,
         u'Dbl Crop Oats/Corn': 90,
         u'Dbl Crop WinWht/Corn': 97,
         u'Dbl Crop WinWht/Cotton': 37,
         u'Dbl Crop WinWht/Sorghum': 76,
         u'Dbl Crop WinWht/Soybeans': 4})

In [19]:
dbl_crop.acres = dbl_crop.acres/2


/Users/yanlinli/anaconda2/lib/python2.7/site-packages/pandas/core/generic.py:2773: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value

In [20]:
dbl_crop.head()


Out[20]:
Basin_Name Subbasin_N year cdl_class_name acres county
20 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Dbl Crop Oats/Corn 1739.683662 San Joaquin
26 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Dbl Crop WinWht/Corn 680.083431 San Joaquin
44 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Dbl Crop WinWht/Sorghum 27.243368 San Joaquin
89 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2011 Dbl Crop Oats/Corn 6434.994791 San Joaquin
95 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2011 Dbl Crop WinWht/Corn 1725.339194 San Joaquin

In [21]:
dcl_cdl_class_name_0=[]
dcl_cdl_class_name_1=[]
for i in (dbl_crop.cdl_class_name):
    dcl_cdl_class_name_0.append(str(i)[9:].split('/')[0])
    dcl_cdl_class_name_1.append(str(i)[9:].split('/')[1])

In [22]:
dbl_crop.cdl_class_name= pd.DataFrame(dcl_cdl_class_name_0).values
dbl_crop_B  = dbl_crop.copy()
dbl_crop_B.cdl_class_name= pd.DataFrame(dcl_cdl_class_name_1).values
dbl_crop_B.head()


Out[22]:
Basin_Name Subbasin_N year cdl_class_name acres county
20 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Corn 1739.683662 San Joaquin
26 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Corn 680.083431 San Joaquin
44 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Sorghum 27.243368 San Joaquin
89 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2011 Corn 6434.994791 San Joaquin
95 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2011 Corn 1725.339194 San Joaquin

In [23]:
dbl_crop.head()


Out[23]:
Basin_Name Subbasin_N year cdl_class_name acres county
20 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Oats 1739.683662 San Joaquin
26 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 WinWht 680.083431 San Joaquin
44 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 WinWht 27.243368 San Joaquin
89 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2011 Oats 6434.994791 San Joaquin
95 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2011 WinWht 1725.339194 San Joaquin

In [24]:
collections.Counter(dbl_crop.cdl_class_name)


Out[24]:
Counter({'Barley': 102,
         'Durum Wht': 6,
         'Lettuce': 10,
         'Oats': 90,
         'WinWht': 214})

In [25]:
#pd.DataFrame(dcl_cdl_class_name_0).values
type(pd.DataFrame(dcl_cdl_class_name_0).values)
pd.DataFrame(dcl_cdl_class_name_0).values.dtype
SanJQ.cdl_class_name.values.dtype


Out[25]:
dtype('O')

In [26]:
double_full_df = pd.concat([dbl_crop, dbl_crop_B],ignore_index=True)
print double_full_df.shape
double_full_df.head()


(844, 6)
Out[26]:
Basin_Name Subbasin_N year cdl_class_name acres county
0 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Oats 1739.683662 San Joaquin
1 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 WinWht 680.083431 San Joaquin
2 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 WinWht 27.243368 San Joaquin
3 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2011 Oats 6434.994791 San Joaquin
4 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2011 WinWht 1725.339194 San Joaquin

In [27]:
SanJQ.ix[~SanJQ.cdl_class_name.str.startswith('Dbl'),].shape


Out[27]:
(6211, 6)

In [28]:
SanJQ.shape


Out[28]:
(6633, 6)

In [29]:
SanJQ.ix[SanJQ.cdl_class_name.str.startswith('Dbl'),].shape


Out[29]:
(422, 6)

In [30]:
SanJQ.ix[~SanJQ.cdl_class_name.str.startswith('Dbl'),].head()


Out[30]:
Basin_Name Subbasin_N year cdl_class_name acres county
0 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Grass/Pasture 238527.587650 San Joaquin
1 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Walnuts 73249.299965 San Joaquin
2 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Grapes 62451.585532 San Joaquin
3 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Corn 55180.608525 San Joaquin
4 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Almonds 48995.140747 San Joaquin

In [31]:
double_full_df.head()


Out[31]:
Basin_Name Subbasin_N year cdl_class_name acres county
0 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Oats 1739.683662 San Joaquin
1 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 WinWht 680.083431 San Joaquin
2 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 WinWht 27.243368 San Joaquin
3 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2011 Oats 6434.994791 San Joaquin
4 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2011 WinWht 1725.339194 San Joaquin

In [32]:
pd.concat([SanJQ.ix[~SanJQ.cdl_class_name.str.startswith('Dbl'),], double_full_df],ignore_index=True).shape


Out[32]:
(7055, 6)

In [33]:
6211+422*2


Out[33]:
7055

In [34]:
SanJQ_single = pd.concat([SanJQ.ix[~SanJQ.cdl_class_name.str.startswith('Dbl'),], double_full_df],ignore_index=True)

In [35]:
SanJQ_single.shape


Out[35]:
(7055, 6)

In [36]:
SanJQ_single.head()


Out[36]:
Basin_Name Subbasin_N year cdl_class_name acres county
0 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Grass/Pasture 238527.587650 San Joaquin
1 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Walnuts 73249.299965 San Joaquin
2 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Grapes 62451.585532 San Joaquin
3 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Corn 55180.608525 San Joaquin
4 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Almonds 48995.140747 San Joaquin

In [37]:
collections.Counter(SanJQ_single.cdl_class_name)
type(SanJQ_single.cdl_class_name)


Out[37]:
pandas.core.series.Series

In [38]:
len(SanJQ_single.cdl_class_name)


Out[38]:
7055

In [39]:
SanJQ_single.ix[SanJQ_single.cdl_class_name =='Cantaloupe']


Out[39]:
Basin_Name Subbasin_N year cdl_class_name acres county
6833 SAN JOAQUIN VALLEY KINGS 2012 Cantaloupe 1.111974 Kings
6862 SAN JOAQUIN VALLEY WESTSIDE 2012 Cantaloupe 131.880142 Fresno

In [40]:
SanJQ_single.set_value([6833,6862],'cdl_class_name',u'Cantaloupes')
SanJQ_single.iloc[[6833,6862],]


Out[40]:
Basin_Name Subbasin_N year cdl_class_name acres county
6833 SAN JOAQUIN VALLEY KINGS 2012 Cantaloupes 1.111974 Kings
6862 SAN JOAQUIN VALLEY WESTSIDE 2012 Cantaloupes 131.880142 Fresno

In [41]:
#SanJQ_single.ix[SanJQ_single.cdl_class_name =='WinWht'] #Winter Wheat
#SanJQ_single.ix[SanJQ_single.cdl_class_name =='Durum Wht'] #Durum Wheat

In [42]:
ind = SanJQ_single.ix[SanJQ_single.cdl_class_name =='WinWht'].index.values

In [43]:
SanJQ_single.set_value(ind,'cdl_class_name',u'Winter Wheat')
SanJQ_single.ix[6212]


Out[43]:
Basin_Name         SAN JOAQUIN VALLEY
Subbasin_N        EASTERN SAN JOAQUIN
year                             2010
cdl_class_name           Winter Wheat
acres                         680.083
county                    San Joaquin
Name: 6212, dtype: object

In [44]:
indDW = SanJQ_single.ix[SanJQ_single.cdl_class_name =='Durum Wht'].index.values #Durum Wheat
indDW


Out[44]:
array([6476, 6483, 6509, 6546, 6571, 6578])

In [45]:
SanJQ_single.set_value(indDW,'cdl_class_name',u'Durum Wheat')
SanJQ_single.ix[indDW]


Out[45]:
Basin_Name Subbasin_N year cdl_class_name acres county
6476 SAN JOAQUIN VALLEY KAWEAH 2013 Durum Wheat 0.555987 Tulare
6483 SAN JOAQUIN VALLEY KAWEAH 2014 Durum Wheat 14.344467 Tulare
6509 SAN JOAQUIN VALLEY TULARE LAKE 2013 Durum Wheat 11.453334 Kings
6546 SAN JOAQUIN VALLEY TULE 2014 Durum Wheat 0.444790 Tulare
6571 SAN JOAQUIN VALLEY KERN COUNTY 2013 Durum Wheat 0.444790 Kern
6578 SAN JOAQUIN VALLEY KERN COUNTY 2014 Durum Wheat 0.111197 Kern

In [46]:
#collections.Counter(SanJQ_single.cdl_class_name)

In [47]:
u'd'=='d'


Out[47]:
True

In [48]:
cropN_mainCrop_dict = {u'Alfalfa':'Alfalfa',     
                  u'Almonds':'Al Pist',
                  u'Apples': 'Oth Dec',     
                  u'Apricots':'Oth Dec', 
                  u'Asparagus':'Oth Trk',   
                  u'Barley':'Grain', 
                  u'Broccoli':'Oth Trk', 
                  u'Cabbage':'Oth Trk',     
                  u'Camelina': 'Oth Fld',
                  u'Caneberries':'Oth Trk', 
                  u'Blueberries':'Oth Trk',
                  u'Canola':'Oth Fld',
                  u'Cantaloupes': 'Cucurb',
                  u'Carrots':'Oth Trk',     
                  u'Cauliflower':'Oth Trk', 
                  u'Cherries': 'Oth Dec',
                  u'Christmas Trees': 'Oth Trk',
                  u'Citrus':'Subtrop', 
                  u'Clover/Wildflowers':'Pasture',
                  u'Corn':'Corn', 
                  u'Cotton':'Cotton', 
                  u'Cranberries':'Oth Trk',
                  u'Cucumbers':'Cucurb', 
                  u'Dry Beans': 'DryBean',
                  u'Durum Wheat': 'Grain',
                  u'Garlic':'On Gar', 
                  u'Grapes':'Vine',
                  u'Grass/Pasture': "Pasture", 
                  u'Greens': "Oth Trk",
                  u'Herbs': 'Oth Trk',
                  u'Honeydew Melons': 'Cucurb', u'Lettuce': 'Oth Trk', 
                  u'Millet': 'Oth Fld',
                  u'Misc Vegs & Fruits': "Oth Dec",
                  u'Mustard': 'Oth Fld',                  
                  u'Nectarines': 'Oth Dec',
                  u'Oats': "Grain",
                  u'Olives': 'Subtrop', 
                  u'Onions': 'On Gar', 
                  u'Oranges': 'Subtrop',
                  u'Other Crops': 'Oth Fld',         
         u'Other Hay/Non Alfalfa': 'Grain',
         u'Other Tree Crops':'Oth Trk',
         u'Peaches': 'Oth Dec',
         u'Pears': 'Oth Dec',
         u'Peas': 'Oth Trk',
         u'Pecans': 'Oth Dec',
         u'Peppers': 'Oth Trk',
         u'Pistachios': 'Al Pist',
         u'Plums': 'Oth Dec',
         u'Pomegranates': 'Oth Dec',           
         u'Pop or Orn Corn': 'Corn',
         u'Potatoes': 'Potato',
         u'Prunes': 'Oth Dec',
         u'Pumpkins': 'Cucurb',
         u'Radishes': "SgrBeet",        
         u'Rice': 'Rice',
         u'Rye': 'Grain',
         u'Safflower': 'Safflwr',
         u'Sod/Grass Seed': 'Pasture',
         u'Sorghum': 'Oth Fld',
         u'Soybeans': 'DryBean',
         u'Spring Wheat': 'Grain',
         u'Squash': 'Cucurb',
         u'Strawberries': 'Oth Trk',
         u'Sugarbeets': 'SgrBeet',
         u'Sunflower': 'Oth Fld',
         u'Sweet Corn': 'Corn',
         u'Sweet Potatoes': 'Potato',
         u'Tomatoes': 'Pr Tom',
         u'Triticale': 'Grain',
         u'Turnips': 'SgrBeet',                           
         u'Vetch': 'Oth Trk',
         u'Walnuts': 'Oth Dec',
         u'Watermelons': 'Cucurb',
         u'Winter Wheat': 'Grain'}

In [49]:
my_map = cropN_mainCrop_dict.copy()
inv_map = {}
for k, v in my_map.iteritems():
    inv_map[v] = inv_map.get(v, [])
    inv_map[v].append(k)
print inv_map.keys()    
inv_map


['Alfalfa', 'Cucurb', 'Potato', 'Vine', 'Corn', 'On Gar', 'Oth Dec', 'DryBean', 'Cotton', 'Oth Trk', 'Oth Fld', 'Grain', 'Al Pist', 'Safflwr', 'Pasture', 'Subtrop', 'Rice', 'Pr Tom', 'SgrBeet']
Out[49]:
{'Al Pist': [u'Almonds', u'Pistachios'],
 'Alfalfa': [u'Alfalfa'],
 'Corn': [u'Corn', u'Sweet Corn', u'Pop or Orn Corn'],
 'Cotton': [u'Cotton'],
 'Cucurb': [u'Cantaloupes',
  u'Pumpkins',
  u'Honeydew Melons',
  u'Squash',
  u'Watermelons',
  u'Cucumbers'],
 'DryBean': [u'Dry Beans', u'Soybeans'],
 'Grain': [u'Barley',
  u'Spring Wheat',
  u'Durum Wheat',
  u'Triticale',
  u'Other Hay/Non Alfalfa',
  u'Rye',
  u'Winter Wheat',
  u'Oats'],
 'On Gar': [u'Garlic', u'Onions'],
 'Oth Dec': [u'Pears',
  u'Apricots',
  u'Misc Vegs & Fruits',
  u'Pomegranates',
  u'Cherries',
  u'Nectarines',
  u'Plums',
  u'Walnuts',
  u'Prunes',
  u'Apples',
  u'Pecans',
  u'Peaches'],
 'Oth Fld': [u'Sunflower',
  u'Other Crops',
  u'Sorghum',
  u'Camelina',
  u'Canola',
  u'Mustard',
  u'Millet'],
 'Oth Trk': [u'Blueberries',
  u'Vetch',
  u'Peppers',
  u'Other Tree Crops',
  u'Peas',
  u'Broccoli',
  u'Asparagus',
  u'Carrots',
  u'Cranberries',
  u'Cauliflower',
  u'Lettuce',
  u'Herbs',
  u'Strawberries',
  u'Cabbage',
  u'Greens',
  u'Christmas Trees',
  u'Caneberries'],
 'Pasture': [u'Clover/Wildflowers', u'Grass/Pasture', u'Sod/Grass Seed'],
 'Potato': [u'Sweet Potatoes', u'Potatoes'],
 'Pr Tom': [u'Tomatoes'],
 'Rice': [u'Rice'],
 'Safflwr': [u'Safflower'],
 'SgrBeet': [u'Radishes', u'Sugarbeets', u'Turnips'],
 'Subtrop': [u'Oranges', u'Olives', u'Citrus'],
 'Vine': [u'Grapes']}

In [50]:
SanJQ_single["Crop_Name"] = SanJQ_single["cdl_class_name"].map(cropN_mainCrop_dict)

In [51]:
##
cdl_null_df = SanJQ_single[pd.isnull(SanJQ_single).any(axis=1)]
print SanJQ_single.shape
print cdl_null_df.shape
collections.Counter(cdl_null_df.cdl_class_name)


(7055, 7)
(1357, 7)
Out[51]:
Counter({u'Aquaculture': 5,
         u'Barren': 108,
         u'Deciduous Forest': 86,
         u'Developed/High Intensity': 108,
         u'Developed/Low Intensity': 108,
         u'Developed/Med Intensity': 108,
         u'Developed/Open Space': 108,
         u'Evergreen Forest': 89,
         u'Fallow/Idle Cropland': 108,
         u'Forest': 8,
         u'Herbaceous Wetlands': 106,
         u'Mixed Forest': 91,
         u'Open Water': 108,
         u'Shrubland': 108,
         u'Wetlands': 6,
         u'Woody Wetlands': 102})

In [52]:
print 7055 - 1357
SanJQ_single = SanJQ_single.dropna()
SanJQ_single.shape


5698
Out[52]:
(5698, 7)

In [53]:
SanJQ_single.head()


Out[53]:
Basin_Name Subbasin_N year cdl_class_name acres county Crop_Name
0 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Grass/Pasture 238527.587650 San Joaquin Pasture
1 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Walnuts 73249.299965 San Joaquin Oth Dec
2 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Grapes 62451.585532 San Joaquin Vine
3 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Corn 55180.608525 San Joaquin Corn
4 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Almonds 48995.140747 San Joaquin Al Pist

In [54]:
pwd


Out[54]:
u'/Users/yanlinli/Dropbox/141groupprj/OriginalData'

In [55]:
path='/Users/yanlinli/Dropbox/141groupprj/OriginalData'
os.listdir(path)


Out[55]:
['.DS_Store',
 'Ag_2010.xlsx',
 'cdl_ca_gw_basins.xlsx',
 'Lati_Longi.xlsx',
 'OriginalData copy',
 'OtherTxt',
 'precipitation.xlsx',
 'Sq_long_La_16Sub.xlsx',
 'SubB_Yr_Cnty_Crp_Wtr_Acr_Zip_Prcip.xlsx',
 'WaterUsage_Per_Crop_2010.xlsx']

In [56]:
Ag_2010 = pd.read_excel('Ag_2010.xlsx')

In [57]:
Ag_2010 = Ag_2010.drop('Year', 1)
Ag_2010.index = Ag_2010['Co_Name']
Ag_2010.head()


Out[57]:
Co_Name Grain AW WA Rice AW WA Cotton AW WA SgrBeet AW WA Corn AW WA DryBean AW WA Safflwr AW WA Oth Fld AW WA Alfalfa AW WA ... Pr Tom AW WA Fr Tom AW WA Cucurb AW WA On Gar AW WA Potato AW WA Oth Trk AW WA Al Pist AW WA Oth Dec AW WA Subtrop AW WA Vine AW WA
Co_Name
Alameda Alameda 1.227136 4.885808 3.5756 3.209961 2.263332 1.940346 1.601214 2.226513 5.566410 ... 2.427646 1.978039 1.332642 2.638490 2.720195 3.322419 4.012059 4.089118 3.200228 1.322920
Alpine Alpine 1.036609 4.885808 3.5756 3.209961 2.263332 1.940346 1.601214 2.226513 3.979483 ... 2.427646 1.978039 1.332642 2.638490 2.720195 1.745486 3.435330 3.093030 2.750411 1.473550
Amador Amador 0.152481 4.885808 3.5756 3.209961 2.263332 1.940346 1.601214 2.703687 3.511449 ... 2.427646 1.978039 1.332642 2.638490 2.720195 2.418772 3.435330 2.729704 2.750411 0.581198
Butte Butte 0.151040 4.200113 3.5756 3.209961 2.263332 1.508677 0.876382 1.660225 3.151581 ... 2.427646 1.978039 0.983734 2.164283 2.720195 1.758203 2.264204 2.192851 1.919587 1.585771
Calaveras Calaveras 0.258701 4.885808 3.5756 3.209961 2.263332 1.940346 1.601214 1.517600 3.519420 ... 2.427646 1.978039 1.332642 2.638490 2.720195 1.063208 3.435330 2.741638 2.401690 0.664280

5 rows × 21 columns


In [58]:
Ag_2010 = Ag_2010.drop('Co_Name', 1)
Ag_2010.head()


Out[58]:
Grain AW WA Rice AW WA Cotton AW WA SgrBeet AW WA Corn AW WA DryBean AW WA Safflwr AW WA Oth Fld AW WA Alfalfa AW WA Pasture AW WA Pr Tom AW WA Fr Tom AW WA Cucurb AW WA On Gar AW WA Potato AW WA Oth Trk AW WA Al Pist AW WA Oth Dec AW WA Subtrop AW WA Vine AW WA
Co_Name
Alameda 1.227136 4.885808 3.5756 3.209961 2.263332 1.940346 1.601214 2.226513 5.566410 4.150807 2.427646 1.978039 1.332642 2.638490 2.720195 3.322419 4.012059 4.089118 3.200228 1.322920
Alpine 1.036609 4.885808 3.5756 3.209961 2.263332 1.940346 1.601214 2.226513 3.979483 4.191053 2.427646 1.978039 1.332642 2.638490 2.720195 1.745486 3.435330 3.093030 2.750411 1.473550
Amador 0.152481 4.885808 3.5756 3.209961 2.263332 1.940346 1.601214 2.703687 3.511449 3.753556 2.427646 1.978039 1.332642 2.638490 2.720195 2.418772 3.435330 2.729704 2.750411 0.581198
Butte 0.151040 4.200113 3.5756 3.209961 2.263332 1.508677 0.876382 1.660225 3.151581 3.446727 2.427646 1.978039 0.983734 2.164283 2.720195 1.758203 2.264204 2.192851 1.919587 1.585771
Calaveras 0.258701 4.885808 3.5756 3.209961 2.263332 1.940346 1.601214 1.517600 3.519420 3.477850 2.427646 1.978039 1.332642 2.638490 2.720195 1.063208 3.435330 2.741638 2.401690 0.664280

In [59]:
Ag_2010.rename(columns=lambda x: x.replace('AW', '').replace('WA','').strip(), inplace=True)

In [60]:
Ag_2010.head()


Out[60]:
Grain Rice Cotton SgrBeet Corn DryBean Safflwr Oth Fld Alfalfa Pasture Pr Tom Fr Tom Cucurb On Gar Potato Oth Trk Al Pist Oth Dec Subtrop Vine
Co_Name
Alameda 1.227136 4.885808 3.5756 3.209961 2.263332 1.940346 1.601214 2.226513 5.566410 4.150807 2.427646 1.978039 1.332642 2.638490 2.720195 3.322419 4.012059 4.089118 3.200228 1.322920
Alpine 1.036609 4.885808 3.5756 3.209961 2.263332 1.940346 1.601214 2.226513 3.979483 4.191053 2.427646 1.978039 1.332642 2.638490 2.720195 1.745486 3.435330 3.093030 2.750411 1.473550
Amador 0.152481 4.885808 3.5756 3.209961 2.263332 1.940346 1.601214 2.703687 3.511449 3.753556 2.427646 1.978039 1.332642 2.638490 2.720195 2.418772 3.435330 2.729704 2.750411 0.581198
Butte 0.151040 4.200113 3.5756 3.209961 2.263332 1.508677 0.876382 1.660225 3.151581 3.446727 2.427646 1.978039 0.983734 2.164283 2.720195 1.758203 2.264204 2.192851 1.919587 1.585771
Calaveras 0.258701 4.885808 3.5756 3.209961 2.263332 1.940346 1.601214 1.517600 3.519420 3.477850 2.427646 1.978039 1.332642 2.638490 2.720195 1.063208 3.435330 2.741638 2.401690 0.664280

In [61]:
#but i don't know how to creat 2d df from here 
unstack_df = pd.DataFrame(Ag_2010.unstack())
unstack_df.head()
#unstack_df.index.levels


Out[61]:
0
Co_Name
Grain Alameda 1.227136
Alpine 1.036609
Amador 0.152481
Butte 0.151040
Calaveras 0.258701

In [62]:
from itertools import chain
county_list = []
for i in Ag_2010.index:
    county_list.append([i]*20) 
county_list = list(chain(*county_list))
print 'county list len', len(county_list)  
crop_list = list(chain(*([Ag_2010.columns.values]*57)))
len(crop_list )


county list len 1140
Out[62]:
1140

In [63]:
Ag_2010_df = pd.DataFrame(list(chain(*Ag_2010.values)))
Ag_2010_df = Ag_2010_df.rename(columns={0:'WaterUsagePerAcre'})
Ag_2010_df['County'] =  pd.DataFrame(county_list)
Ag_2010_df['Crop'] =  pd.DataFrame(crop_list)
Ag_2010_df.head()


Out[63]:
WaterUsagePerAcre County Crop
0 1.227136 Alameda Grain
1 4.885808 Alameda Rice
2 3.575600 Alameda Cotton
3 3.209961 Alameda SgrBeet
4 2.263332 Alameda Corn

In [64]:
print set(collections.Counter(SanJQ_single.Crop_Name))
print len(set(collections.Counter(SanJQ_single.Crop_Name)))
print set(collections.Counter(Ag_2010_df.Crop))
print len(set(collections.Counter(Ag_2010_df.Crop)))
print set(collections.Counter(Ag_2010_df.Crop))& set(collections.Counter(SanJQ_single.Crop_Name))
print len(set(collections.Counter(Ag_2010_df.Crop))&set(collections.Counter(SanJQ_single.Crop_Name)))


set(['Alfalfa', 'Cucurb', 'Potato', 'Oth Dec', 'Corn', 'On Gar', 'Vine', 'DryBean', 'Cotton', 'Oth Trk', 'Oth Fld', 'Grain', 'Al Pist', 'Safflwr', 'Pasture', 'Subtrop', 'Rice', 'Pr Tom', 'SgrBeet'])
19
set([u'Alfalfa', u'Cucurb', u'SgrBeet', u'Potato', u'Oth Dec', u'Corn', u'On Gar', u'Vine', u'Oth Fld', u'DryBean', u'Oth Trk', u'Fr Tom', u'Grain', u'Al Pist', u'Safflwr', u'Pasture', u'Subtrop', u'Rice', u'Pr Tom', u'Cotton'])
20
set(['Alfalfa', 'Cucurb', 'SgrBeet', 'Potato', 'Vine', 'Corn', 'On Gar', 'Oth Dec', 'DryBean', 'Oth Trk', 'Oth Fld', 'Grain', 'Al Pist', 'Safflwr', 'Pasture', 'Subtrop', 'Rice', 'Pr Tom', 'Cotton'])
19

In [65]:
print set(collections.Counter(SanJQ_single.county))
set(collections.Counter(Ag_2010_df.County))& set(collections.Counter(SanJQ_single.county))


set(['San Joaquin', 'Fresno', 'Merced', 'Kern', 'Madera', 'Kings', 'Tulare', 'Stanislaus'])
Out[65]:
{'Fresno',
 'Kern',
 'Kings',
 'Madera',
 'Merced',
 'San Joaquin',
 'Stanislaus',
 'Tulare'}

In [66]:
SanJQ_single = SanJQ_single.rename(columns={'county':'County','Crop_Name':'Crop'})
print SanJQ_single.shape
print SanJQ_single.shape
SanJQ_single.head()


(5698, 7)
(5698, 7)
Out[66]:
Basin_Name Subbasin_N year cdl_class_name acres County Crop
0 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Grass/Pasture 238527.587650 San Joaquin Pasture
1 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Walnuts 73249.299965 San Joaquin Oth Dec
2 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Grapes 62451.585532 San Joaquin Vine
3 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Corn 55180.608525 San Joaquin Corn
4 SAN JOAQUIN VALLEY EASTERN SAN JOAQUIN 2010 Almonds 48995.140747 San Joaquin Al Pist

In [67]:
ALL = pd.merge(SanJQ_single, Ag_2010_df, on=['County', 'Crop'])
ALL.shape
ALL = ALL.drop('Basin_Name',1)

In [68]:
print sum(ALL.isnull().any(axis=1))
ALL.head()


0
Out[68]:
Subbasin_N year cdl_class_name acres County Crop WaterUsagePerAcre
0 EASTERN SAN JOAQUIN 2010 Grass/Pasture 238527.587650 San Joaquin Pasture 5.139264
1 EASTERN SAN JOAQUIN 2010 Clover/Wildflowers 4893.576132 San Joaquin Pasture 5.139264
2 EASTERN SAN JOAQUIN 2010 Sod/Grass Seed 2493.490983 San Joaquin Pasture 5.139264
3 EASTERN SAN JOAQUIN 2011 Grass/Pasture 238056.332977 San Joaquin Pasture 5.139264
4 EASTERN SAN JOAQUIN 2011 Clover/Wildflowers 5796.054407 San Joaquin Pasture 5.139264

In [69]:
collections.Counter(ALL.Subbasin_N)
print collections.Counter(ALL.County)
list(ALL)


Counter({'Stanislaus': 1070, 'San Joaquin': 1027, 'Kings': 892, 'Tulare': 684, 'Madera': 622, 'Kern': 572, 'Fresno': 492, 'Merced': 339})
Out[69]:
[u'Subbasin_N',
 u'year',
 u'cdl_class_name',
 u'acres',
 'County',
 'Crop',
 'WaterUsagePerAcre']

In [70]:
ALL[(ALL.Subbasin_N =='CHOWCHILLA') & (ALL.year ==2010) & (ALL.County=='Madera') & (ALL.Crop =='Al Pist')]


Out[70]:
Subbasin_N year cdl_class_name acres County Crop WaterUsagePerAcre
2448 CHOWCHILLA 2010 Almonds 32844.160041 Madera Al Pist 3.344023
2449 CHOWCHILLA 2010 Pistachios 4114.304601 Madera Al Pist 3.344023

In [71]:
32844.160041+4114.304601 #match the fisrt number in the below df


Out[71]:
36958.464642000006

In [72]:
f = {'acres':['sum'] }
ALL_merge = ALL.groupby([u'Subbasin_N', u'year',
             u'County', u'Crop', u'WaterUsagePerAcre']).agg(f)

In [73]:
ALL_merge.head()


Out[73]:
acres
sum
Subbasin_N year County Crop WaterUsagePerAcre
CHOWCHILLA 2010 Madera Al Pist 3.344023 36958.464642
Alfalfa 4.318180 34392.028150
Corn 2.545571 6004.104783
Cotton 3.152639 3535.633219
Cucurb 1.605352 163.237815

In [74]:
print ALL_merge.shape
ALL.shape


(1879, 1)
Out[74]:
(5698, 7)

In [75]:
Names = pd.DataFrame([ALL_merge.ix[i,].name for i in range(0,1879)])
Values = pd.DataFrame([ALL_merge.ix[i,].values for i in range(0,1879)])
ALL2 = pd.concat([Names, Values], axis=1,ignore_index=True)

In [76]:
ALL2.shape


Out[76]:
(1879, 6)

In [77]:
Values.shape


Out[77]:
(1879, 1)

In [78]:
ALL2 = ALL2.rename(columns = {0:u'Subbasin_N', 1:u'Year', 2:'County',
                       3:u'Crop',4:u'WaterUsagePerAcre', 5:u'Acre'})
print ALL2.shape
ALL2.head()


(1879, 6)
Out[78]:
Subbasin_N Year County Crop WaterUsagePerAcre Acre
0 CHOWCHILLA 2010 Madera Al Pist 3.344023 36958.464642
1 CHOWCHILLA 2010 Madera Alfalfa 4.318180 34392.028150
2 CHOWCHILLA 2010 Madera Corn 2.545571 6004.104783
3 CHOWCHILLA 2010 Madera Cotton 3.152639 3535.633219
4 CHOWCHILLA 2010 Madera Cucurb 1.605352 163.237815

In [3]:
path='/Users/yanlinli/Dropbox/141groupprj/OriginalData'
os.listdir(path)


Out[3]:
['.DS_Store',
 'Ag_2010.xlsx',
 'cdl_ca_gw_basins.xlsx',
 'Lati_Longi.xlsx',
 'OriginalData copy',
 'OtherTxt',
 'precipitation.xlsx',
 'Sq_long_La_16Sub.xlsx',
 'SubB_Yr_Cnty_Crp_Wtr_Acr_Zip_Prcip.xlsx',
 'SubB_Yr_Cnty_Crp_Wtr_Acr_Zip_Prcip_Ecn.xlsx',
 'WaterUsage_Per_Crop_2010.xlsx']

In [80]:
precipitation = pd.read_excel('precipitation.xlsx')

In [81]:
collections.Counter(ALL2.County)


Out[81]:
Counter({'Fresno': 191,
         'Kern': 196,
         'Kings': 300,
         'Madera': 213,
         'Merced': 110,
         'San Joaquin': 324,
         'Stanislaus': 330,
         'Tulare': 215})

In [82]:
collections.Counter(precipitation.County_N)


Out[82]:
Counter({u'Fresno': 6,
         u'Kern': 6,
         u'Kings': 6,
         u'Madera': 6,
         u'Merced': 6,
         u'San Joaquin': 6,
         u'Stanislaus': 6,
         u'Tulare': 6})

In [83]:
precipitation = precipitation.rename(columns = {'County_N':u'County','year':u'Year'})

In [84]:
precipitation.head()


Out[84]:
County zipcode Year Precip
0 Fresno 93210 2010 7.273551
1 Fresno 93210 2011 6.897796
2 Fresno 93210 2012 1.789807
3 Fresno 93210 2013 2.816295
4 Fresno 93210 2014 5.495055

In [85]:
ALL3 = pd.merge(ALL2, precipitation, on=['County', 'Year'])

In [86]:
ALL3.shape


Out[86]:
(1879, 8)

In [87]:
ALL3.head()


Out[87]:
Subbasin_N Year County Crop WaterUsagePerAcre Acre zipcode Precip
0 CHOWCHILLA 2010 Madera Al Pist 3.344023 36958.464642 93638 14.15
1 CHOWCHILLA 2010 Madera Alfalfa 4.318180 34392.028150 93638 14.15
2 CHOWCHILLA 2010 Madera Corn 2.545571 6004.104783 93638 14.15
3 CHOWCHILLA 2010 Madera Cotton 3.152639 3535.633219 93638 14.15
4 CHOWCHILLA 2010 Madera Cucurb 1.605352 163.237815 93638 14.15

In [89]:
#ALL3.to_excel('SubB_Yr_Cnty_Crp_Wtr_Acr_Zip_Prcip.xlsx', sheet_name='sheet1', index=False)

In [90]:
pwd


Out[90]:
u'/Users/yanlinli/Dropbox/141groupprj/OriginalData'

In [91]:
#cropN_mainCrop_dict

In [90]:
CropGroup = pd.DataFrame(cropN_mainCrop_dict.items())
CropGroup = CropGroup.rename(columns = {0:u'SubCrop',1:u'Crop'})
CropGroup.head()


Out[90]:
SubCrop Crop
0 Cantaloupes Cucurb
1 Olives Subtrop
2 Blueberries Oth Trk
3 Durum Wheat Grain
4 Rye Grain

In [91]:
collections.Counter(ALL3.Subbasin_N)
##u'KETTLEMAN PLAIN': 84,
##u'WHITE WOLF': 88})                 
#Sq.to_excel('Sq_long_La_16Sub.xlsx', sheet_name='sheet1', index=False)


Out[91]:
Counter({u'CHOWCHILLA': 106,
         u'COSUMNES': 104,
         u'DELTA-MENDOTA': 112,
         u'EASTERN SAN JOAQUIN': 110,
         u'KAWEAH': 109,
         u'KERN COUNTY': 108,
         u'KETTLEMAN PLAIN': 84,
         u'KINGS': 111,
         u'MADERA': 107,
         u'MERCED': 110,
         u'MODESTO': 110,
         u'PLEASANT VALLEY': 82,
         u'TRACY': 110,
         u'TULARE LAKE': 105,
         u'TULE': 106,
         u'TURLOCK': 108,
         u'WESTSIDE': 109,
         u'WHITE WOLF': 88})

In [92]:
collections.Counter(ALL3.County)


Out[92]:
Counter({'Fresno': 191,
         'Kern': 196,
         'Kings': 300,
         'Madera': 213,
         'Merced': 110,
         'San Joaquin': 324,
         'Stanislaus': 330,
         'Tulare': 215})

In [94]:
SqLL = pd.read_excel('Sq_long_La_16Sub.xlsx')

In [98]:
#SqLL.head()
#collections.Counter(SqLL.Subbasin_N)

In [102]:
collections.Counter(ALL3.Crop).keys()


Out[102]:
['Alfalfa',
 'Cucurb',
 'SgrBeet',
 'Potato',
 'Vine',
 'Corn',
 'On Gar',
 'Oth Dec',
 'DryBean',
 'Oth Trk',
 'Oth Fld',
 'Grain',
 'Al Pist',
 'Safflwr',
 'Pasture',
 'Subtrop',
 'Rice',
 'Pr Tom',
 'Cotton']

In [107]:
# data from price value in 2013 
# econ value is dollar per acers
def avg(l):
    return sum(l, 0.0) / len(l)

Econ_dict = { "Al Pist":2360*3.21, 
         "Alfalfa":7.0*206.00,
         "Corn": 26.50*48.23,
         "Cotton":1628*140.00, 
         "Cucurb":avg([260*20.20, 180*35.40, 200*25.90,580*13.00,300*16.00,330*15.60]),
#Honeydew Melons  260 2,730,000 20.20 Cwt. Cwt. $/Cwt.
#"Squash"   180 1,224,000 35.40   Cwt. Cwt. $/Cwt.
#"Cucumbers"  200 760,000 25.90   Cwt. Cwt. $/Cwt.
#"Watermelons"  580 5,800,000 13.00  Cwt. Cwt. $/Cwt.
#"Cantaloupes"   300 12,750,000 16.00  Cwt. Cwt. $/Cwt.
#"Pumpkins    330 1,947,000 15.60   Cwt. Cwt. $/Cwt.

         "DryBean": 2320*56.80, 
         "Grain":5.35*190.36,
         "On Gar":avg([ 400*13.20,165*60.30 ]), 
#"Onions"  spring 400 2,720,000 13.20   summer  490 3,822,000 6.40   Onions, Summer Storage 399 11,700,000 9.11
# "Garlic"   165 3,795,000 60.30
         "Oth Dec":avg([ 8.88*466  ,5.73*682  ,2.48*3390  ,19.00*391,8.33*780,14.10*429 ,5.30*664 , 1.76 *3710,1750*2.06    ]),
#"Apples" 8.88 135,000 466  Tons Tons $/Ton
#"Apricots"  5.73 54,400 682   Tons Tons $/Ton
#"Cherries", 2.48 82,000 3,390  Tons Tons $/Ton
#"Pears",  19.00 220,000 391  Tons Tons $/Ton
#"Nectarines"  8.33 150,000 780 Tons Tons $/Ton
#"Peaches", 14.10 648,000 429 Tons Tons $/Ton
#"Plums",  5.30 95,400 664   Tons Tons $/Ton
#"Walnuts" 1.76 492,000 3,710  #tones Tons $/Ton
#"Pecans"  1,750  5,000 2.06  Pounds 1000pounds $/Pound
         "Oth Fld":avg([1296.00* 27.1, 17.00*37.56]),
# sunflowers 1,296.00 751,500 27.1   Tons Tons $/Ton
 # Sorghum2009   17.00 646,000 37.56   Tons Tons $/Ton
         "Oth Trk":avg([320*29.60, 350*24.90, 32*152.00, 180*42.70, 107*248.00,425*41.70,385* 38.70 ,165*42.10,405*21.70 ]),
#"Carrots" 320 20,000,000 29.60  Cwt. Cwt. $/Cwt.
#"Lettuce"  350 33,600,000 24.90 Cwt. Cwt. $/Cwt.
#"Asparagus"  32  368,000  152.00  Cwt. Cwt. $/Cwt.
#"Cauliflower"  180 5,868,000 42.70  Cwt. Cwt. $/Cwt.
# berries  107 514,000 248.00 Cwt. Cwt. $/Cwt.
# "Peppers Bell", 425 8,465,000 41.70  Cwt. Cwt. $/Cwt.
# pepers Chile    385 2,640,000 38.70   Cwt. Cwt. $/Cwt.
# "Broccoli",  165 20,460,000 42.10 8   Cwt. Cwt. $/Cwt.
# "Cabbage",  405 5,670,000 21.70   Cwt. Cwt. $/Cwt.
         "Pasture":0, 
         "Potato":425*17.1, # Cwt. Cwt. $/Cwt.
         "Pro Tom":300*36.20, # Cwt. Cwt. $/Cwt 
         "Rice":84.80*20.9, # Cwt. Cwt. $/Cwt
         "Safflwr": 2000.00*26.5, #  Pounds Cwt. $/Cwt.
         "SgrBeet": 43.40*52.1,  # Tons Tons $/Ton
          "Subtrop":avg([622*6.52,4.15*813  ]), 
# orange 622 109000000 6.52
# Olives  4.15 166000 813  Tons Tons $/Ton
          "Vine":900*5.07}# Cartons 3/ Cartons $/Carton


Econ_dict


Out[107]:
{'Al Pist': 7575.6,
 'Alfalfa': 1442.0,
 'Corn': 1278.095,
 'Cotton': 227920.0,
 'Cucurb': 5715.333333333333,
 'DryBean': 131776.0,
 'Grain': 1018.426,
 'On Gar': 7614.75,
 'Oth Dec': 5564.693333333333,
 'Oth Fld': 17880.059999999998,
 'Oth Trk': 11736.666666666666,
 'Pasture': 0,
 'Potato': 7267.500000000001,
 'Pro Tom': 10860.0,
 'Rice': 1772.3199999999997,
 'Safflwr': 53000.0,
 'SgrBeet': 2261.14,
 'Subtrop': 3714.6949999999997,
 'Vine': 4563.0}

In [109]:
ALL3["EcnValue"] = ALL3["Crop"].map(Econ_dict)
ALL3.to_excel('SubB_Yr_Cnty_Crp_Wtr_Acr_Zip_Prcip_Ecn.xlsx', sheet_name='sheet1', index=False)

In [110]:
ALL3.head()


Out[110]:
Subbasin_N Year County Crop WaterUsagePerAcre Acre zipcode Precip EcnValue
0 CHOWCHILLA 2010 Madera Al Pist 3.344023 36958.464642 93638 14.15 7575.600000
1 CHOWCHILLA 2010 Madera Alfalfa 4.318180 34392.028150 93638 14.15 1442.000000
2 CHOWCHILLA 2010 Madera Corn 2.545571 6004.104783 93638 14.15 1278.095000
3 CHOWCHILLA 2010 Madera Cotton 3.152639 3535.633219 93638 14.15 227920.000000
4 CHOWCHILLA 2010 Madera Cucurb 1.605352 163.237815 93638 14.15 5715.333333

In [ ]:


In [ ]:


In [82]:
SqLL = pd.read_excel('Sq_long_La_16Sub.xlsx')
print SqLL.shape
SqLL.head()


(18675, 4)
Out[82]:
Basin_Name Subbasin_N longitude latitude
0 SAN JOAQUIN VALLEY KERN COUNTY -119.088229 35.791916
1 SAN JOAQUIN VALLEY KERN COUNTY -119.088728 35.790731
2 SAN JOAQUIN VALLEY KERN COUNTY -119.012405 35.790597
3 SAN JOAQUIN VALLEY KERN COUNTY -119.009633 35.790596
4 SAN JOAQUIN VALLEY KERN COUNTY -119.009541 35.790596

In [85]:
type(SqLL)
print SqLL.latitude.dtype
print SqLL.longitude.dtype
Sub_Lon_Lat = SqLL.groupby(['Subbasin_N']).mean()
Sub_Lon_Lat = Sub_Lon_Lat.reset_index(True)


float64
float64

In [86]:
Sub_Lon_Lat


Out[86]:
Subbasin_N longitude latitude
0 CHOWCHILLA -120.341671 37.090853
1 COSUMNES -121.078087 38.330043
2 DELTA-MENDOTA -120.929047 37.041357
3 EASTERN SAN JOAQUIN -121.036118 38.016319
4 KAWEAH -119.080095 36.307722
5 KERN COUNTY -119.319665 35.317558
6 KETTLEMAN PLAIN -120.011155 35.872416
7 KINGS -119.504783 36.685465
8 MADERA -119.917505 37.030662
9 MERCED -120.427700 37.328021
10 MODESTO -120.770918 37.715208
11 PLEASANT VALLEY -120.110955 35.964013
12 TRACY -121.603805 37.873860
13 TULARE LAKE -119.722563 36.229246
14 TULE -119.005959 35.987088
15 TURLOCK -120.742345 37.520860
16 WESTSIDE -120.404486 36.440500
17 WHITE WOLF -118.876929 35.032244

In [87]:
Sub_Lon_Lat.to_excel('18Sub_Lon_Lat.xlsx', sheet_name='sheet1', index=False)