In [4]:
from urllib2 import Request, urlopen
from urlparse import urlparse, urlunparse
import requests, requests_cache
import pandas as pd
import json
import os
import numpy as np

from matplotlib import pyplot as plt
plt.style.use('ggplot')
%matplotlib inline


from urllib2 import Request, urlopen

import requests
# In terminal: conda install requests
import requests_cache
# In terminal: pip install requests_cache

In [5]:
pwd


Out[5]:
u'/Users/kaijin'

In [6]:
os.chdir('/Users/kaijin/Downloads')

In [7]:
KAWEAH=pd.read_csv('San_Joaquin_Valley.csv')
KAWEAH['count'] = pd.Series(1, index =KAWEAH.index )
f = {'Acres':['sum'], 'WaterUsage':['mean'], 'UsageTotal':['sum'], 'count':['sum']}
KAWEAH.groupby(['Subbasin_N', 'County_N', 'Year', 'CropName']).agg(f).head()


Out[7]:
WaterUsage count UsageTotal Acres
mean sum sum sum
Subbasin_N County_N Year CropName
CHOWCHILLA Madera 2010 Al Pist 3.344023 2 123589.945360 36958.464641
Alfalfa 4.318180 1 148510.980100 34392.028150
Corn 2.545571 3 24652.905436 9684.628242
Cotton 3.152639 1 11146.575720 3535.633219
Cucurb 1.605352 2 262.054178 163.237815

In [8]:
county_name=np.unique(KAWEAH["County_N"])

Lets extract the zipcode according to the county name


In [64]:
for i in range(9):
    print county_name[i]


Fresno
Kern
Kings
Madera
Merced
Sacramento
San Joaquin
Stanislaus
Tulare

In [65]:
zipcode=[93210,93263,93202,93638,93620,95641,95242,95326,93201]
ZipcodeList=[{ "County_N":county_name[i], "zipcode":zipcode[i] } for i in range(len(zipcode))]
COUNTYZIP=pd.DataFrame(ZipcodeList, columns=["County_N", "zipcode"])
COUNTYZIP


Out[65]:
County_N zipcode
0 Fresno 93210
1 Kern 93263
2 Kings 93202
3 Madera 93638
4 Merced 93620
5 Sacramento 95641
6 San Joaquin 95242
7 Stanislaus 95326
8 Tulare 93201

Lets extract the zipcode and precipetation data from California Department of Water Resources http://et.water.ca.gov/Rest/Index


In [66]:
start="2010-01-01"
def ndb_search(term,start,end,verbose = False):
    """
    This takes all of the necessary parameters to form a query 
    Input: key (data.gov API key, string), term (type, string)
    Output: JSON object
    """
    url = "http://et.water.ca.gov/api/data"
    response = requests.get(url, params = {
        "targets": term,
        "appKey":"90e36c84-3f23-48a3-becd-1865076a04fd",
        "startDate":start,
        "EndDate":end,
        "dataItems": "day-precip"  
    })
    response.raise_for_status() # check for errors
    if verbose:
        print response.url

    return response.json() # parse JSON

In [67]:
Tulare2010_Recode=Tulare2010["Data"]["Providers"][0]['Records']
len(Tulare2010_Recode)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-67-282a1ceb064d> in <module>()
----> 1 Tulare2010_Recode=Tulare2010["Data"]["Providers"][0]['Records']
      2 len(Tulare2010_Recode)

NameError: name 'Tulare2010' is not defined

In [68]:
#note inside a county there may be multilple station that recode the data 
# we take the mean then times 365 to get one year rain 
# note the value is inches

In [69]:
precip=[ Tulare2010_Recode[i]['DayPrecip']['Value'] for i in range(len(Tulare2010_Recode))]
precip2=np.array(precip).astype(np.float)
#precip2


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-69-db5103e07808> in <module>()
----> 1 precip=[ Tulare2010_Recode[i]['DayPrecip']['Value'] for i in range(len(Tulare2010_Recode))]
      2 precip2=np.array(precip).astype(np.float)
      3 #precip2

NameError: name 'Tulare2010_Recode' is not defined

In [70]:
#WRITE INTO FUNCTIONS
def precip_cal(term,year,verbose = False):
    """
    This takes zipcode and year gives precipitaion of a year 
    Input: term (zipcode, int), year (year, int)
    Output: precipitation of a year and a certain county 
    """
    start="{}-01-01".format("".join(str(year)))
    end="{}-12-31".format("".join(str(year)))

    Tulare2010=ndb_search(term,start,end,verbose = False)
    Tulare2010_Recode=Tulare2010["Data"]["Providers"][0]['Records']
    precip=[ Tulare2010_Recode[i]['DayPrecip']['Value'] for i in range(len(Tulare2010_Recode))]
    precip2=np.array(precip).astype(np.float)
  
    return np.nanmean(precip2)*365 # parse JSON

In [71]:
year=[2010,2011,2012,2013,2014,2015]
ZipcodeList=[{ "County_N":county_name[i], "zipcode":zipcode[i],"year":year[j]} for i in range(len(zipcode)) for j in range(6) ]
ZipcodeList
COUNTYYear=pd.DataFrame(ZipcodeList, columns=["County_N", "zipcode","year"])
x=[precip_cal(COUNTYYear["zipcode"][i],COUNTYYear["year"][i]) for i in xrange(54) ]

In [74]:
COUNTYYear=pd.DataFrame(ZipcodeList, columns=["County_N", "zipcode","year"])
COUNTYYear["Precip"]=x
COUNTYYear


Out[74]:
County_N zipcode year Precip
0 Fresno 93210 2010 7.273551
1 Fresno 93210 2011 6.897796
2 Fresno 93210 2012 1.789807
3 Fresno 93210 2013 2.816295
4 Fresno 93210 2014 5.495055
5 Fresno 93210 2015 4.742995
6 Kern 93263 2010 13.194598
7 Kern 93263 2011 5.730000
8 Kern 93263 2012 8.691563
9 Kern 93263 2013 2.696396
10 Kern 93263 2014 4.540000
11 Kern 93263 2015 3.530000
12 Kings 93202 2010 12.658078
13 Kings 93202 2011 5.350000
14 Kings 93202 2012 5.564754
15 Kings 93202 2013 2.300000
16 Kings 93202 2014 5.140000
17 Kings 93202 2015 4.200000
18 Madera 93638 2010 14.150000
19 Madera 93638 2011 9.760000
20 Madera 93638 2012 7.409699
21 Madera 93638 2013 3.151811
22 Madera 93638 2014 6.460000
23 Madera 93638 2015 5.823823
24 Merced 93620 2010 12.036767
25 Merced 93620 2011 7.320000
26 Merced 93620 2012 5.190779
27 Merced 93620 2013 248.857849
28 Merced 93620 2014 6.880000
29 Merced 93620 2015 9.379241
30 Sacramento 95641 2010 18.330000
31 Sacramento 95641 2011 13.480000
32 Sacramento 95641 2012 17.362432
33 Sacramento 95641 2013 5.310000
34 Sacramento 95641 2014 16.465110
35 Sacramento 95641 2015 7.416934
36 San Joaquin 95242 2010 19.440000
37 San Joaquin 95242 2011 10.060000
38 San Joaquin 95242 2012 17.541940
39 San Joaquin 95242 2013 6.100000
40 San Joaquin 95242 2014 14.680220
41 San Joaquin 95242 2015 10.625556
42 Stanislaus 95326 2010 17.430000
43 Stanislaus 95326 2011 12.000000
44 Stanislaus 95326 2012 9.484016
45 Stanislaus 95326 2013 3.940000
46 Stanislaus 95326 2014 11.530000
47 Stanislaus 95326 2015 6.990000
48 Tulare 93201 2010 12.825000
49 Tulare 93201 2011 5.841017
50 Tulare 93201 2012 5.499932
51 Tulare 93201 2013 3.120000
52 Tulare 93201 2014 3.670000
53 Tulare 93201 2015 5.035000

In [75]:
COUNTYYear
# unit for precip is inch 
newtable=pd.merge(KAWEAH, COUNTYYear,how="right")
f = {'Acres':['sum'], 'WaterUsage':['mean'], 'UsageTotal':['sum'], 'count':['sum'],"Precip":['mean']}
grouped_data=newtable.groupby(['Subbasin_N', 'County_N', 'Year', 'CropName']).agg(f)

In [283]:
cropname=np.unique(KAWEAH["CropName"])

In [284]:
cropname


Out[284]:
array(['Al Pist', 'Alfalfa', 'Corn', 'Cotton', 'Cucurb', 'DryBean',
       'Grain', 'On Gar', 'Oth Dec', 'Oth Fld', 'Oth Trk ', 'Pasture',
       'Potato', 'Pro Tom', 'Rice', 'Safflwr', 'SgrBeet', 'Subtrop', 'Vine'], dtype=object)

In [286]:
for i in range(len(cropname)):
    print corpname[i]
len(cropname)


Al Pist
Alfalfa
Corn
Cotton
Cucurb
DryBean
Grain
On Gar
Oth Dec
Oth Fld
Oth Trk 
Pasture
Potato
Pro Tom
Rice
Safflwr
SgrBeet
Subtrop
Vine
Out[286]:
19

In [20]:
def avg(l):
    return sum(l, 0.0) / len(l)
avg([1*3,2*5])
1628*140.00
avg([ 8.88*466  ,5.73*682  ,2.48*3390  ,19.00*391,8.33*780,14.10*429 ,5.30*664 , 1.76 *3710,1750*2.06    ])


Out[20]:
5564.693333333333

In [28]:
# data from price value in 2013 
# econ value is dollar per acers

Econ_dict = { "Al Pist":2360*3.21, 
         "Alfalfa":7.0*206.00,
         "Corn": 26.50*48.23,
         "Cotton":1628*140.00, 
         "Cucurb":avg([260*20.20, 180*35.40, 200*25.90,580*13.00,300*16.00,330*15.60]),
#Honeydew Melons  260 2,730,000 20.20 Cwt. Cwt. $/Cwt.
#"Squash"   180 1,224,000 35.40   Cwt. Cwt. $/Cwt.
#"Cucumbers"  200 760,000 25.90   Cwt. Cwt. $/Cwt.
#"Watermelons"  580 5,800,000 13.00  Cwt. Cwt. $/Cwt.
#"Cantaloupes"   300 12,750,000 16.00  Cwt. Cwt. $/Cwt.
#"Pumpkins    330 1,947,000 15.60   Cwt. Cwt. $/Cwt.

         "DryBean": 2320*56.80, 
         "Grain":5.35*190.36,
         "On Gar":avg([ 400*13.20,165*60.30 ]), 
#"Onions"  spring 400 2,720,000 13.20   summer  490 3,822,000 6.40   Onions, Summer Storage 399 11,700,000 9.11
# "Garlic"   165 3,795,000 60.30

         "Oth Dec":avg([ 8.88*466  ,5.73*682  ,2.48*3390  ,19.00*391,8.33*780,14.10*429 ,5.30*664 , 1.76 *3710,1750*2.06    ]),
#"Apples" 8.88 135,000 466  Tons Tons $/Ton
#"Apricots"  5.73 54,400 682   Tons Tons $/Ton
#"Cherries", 2.48 82,000 3,390  Tons Tons $/Ton
#"Pears",  19.00 220,000 391  Tons Tons $/Ton
#"Nectarines"  8.33 150,000 780 Tons Tons $/Ton
#"Peaches", 14.10 648,000 429 Tons Tons $/Ton
#"Plums",  5.30 95,400 664   Tons Tons $/Ton
#"Walnuts" 1.76 492,000 3,710  #tones Tons $/Ton
#"Pecans"  1,750  5,000 2.06  Pounds 1000pounds $/Pound
         "Oth Fld":avg([1296.00* 27.1, 17.00*37.56]),
# sunflowers 1,296.00 751,500 27.1   Tons Tons $/Ton
 # Sorghum2009   17.00 646,000 37.56   Tons Tons $/Ton
         "Oth Trk":avg([320*29.60, 350*24.90, 32*152.00, 180*42.70, 107*248.00,425*41.70,385* 38.70 ,165*42.10,405*21.70 ]),
#"Carrots" 320 20,000,000 29.60  Cwt. Cwt. $/Cwt.
#"Lettuce"  350 33,600,000 24.90 Cwt. Cwt. $/Cwt.
#"Asparagus"  32  368,000  152.00  Cwt. Cwt. $/Cwt.
#"Cauliflower"  180 5,868,000 42.70  Cwt. Cwt. $/Cwt.
# berries  107 514,000 248.00 Cwt. Cwt. $/Cwt.
# "Peppers Bell", 425 8,465,000 41.70  Cwt. Cwt. $/Cwt.
# pepers Chile    385 2,640,000 38.70   Cwt. Cwt. $/Cwt.
# "Broccoli",  165 20,460,000 42.10 8   Cwt. Cwt. $/Cwt.
# "Cabbage",  405 5,670,000 21.70   Cwt. Cwt. $/Cwt.
         "Pasture":0, 
         "Potato":425*17.1, # Cwt. Cwt. $/Cwt.
         "Pro Tom":300*36.20, # Cwt. Cwt. $/Cwt 
         "Rice":84.80*20.9, # Cwt. Cwt. $/Cwt
         "Safflwr": 2000.00*26.5, #  Pounds Cwt. $/Cwt.
         "SgrBeet": 43.40*52.1,  # Tons Tons $/Ton
          "Subtrop":avg([622*6.52,4.15*813  ]), 
# orange 622 109000000 6.52
# Olives  4.15 166000 813  Tons Tons $/Ton

          "Vine":900*5.07}# Cartons 3/ Cartons $/Carton


Econ_dict


Out[28]:
{'Al Pist': 7575.6,
 'Alfalfa': 1442.0,
 'Corn': 1278.095,
 'Cotton': 227920.0,
 'Cucurb': 5715.333333333333,
 'DryBean': 131776.0,
 'Grain': 1018.426,
 'On Gar': 7614.75,
 'Oth Dec': 5564.693333333333,
 'Oth Fld': 17880.059999999998,
 'Oth Trk': 11736.666666666666,
 'Pasture': 0,
 'Potato': 7267.500000000001,
 'Pro Tom': 10860.0,
 'Rice': 1772.3199999999997,
 'Safflwr': 53000.0,
 'SgrBeet': 2261.14,
 'Subtrop': 3714.6949999999997,
 'Vine': 4563.0}

find 33 perentile 66 percentile of the water usage