In [4]:
from urllib2 import Request, urlopen
from urlparse import urlparse, urlunparse
import requests, requests_cache
import pandas as pd
import json
import os
import numpy as np
from matplotlib import pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
from urllib2 import Request, urlopen
import requests
# In terminal: conda install requests
import requests_cache
# In terminal: pip install requests_cache
In [5]:
pwd
Out[5]:
In [6]:
os.chdir('/Users/kaijin/Downloads')
In [7]:
KAWEAH=pd.read_csv('San_Joaquin_Valley.csv')
KAWEAH['count'] = pd.Series(1, index =KAWEAH.index )
f = {'Acres':['sum'], 'WaterUsage':['mean'], 'UsageTotal':['sum'], 'count':['sum']}
KAWEAH.groupby(['Subbasin_N', 'County_N', 'Year', 'CropName']).agg(f).head()
Out[7]:
In [8]:
county_name=np.unique(KAWEAH["County_N"])
Lets extract the zipcode according to the county name
In [64]:
for i in range(9):
print county_name[i]
In [65]:
zipcode=[93210,93263,93202,93638,93620,95641,95242,95326,93201]
ZipcodeList=[{ "County_N":county_name[i], "zipcode":zipcode[i] } for i in range(len(zipcode))]
COUNTYZIP=pd.DataFrame(ZipcodeList, columns=["County_N", "zipcode"])
COUNTYZIP
Out[65]:
Lets extract the zipcode and precipetation data from California Department of Water Resources http://et.water.ca.gov/Rest/Index
In [66]:
start="2010-01-01"
def ndb_search(term,start,end,verbose = False):
"""
This takes all of the necessary parameters to form a query
Input: key (data.gov API key, string), term (type, string)
Output: JSON object
"""
url = "http://et.water.ca.gov/api/data"
response = requests.get(url, params = {
"targets": term,
"appKey":"90e36c84-3f23-48a3-becd-1865076a04fd",
"startDate":start,
"EndDate":end,
"dataItems": "day-precip"
})
response.raise_for_status() # check for errors
if verbose:
print response.url
return response.json() # parse JSON
In [67]:
Tulare2010_Recode=Tulare2010["Data"]["Providers"][0]['Records']
len(Tulare2010_Recode)
In [68]:
#note inside a county there may be multilple station that recode the data
# we take the mean then times 365 to get one year rain
# note the value is inches
In [69]:
precip=[ Tulare2010_Recode[i]['DayPrecip']['Value'] for i in range(len(Tulare2010_Recode))]
precip2=np.array(precip).astype(np.float)
#precip2
In [70]:
#WRITE INTO FUNCTIONS
def precip_cal(term,year,verbose = False):
"""
This takes zipcode and year gives precipitaion of a year
Input: term (zipcode, int), year (year, int)
Output: precipitation of a year and a certain county
"""
start="{}-01-01".format("".join(str(year)))
end="{}-12-31".format("".join(str(year)))
Tulare2010=ndb_search(term,start,end,verbose = False)
Tulare2010_Recode=Tulare2010["Data"]["Providers"][0]['Records']
precip=[ Tulare2010_Recode[i]['DayPrecip']['Value'] for i in range(len(Tulare2010_Recode))]
precip2=np.array(precip).astype(np.float)
return np.nanmean(precip2)*365 # parse JSON
In [71]:
year=[2010,2011,2012,2013,2014,2015]
ZipcodeList=[{ "County_N":county_name[i], "zipcode":zipcode[i],"year":year[j]} for i in range(len(zipcode)) for j in range(6) ]
ZipcodeList
COUNTYYear=pd.DataFrame(ZipcodeList, columns=["County_N", "zipcode","year"])
x=[precip_cal(COUNTYYear["zipcode"][i],COUNTYYear["year"][i]) for i in xrange(54) ]
In [74]:
COUNTYYear=pd.DataFrame(ZipcodeList, columns=["County_N", "zipcode","year"])
COUNTYYear["Precip"]=x
COUNTYYear
Out[74]:
In [75]:
COUNTYYear
# unit for precip is inch
newtable=pd.merge(KAWEAH, COUNTYYear,how="right")
f = {'Acres':['sum'], 'WaterUsage':['mean'], 'UsageTotal':['sum'], 'count':['sum'],"Precip":['mean']}
grouped_data=newtable.groupby(['Subbasin_N', 'County_N', 'Year', 'CropName']).agg(f)
Crop value extract from https://www.nass.usda.gov/Statistics_by_State/California/Publications/California_Ag_Statistics/CAFieldCrops.pdf cwt is unit 100pounds https://www.nass.usda.gov/Statistics_by_State/California/Publications/
In [283]:
cropname=np.unique(KAWEAH["CropName"])
In [284]:
cropname
Out[284]:
In [286]:
for i in range(len(cropname)):
print corpname[i]
len(cropname)
Out[286]:
In [20]:
def avg(l):
return sum(l, 0.0) / len(l)
avg([1*3,2*5])
1628*140.00
avg([ 8.88*466 ,5.73*682 ,2.48*3390 ,19.00*391,8.33*780,14.10*429 ,5.30*664 , 1.76 *3710,1750*2.06 ])
Out[20]:
In [28]:
# data from price value in 2013
# econ value is dollar per acers
Econ_dict = { "Al Pist":2360*3.21,
"Alfalfa":7.0*206.00,
"Corn": 26.50*48.23,
"Cotton":1628*140.00,
"Cucurb":avg([260*20.20, 180*35.40, 200*25.90,580*13.00,300*16.00,330*15.60]),
#Honeydew Melons 260 2,730,000 20.20 Cwt. Cwt. $/Cwt.
#"Squash" 180 1,224,000 35.40 Cwt. Cwt. $/Cwt.
#"Cucumbers" 200 760,000 25.90 Cwt. Cwt. $/Cwt.
#"Watermelons" 580 5,800,000 13.00 Cwt. Cwt. $/Cwt.
#"Cantaloupes" 300 12,750,000 16.00 Cwt. Cwt. $/Cwt.
#"Pumpkins 330 1,947,000 15.60 Cwt. Cwt. $/Cwt.
"DryBean": 2320*56.80,
"Grain":5.35*190.36,
"On Gar":avg([ 400*13.20,165*60.30 ]),
#"Onions" spring 400 2,720,000 13.20 summer 490 3,822,000 6.40 Onions, Summer Storage 399 11,700,000 9.11
# "Garlic" 165 3,795,000 60.30
"Oth Dec":avg([ 8.88*466 ,5.73*682 ,2.48*3390 ,19.00*391,8.33*780,14.10*429 ,5.30*664 , 1.76 *3710,1750*2.06 ]),
#"Apples" 8.88 135,000 466 Tons Tons $/Ton
#"Apricots" 5.73 54,400 682 Tons Tons $/Ton
#"Cherries", 2.48 82,000 3,390 Tons Tons $/Ton
#"Pears", 19.00 220,000 391 Tons Tons $/Ton
#"Nectarines" 8.33 150,000 780 Tons Tons $/Ton
#"Peaches", 14.10 648,000 429 Tons Tons $/Ton
#"Plums", 5.30 95,400 664 Tons Tons $/Ton
#"Walnuts" 1.76 492,000 3,710 #tones Tons $/Ton
#"Pecans" 1,750 5,000 2.06 Pounds 1000pounds $/Pound
"Oth Fld":avg([1296.00* 27.1, 17.00*37.56]),
# sunflowers 1,296.00 751,500 27.1 Tons Tons $/Ton
# Sorghum2009 17.00 646,000 37.56 Tons Tons $/Ton
"Oth Trk":avg([320*29.60, 350*24.90, 32*152.00, 180*42.70, 107*248.00,425*41.70,385* 38.70 ,165*42.10,405*21.70 ]),
#"Carrots" 320 20,000,000 29.60 Cwt. Cwt. $/Cwt.
#"Lettuce" 350 33,600,000 24.90 Cwt. Cwt. $/Cwt.
#"Asparagus" 32 368,000 152.00 Cwt. Cwt. $/Cwt.
#"Cauliflower" 180 5,868,000 42.70 Cwt. Cwt. $/Cwt.
# berries 107 514,000 248.00 Cwt. Cwt. $/Cwt.
# "Peppers Bell", 425 8,465,000 41.70 Cwt. Cwt. $/Cwt.
# pepers Chile 385 2,640,000 38.70 Cwt. Cwt. $/Cwt.
# "Broccoli", 165 20,460,000 42.10 8 Cwt. Cwt. $/Cwt.
# "Cabbage", 405 5,670,000 21.70 Cwt. Cwt. $/Cwt.
"Pasture":0,
"Potato":425*17.1, # Cwt. Cwt. $/Cwt.
"Pro Tom":300*36.20, # Cwt. Cwt. $/Cwt
"Rice":84.80*20.9, # Cwt. Cwt. $/Cwt
"Safflwr": 2000.00*26.5, # Pounds Cwt. $/Cwt.
"SgrBeet": 43.40*52.1, # Tons Tons $/Ton
"Subtrop":avg([622*6.52,4.15*813 ]),
# orange 622 109000000 6.52
# Olives 4.15 166000 813 Tons Tons $/Ton
"Vine":900*5.07}# Cartons 3/ Cartons $/Carton
Econ_dict
Out[28]:
find 33 perentile 66 percentile of the water usage