notebook.community

Edit and run

Crop value extract from https://www.nass.usda.gov/Statistics_by_State/California/Publications/California_Ag_Statistics/CAFieldCrops.pdf cwt is unit 100pounds https://www.nass.usda.gov/Statistics_by_State/California/Publications/ In [283]:



In [9]:

    
from urllib2 import Request, urlopen
from urlparse import urlparse, urlunparse
import requests, requests_cache
import pandas as pd
import json
import os
import numpy as np

from matplotlib import pyplot as plt
plt.style.use('ggplot')
%matplotlib inline


from urllib2 import Request, urlopen

import requests
# In terminal: conda install requests
import requests_cache
# In terminal: pip install requests_cache



In [12]:

    
os.chdir('/Users/kaijin/Downloads')



In [13]:

    
file1=pd.read_excel("SubB_Yr_Cnty_Crp_Wtr_Acr_Zip_Prcip_Ecn.xlsx")
file1.head()









    Out[13]:






  
    
      
      Subbasin_N
      Year
      County
      Crop
      WaterUsagePerAcre
      Acre
      zipcode
      Precip
      EcnValue
    
  
  
    
      0
      CHOWCHILLA
      2010
      Madera
      Al Pist
      3.344023
      36958.464642
      93638
      14.15
      7575.600000
    
    
      1
      CHOWCHILLA
      2010
      Madera
      Alfalfa
      4.318180
      34392.028150
      93638
      14.15
      1442.000000
    
    
      2
      CHOWCHILLA
      2010
      Madera
      Corn
      2.545571
      6004.104783
      93638
      14.15
      1278.095000
    
    
      3
      CHOWCHILLA
      2010
      Madera
      Cotton
      3.152639
      3535.633219
      93638
      14.15
      227920.000000
    
    
      4
      CHOWCHILLA
      2010
      Madera
      Cucurb
      1.605352
      163.237815
      93638
      14.15
      5715.333333



In [14]:

    
f = {'Acre':['sum'], 'WaterUsagePerAcre':['mean'],"Precip":['mean']}
grouped_data=file1.groupby(['Subbasin_N', 'County', 'Year', 'Crop']).agg(f)
grouped_data.head()









    Out[14]:






  
    
      
      
      
      
      WaterUsagePerAcre
      Precip
      Acre
    
    
      
      
      
      
      mean
      mean
      sum
    
    
      Subbasin_N
      County
      Year
      Crop
      
      
      
    
  
  
    
      CHOWCHILLA
      Madera
      2010
      Al Pist
      3.344023
      14.15
      36958.464642
    
    
      Alfalfa
      4.318180
      14.15
      34392.028150
    
    
      Corn
      2.545571
      14.15
      6004.104783
    
    
      Cotton
      3.152639
      14.15
      3535.633219
    
    
      Cucurb
      1.605352
      14.15
      163.237815



In [15]:

    
cutoff1=np.percentile(grouped_data["WaterUsagePerAcre"], 40)
cutoff2=np.percentile(grouped_data["WaterUsagePerAcre"], 90)
print cutoff1,cutoff2

# get index for high water usage
list3=np.array(list(file1.loc[:,"Crop"]))
# get index for high water usage
indexH= np.where(list3 != "high")[0]
indexH=np.array(indexH).tolist()


newtable=file1









    



2.56030331608 4.70053080302



In [16]:

    
def tiers_split(i):
    if newtable["WaterUsagePerAcre"][i]> cutoff2:
        return "high"
    elif newtable["WaterUsagePerAcre"][i]> cutoff1:
        return "medium"
    else:
        return "low"
    
newtable["tiers"] = [tiers_split(i)for i in range(len(newtable["WaterUsagePerAcre"]))]
newtable["total"]= [newtable["WaterUsagePerAcre"][i]*newtable["Acre"][i] for i in range(len(newtable["WaterUsagePerAcre"]))]



In [17]:

    
def common_elements(list1, list2):
    """
    Return a list containing the elements which are in both list1 and list2
    
    """
    result = []
    for element in list1:
        if element in list2:
            result.append(element)
    return result

list1=np.array(list(newtable.loc[:,"tiers"]))
list2=np.array(list(newtable.loc[:,"Year"]))


# get index for high water usage
indexH= np.where(list1 == "high")[0]
indexH=np.array(indexH).tolist()
indexM= np.where(list1 == "medium")
indexM=np.array(indexM).tolist()[0]
indexL= np.where(list1 == "low")
indexL=np.array(indexL).tolist()[0]

indexNew= np.where(list3 != "Pasture")[0]
indexNew=np.array(indexNew).tolist()
indexH=common_elements(indexH,indexNew)
indexL=common_elements(indexL,indexNew)
indexM=common_elements(indexM,indexNew)


# get index for years 
index2010= np.where(list2 == 2010)
index2010=np.array(index2010).tolist()[0]
index2011= np.where(list2 == 2011)
index2011=np.array(index2011).tolist()[0]
index2012= np.where(list2 == 2012)
index2012=np.array(index2012).tolist()[0]
index2013= np.where(list2 == 2013)
index2013=np.array(index2013).tolist()[0]
index2014= np.where(list2 == 2014)
index2014=np.array(index2014).tolist()[0]
index2015= np.where(list2 == 2015)
index2015=np.array(index2015).tolist()[0]



In [18]:

    
# get the common list for year and tiers
# remove pastures
indexH2010=common_elements(indexH,index2010)
indexH2011=common_elements(indexH,index2011)
indexH2012=common_elements(indexH,index2012)
indexH2013=common_elements(indexH,index2013)
indexH2014=common_elements(indexH,index2014)
indexH2015=common_elements(indexH,index2015)

indexL2010=common_elements(indexL,index2010)
indexL2011=common_elements(indexL,index2011)
indexL2012=common_elements(indexL,index2012)
indexL2013=common_elements(indexL,index2013)
indexL2014=common_elements(indexL,index2014)
indexL2015=common_elements(indexL,index2015)

indexM2010=common_elements(indexM,index2010)
indexM2011=common_elements(indexM,index2011)
indexM2012=common_elements(indexM,index2012)
indexM2013=common_elements(indexM,index2013)
indexM2014=common_elements(indexM,index2014)
indexM2015=common_elements(indexM,index2015)

import plotly 
plotly.tools.set_credentials_file(username='yizhou1231', api_key='2Mua9lXLoOo1QPaNLQPB')


import plotly.plotly as py
import plotly.graph_objs as go



trace1 = go.Bar(
    x=[2010, 2011, 2012,2013,2014,2015],
    y=[sum(newtable.loc[indexL2010,"Acre"]), 
       sum(newtable.loc[indexL2011,"Acre"]), 
       sum(newtable.loc[indexL2012,"Acre"]),
       sum(newtable.loc[indexL2013,"Acre"]),
       sum(newtable.loc[indexL2014,"Acre"]),
       sum(newtable.loc[indexL2015,"Acre"])
      ],
    name='Low Water Usage'
)
trace2 = go.Bar(
   x=[2010, 2011, 2012,2013,2014,2015],
    y=[sum(newtable.loc[indexM2010,"Acre"]), 
       sum(newtable.loc[indexM2011,"Acre"]), 
       sum(newtable.loc[indexM2012,"Acre"]),
       sum(newtable.loc[indexM2013,"Acre"]),
       sum(newtable.loc[indexM2014,"Acre"]),
       sum(newtable.loc[indexM2015,"Acre"])
      ],
    name='Medium Water Usage'
)

trace3 = go.Bar(
   x=[2010, 2011, 2012,2013,2014,2015],
    y=[sum(newtable.loc[indexH2010,"Acre"]), 
       sum(newtable.loc[indexH2011,"Acre"]), 
       sum(newtable.loc[indexH2012,"Acre"]),
       sum(newtable.loc[indexH2013,"Acre"]),
       sum(newtable.loc[indexH2014,"Acre"]),
       sum(newtable.loc[indexH2015,"Acre"])
      ],
    name='High Water Usage'
)


data = [trace1, trace2,trace3]
layout = go.Layout(
    barmode='stack',
    title="crops areas trends in  San Joaquin Valley",     # more about "layout's" "title": /python/reference/#layout-title
    xaxis=dict(                 # all "layout's" "xaxis" attributes: /python/reference/#layout-xaxis
        title="time"            # more about "layout's" "xaxis's" "title": /python/reference/#layout-xaxis-title
    )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='crops areas trends in  San Joaquin Valley')









    Out[18]:



In [24]:

    
def avg(l):
    return sum(l, 0.0) / len(l)
rains = np.array([avg(newtable.loc[index2010,"Precip"]),
                 avg(newtable.loc[index2011,"Precip"]),
                 avg(newtable.loc[index2012,"Precip"]),
                 avg(newtable.loc[index2013,"Precip"]),
                 avg(newtable.loc[index2014,"Precip"]),
                 avg(newtable.loc[index2014,"Precip"])])

def StdRain(indexyear):
    return (avg(newtable.loc[indexyear,"Precip"])-avg(newtable.loc[:,"Precip"]))/np.std(rains)+2
StdRain(index2010)

Areas = np.array([sum(newtable.loc[index2010,"Acre"]),
                  sum(newtable.loc[index2011,"Acre"]), 
                  sum(newtable.loc[index2012,"Acre"]),
                  sum(newtable.loc[index2013,"Acre"]),
                  sum(newtable.loc[index2014,"Acre"]),
                  sum(newtable.loc[index2015,"Acre"])])

def StdArea(indexyear):
    return (sum(newtable.loc[indexyear,"Acre"])-avg(list(Areas)))/np.std(Areas)+2
StdArea(index2010)

Econs= np.array([ sum([newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"] for index in index2010 if newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"]>0]), 
                  sum([newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"] for index in index2011 if newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"]>0]), 
                  sum([newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"] for index in index2012 if newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"]>0]), 
                  sum([newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"] for index in index2013 if newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"]>0]), 
                  sum([newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"] for index in index2014 if newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"]>0]), 
                  sum([newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"] for index in index2015 if newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"]>0])        
    ]
)

def StdEcon (indexyear):
    return (sum([newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"] for index in indexyear if newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"]>0])-avg(list(Econs)))/np.std(Econs)+2
StdArea(index2010)









    Out[24]:





3.7346511983920401



In [22]:

    
trace1 = go.Bar(
    x=[2010, 2011, 2012,2013,2014,2015],
    y=[sum(newtable.loc[indexL2010,"total"]), 
       sum(newtable.loc[indexL2011,"total"]), 
       sum(newtable.loc[indexL2012,"total"]),
       sum(newtable.loc[indexL2013,"total"]),
       sum(newtable.loc[indexL2014,"total"]),
       sum(newtable.loc[indexL2015,"total"])
      ],
    name='Low Water Usage'
)
trace2 = go.Bar(
   x=[2010, 2011, 2012,2013,2014,2015],
    y=[sum(newtable.loc[indexM2010,"total"]), 
       sum(newtable.loc[indexM2011,"total"]), 
       sum(newtable.loc[indexM2012,"total"]),
       sum(newtable.loc[indexM2013,"total"]),
       sum(newtable.loc[indexM2014,"total"]),
       sum(newtable.loc[indexM2015,"total"])
      ],
    name='Medium Water Usage'
)

trace3 = go.Bar(
   x=[2010, 2011, 2012,2013,2014,2015],
    y=[sum(newtable.loc[indexH2010,"total"]), 
       sum(newtable.loc[indexH2011,"total"]), 
       sum(newtable.loc[indexH2012,"total"]),
       sum(newtable.loc[indexH2013,"total"]),
       sum(newtable.loc[indexH2014,"total"]),
       sum(newtable.loc[indexH2015,"total"])
      ],
    name='High Water Usage'
)


data = [trace1, trace2,trace3]
layout = go.Layout(
    barmode='stack',
    title="crops wateruse trends in  San Joaquin Valley",     # more about "layout's" "title": /python/reference/#layout-title
    xaxis=dict(                 # all "layout's" "xaxis" attributes: /python/reference/#layout-xaxis
        title="time"            # more about "layout's" "xaxis's" "title": /python/reference/#layout-xaxis-title
    )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='crops wateruse trends in  San Joaquin Valley')









    Out[22]:



In [25]:

    
trace1 = go.Bar(
    x=[2010, 2011, 2012,2013,2014,2015],
    y=[StdRain(index2010), 
       StdRain(index2011), 
       StdRain(index2012),
       StdRain(index2013),
       StdRain(index2014),
       StdRain(index2015)
      ],
    name='Precip')


trace2 = go.Bar(
    x=[2010, 2011, 2012,2013,2014,2015],
    y=[StdArea(index2010), 
       StdArea(index2011), 
       StdArea(index2012),
       StdArea(index2013), 
       StdArea(index2014), 
       StdArea(index2015)
      ],
    name='area')

trace3 = go.Bar(
    x=[2010, 2011, 2012,2013,2014,2015],
    y=[StdEcon(index2010), 
       StdEcon(index2011), 
       StdEcon(index2012),
       StdEcon(index2013), 
       StdEcon(index2014), 
       StdEcon(index2015)
      ],
    name='EconValue')


data = [trace1,trace2,trace3]
layout = go.Layout(
    barmode='group',
    title="Crops, Wateruse, Value, and weather trends in  San Joaquin Valley",     # more about "layout's" "title": /python/reference/#layout-title
    xaxis=dict(                 # all "layout's" "xaxis" attributes: /python/reference/#layout-xaxis
        title="time"            # more about "layout's" "xaxis's" "title": /python/reference/#layout-xaxis-title
    ),
    yaxis=dict(                 # all "layout's" "xaxis" attributes: /python/reference/#layout-xaxis
        title="standaized value"            # more about "layout's" "xaxis's" "title": /python/reference/#layout-xaxis-title
    )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Crops, Wateruse, Value, and weather trends in  San Joaquin Valley')









    Out[25]:



In [26]:

    
def index_with_years (newindex):
    """
    Return a list containing the list conbined 2 index with years 2010 to 2015
    
    """
    indexNew=[common_elements(newindex,index2010)]
    
    indexNew.append(common_elements(newindex,index2011))
    indexNew.append(common_elements(newindex,index2012))
    indexNew.append(common_elements(newindex,index2013))
    indexNew.append(common_elements(newindex,index2014))    
    indexNew.append(common_elements(newindex,index2015))
    return indexNew



In [27]:

    
list4=np.array(list(newtable.loc[:,"Crop"]))


# get index for high water usage
indexAlfalfa= np.where(list4 == "Rice")[0]
indexAlfalfa=np.array(indexH).tolist()



In [28]:

    
trace1 = go.Bar(
    x=[2010, 2011, 2012,2013,2014,2015],
    y=[sum(newtable.loc[index_with_years(indexAlfalfa)[0],"Acre"]), 
       sum(newtable.loc[index_with_years(indexAlfalfa)[1],"Acre"]), 
       sum(newtable.loc[index_with_years(indexAlfalfa)[2],"Acre"]),
       sum(newtable.loc[index_with_years(indexAlfalfa)[3],"Acre"]),
       sum(newtable.loc[index_with_years(indexAlfalfa)[4],"Acre"]),
       sum(newtable.loc[index_with_years(indexAlfalfa)[5],"Acre"])
      ],
    name='Alfalfa'
)


data = [trace1]
layout = go.Layout(
    barmode='stack',
    title="Alfalfa areas trends in  San Joaquin Valley",     # more about "layout's" "title": /python/reference/#layout-title
    xaxis=dict(                 # all "layout's" "xaxis" attributes: /python/reference/#layout-xaxis
        title="time"            # more about "layout's" "xaxis's" "title": /python/reference/#layout-xaxis-title
    )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Alfalfa areas trends in  San Joaquin Valley')









    Out[28]:



In [ ]:



In [ ]:



In [ ]:

	Subbasin_N	Year	County	Crop	WaterUsagePerAcre	Acre	zipcode	Precip	EcnValue
0	CHOWCHILLA	2010	Madera	Al Pist	3.344023	36958.464642	93638	14.15	7575.600000
1	CHOWCHILLA	2010	Madera	Alfalfa	4.318180	34392.028150	93638	14.15	1442.000000
2	CHOWCHILLA	2010	Madera	Corn	2.545571	6004.104783	93638	14.15	1278.095000
3	CHOWCHILLA	2010	Madera	Cotton	3.152639	3535.633219	93638	14.15	227920.000000
4	CHOWCHILLA	2010	Madera	Cucurb	1.605352	163.237815	93638	14.15	5715.333333

				WaterUsagePerAcre	Precip	Acre
				mean	mean	sum
Subbasin_N	County	Year	Crop
CHOWCHILLA	Madera	2010	Al Pist	3.344023	14.15	36958.464642
			Alfalfa	4.318180	14.15	34392.028150
			Corn	2.545571	14.15	6004.104783
			Cotton	3.152639	14.15	3535.633219
			Cucurb	1.605352	14.15	163.237815