In [9]:
from urllib2 import Request, urlopen
from urlparse import urlparse, urlunparse
import requests, requests_cache
import pandas as pd
import json
import os
import numpy as np

from matplotlib import pyplot as plt
plt.style.use('ggplot')
%matplotlib inline


from urllib2 import Request, urlopen

import requests
# In terminal: conda install requests
import requests_cache
# In terminal: pip install requests_cache

In [12]:
os.chdir('/Users/kaijin/Downloads')

In [13]:
file1=pd.read_excel("SubB_Yr_Cnty_Crp_Wtr_Acr_Zip_Prcip_Ecn.xlsx")
file1.head()


Out[13]:
Subbasin_N Year County Crop WaterUsagePerAcre Acre zipcode Precip EcnValue
0 CHOWCHILLA 2010 Madera Al Pist 3.344023 36958.464642 93638 14.15 7575.600000
1 CHOWCHILLA 2010 Madera Alfalfa 4.318180 34392.028150 93638 14.15 1442.000000
2 CHOWCHILLA 2010 Madera Corn 2.545571 6004.104783 93638 14.15 1278.095000
3 CHOWCHILLA 2010 Madera Cotton 3.152639 3535.633219 93638 14.15 227920.000000
4 CHOWCHILLA 2010 Madera Cucurb 1.605352 163.237815 93638 14.15 5715.333333

In [14]:
f = {'Acre':['sum'], 'WaterUsagePerAcre':['mean'],"Precip":['mean']}
grouped_data=file1.groupby(['Subbasin_N', 'County', 'Year', 'Crop']).agg(f)
grouped_data.head()


Out[14]:
WaterUsagePerAcre Precip Acre
mean mean sum
Subbasin_N County Year Crop
CHOWCHILLA Madera 2010 Al Pist 3.344023 14.15 36958.464642
Alfalfa 4.318180 14.15 34392.028150
Corn 2.545571 14.15 6004.104783
Cotton 3.152639 14.15 3535.633219
Cucurb 1.605352 14.15 163.237815

In [15]:
cutoff1=np.percentile(grouped_data["WaterUsagePerAcre"], 40)
cutoff2=np.percentile(grouped_data["WaterUsagePerAcre"], 90)
print cutoff1,cutoff2

# get index for high water usage
list3=np.array(list(file1.loc[:,"Crop"]))
# get index for high water usage
indexH= np.where(list3 != "high")[0]
indexH=np.array(indexH).tolist()


newtable=file1


2.56030331608 4.70053080302

In [16]:
def tiers_split(i):
    if newtable["WaterUsagePerAcre"][i]> cutoff2:
        return "high"
    elif newtable["WaterUsagePerAcre"][i]> cutoff1:
        return "medium"
    else:
        return "low"
    
newtable["tiers"] = [tiers_split(i)for i in range(len(newtable["WaterUsagePerAcre"]))]
newtable["total"]= [newtable["WaterUsagePerAcre"][i]*newtable["Acre"][i] for i in range(len(newtable["WaterUsagePerAcre"]))]

In [17]:
def common_elements(list1, list2):
    """
    Return a list containing the elements which are in both list1 and list2
    
    """
    result = []
    for element in list1:
        if element in list2:
            result.append(element)
    return result

list1=np.array(list(newtable.loc[:,"tiers"]))
list2=np.array(list(newtable.loc[:,"Year"]))


# get index for high water usage
indexH= np.where(list1 == "high")[0]
indexH=np.array(indexH).tolist()
indexM= np.where(list1 == "medium")
indexM=np.array(indexM).tolist()[0]
indexL= np.where(list1 == "low")
indexL=np.array(indexL).tolist()[0]

indexNew= np.where(list3 != "Pasture")[0]
indexNew=np.array(indexNew).tolist()
indexH=common_elements(indexH,indexNew)
indexL=common_elements(indexL,indexNew)
indexM=common_elements(indexM,indexNew)


# get index for years 
index2010= np.where(list2 == 2010)
index2010=np.array(index2010).tolist()[0]
index2011= np.where(list2 == 2011)
index2011=np.array(index2011).tolist()[0]
index2012= np.where(list2 == 2012)
index2012=np.array(index2012).tolist()[0]
index2013= np.where(list2 == 2013)
index2013=np.array(index2013).tolist()[0]
index2014= np.where(list2 == 2014)
index2014=np.array(index2014).tolist()[0]
index2015= np.where(list2 == 2015)
index2015=np.array(index2015).tolist()[0]

In [18]:
# get the common list for year and tiers
# remove pastures
indexH2010=common_elements(indexH,index2010)
indexH2011=common_elements(indexH,index2011)
indexH2012=common_elements(indexH,index2012)
indexH2013=common_elements(indexH,index2013)
indexH2014=common_elements(indexH,index2014)
indexH2015=common_elements(indexH,index2015)

indexL2010=common_elements(indexL,index2010)
indexL2011=common_elements(indexL,index2011)
indexL2012=common_elements(indexL,index2012)
indexL2013=common_elements(indexL,index2013)
indexL2014=common_elements(indexL,index2014)
indexL2015=common_elements(indexL,index2015)

indexM2010=common_elements(indexM,index2010)
indexM2011=common_elements(indexM,index2011)
indexM2012=common_elements(indexM,index2012)
indexM2013=common_elements(indexM,index2013)
indexM2014=common_elements(indexM,index2014)
indexM2015=common_elements(indexM,index2015)

import plotly 
plotly.tools.set_credentials_file(username='yizhou1231', api_key='2Mua9lXLoOo1QPaNLQPB')


import plotly.plotly as py
import plotly.graph_objs as go



trace1 = go.Bar(
    x=[2010, 2011, 2012,2013,2014,2015],
    y=[sum(newtable.loc[indexL2010,"Acre"]), 
       sum(newtable.loc[indexL2011,"Acre"]), 
       sum(newtable.loc[indexL2012,"Acre"]),
       sum(newtable.loc[indexL2013,"Acre"]),
       sum(newtable.loc[indexL2014,"Acre"]),
       sum(newtable.loc[indexL2015,"Acre"])
      ],
    name='Low Water Usage'
)
trace2 = go.Bar(
   x=[2010, 2011, 2012,2013,2014,2015],
    y=[sum(newtable.loc[indexM2010,"Acre"]), 
       sum(newtable.loc[indexM2011,"Acre"]), 
       sum(newtable.loc[indexM2012,"Acre"]),
       sum(newtable.loc[indexM2013,"Acre"]),
       sum(newtable.loc[indexM2014,"Acre"]),
       sum(newtable.loc[indexM2015,"Acre"])
      ],
    name='Medium Water Usage'
)

trace3 = go.Bar(
   x=[2010, 2011, 2012,2013,2014,2015],
    y=[sum(newtable.loc[indexH2010,"Acre"]), 
       sum(newtable.loc[indexH2011,"Acre"]), 
       sum(newtable.loc[indexH2012,"Acre"]),
       sum(newtable.loc[indexH2013,"Acre"]),
       sum(newtable.loc[indexH2014,"Acre"]),
       sum(newtable.loc[indexH2015,"Acre"])
      ],
    name='High Water Usage'
)


data = [trace1, trace2,trace3]
layout = go.Layout(
    barmode='stack',
    title="crops areas trends in  San Joaquin Valley",     # more about "layout's" "title": /python/reference/#layout-title
    xaxis=dict(                 # all "layout's" "xaxis" attributes: /python/reference/#layout-xaxis
        title="time"            # more about "layout's" "xaxis's" "title": /python/reference/#layout-xaxis-title
    )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='crops areas trends in  San Joaquin Valley')


Out[18]:

In [24]:
def avg(l):
    return sum(l, 0.0) / len(l)
rains = np.array([avg(newtable.loc[index2010,"Precip"]),
                 avg(newtable.loc[index2011,"Precip"]),
                 avg(newtable.loc[index2012,"Precip"]),
                 avg(newtable.loc[index2013,"Precip"]),
                 avg(newtable.loc[index2014,"Precip"]),
                 avg(newtable.loc[index2014,"Precip"])])

def StdRain(indexyear):
    return (avg(newtable.loc[indexyear,"Precip"])-avg(newtable.loc[:,"Precip"]))/np.std(rains)+2
StdRain(index2010)

Areas = np.array([sum(newtable.loc[index2010,"Acre"]),
                  sum(newtable.loc[index2011,"Acre"]), 
                  sum(newtable.loc[index2012,"Acre"]),
                  sum(newtable.loc[index2013,"Acre"]),
                  sum(newtable.loc[index2014,"Acre"]),
                  sum(newtable.loc[index2015,"Acre"])])

def StdArea(indexyear):
    return (sum(newtable.loc[indexyear,"Acre"])-avg(list(Areas)))/np.std(Areas)+2
StdArea(index2010)

Econs= np.array([ sum([newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"] for index in index2010 if newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"]>0]), 
                  sum([newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"] for index in index2011 if newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"]>0]), 
                  sum([newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"] for index in index2012 if newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"]>0]), 
                  sum([newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"] for index in index2013 if newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"]>0]), 
                  sum([newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"] for index in index2014 if newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"]>0]), 
                  sum([newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"] for index in index2015 if newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"]>0])        
    ]
)

def StdEcon (indexyear):
    return (sum([newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"] for index in indexyear if newtable.loc[index,"Acre"]*newtable.loc[index,"EcnValue"]>0])-avg(list(Econs)))/np.std(Econs)+2
StdArea(index2010)


Out[24]:
3.7346511983920401

In [22]:
trace1 = go.Bar(
    x=[2010, 2011, 2012,2013,2014,2015],
    y=[sum(newtable.loc[indexL2010,"total"]), 
       sum(newtable.loc[indexL2011,"total"]), 
       sum(newtable.loc[indexL2012,"total"]),
       sum(newtable.loc[indexL2013,"total"]),
       sum(newtable.loc[indexL2014,"total"]),
       sum(newtable.loc[indexL2015,"total"])
      ],
    name='Low Water Usage'
)
trace2 = go.Bar(
   x=[2010, 2011, 2012,2013,2014,2015],
    y=[sum(newtable.loc[indexM2010,"total"]), 
       sum(newtable.loc[indexM2011,"total"]), 
       sum(newtable.loc[indexM2012,"total"]),
       sum(newtable.loc[indexM2013,"total"]),
       sum(newtable.loc[indexM2014,"total"]),
       sum(newtable.loc[indexM2015,"total"])
      ],
    name='Medium Water Usage'
)

trace3 = go.Bar(
   x=[2010, 2011, 2012,2013,2014,2015],
    y=[sum(newtable.loc[indexH2010,"total"]), 
       sum(newtable.loc[indexH2011,"total"]), 
       sum(newtable.loc[indexH2012,"total"]),
       sum(newtable.loc[indexH2013,"total"]),
       sum(newtable.loc[indexH2014,"total"]),
       sum(newtable.loc[indexH2015,"total"])
      ],
    name='High Water Usage'
)


data = [trace1, trace2,trace3]
layout = go.Layout(
    barmode='stack',
    title="crops wateruse trends in  San Joaquin Valley",     # more about "layout's" "title": /python/reference/#layout-title
    xaxis=dict(                 # all "layout's" "xaxis" attributes: /python/reference/#layout-xaxis
        title="time"            # more about "layout's" "xaxis's" "title": /python/reference/#layout-xaxis-title
    )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='crops wateruse trends in  San Joaquin Valley')


Out[22]:

In [25]:
trace1 = go.Bar(
    x=[2010, 2011, 2012,2013,2014,2015],
    y=[StdRain(index2010), 
       StdRain(index2011), 
       StdRain(index2012),
       StdRain(index2013),
       StdRain(index2014),
       StdRain(index2015)
      ],
    name='Precip')


trace2 = go.Bar(
    x=[2010, 2011, 2012,2013,2014,2015],
    y=[StdArea(index2010), 
       StdArea(index2011), 
       StdArea(index2012),
       StdArea(index2013), 
       StdArea(index2014), 
       StdArea(index2015)
      ],
    name='area')

trace3 = go.Bar(
    x=[2010, 2011, 2012,2013,2014,2015],
    y=[StdEcon(index2010), 
       StdEcon(index2011), 
       StdEcon(index2012),
       StdEcon(index2013), 
       StdEcon(index2014), 
       StdEcon(index2015)
      ],
    name='EconValue')


data = [trace1,trace2,trace3]
layout = go.Layout(
    barmode='group',
    title="Crops, Wateruse, Value, and weather trends in  San Joaquin Valley",     # more about "layout's" "title": /python/reference/#layout-title
    xaxis=dict(                 # all "layout's" "xaxis" attributes: /python/reference/#layout-xaxis
        title="time"            # more about "layout's" "xaxis's" "title": /python/reference/#layout-xaxis-title
    ),
    yaxis=dict(                 # all "layout's" "xaxis" attributes: /python/reference/#layout-xaxis
        title="standaized value"            # more about "layout's" "xaxis's" "title": /python/reference/#layout-xaxis-title
    )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Crops, Wateruse, Value, and weather trends in  San Joaquin Valley')


Out[25]:

In [26]:
def index_with_years (newindex):
    """
    Return a list containing the list conbined 2 index with years 2010 to 2015
    
    """
    indexNew=[common_elements(newindex,index2010)]
    
    indexNew.append(common_elements(newindex,index2011))
    indexNew.append(common_elements(newindex,index2012))
    indexNew.append(common_elements(newindex,index2013))
    indexNew.append(common_elements(newindex,index2014))    
    indexNew.append(common_elements(newindex,index2015))
    return indexNew

In [27]:
list4=np.array(list(newtable.loc[:,"Crop"]))


# get index for high water usage
indexAlfalfa= np.where(list4 == "Rice")[0]
indexAlfalfa=np.array(indexH).tolist()

In [28]:
trace1 = go.Bar(
    x=[2010, 2011, 2012,2013,2014,2015],
    y=[sum(newtable.loc[index_with_years(indexAlfalfa)[0],"Acre"]), 
       sum(newtable.loc[index_with_years(indexAlfalfa)[1],"Acre"]), 
       sum(newtable.loc[index_with_years(indexAlfalfa)[2],"Acre"]),
       sum(newtable.loc[index_with_years(indexAlfalfa)[3],"Acre"]),
       sum(newtable.loc[index_with_years(indexAlfalfa)[4],"Acre"]),
       sum(newtable.loc[index_with_years(indexAlfalfa)[5],"Acre"])
      ],
    name='Alfalfa'
)


data = [trace1]
layout = go.Layout(
    barmode='stack',
    title="Alfalfa areas trends in  San Joaquin Valley",     # more about "layout's" "title": /python/reference/#layout-title
    xaxis=dict(                 # all "layout's" "xaxis" attributes: /python/reference/#layout-xaxis
        title="time"            # more about "layout's" "xaxis's" "title": /python/reference/#layout-xaxis-title
    )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Alfalfa areas trends in  San Joaquin Valley')


Out[28]:

In [ ]:


In [ ]:


In [ ]: