In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
matplotlib.style.use('ggplot')
import seaborn as sns

In [2]:
os.chdir('/Users/yizhou/Desktop/')

In [3]:
ca_data = pd.read_excel('SubB_Yr_Cnty_Crp_Wtr_Acr_Zip_Prcip_Ecn.xlsx')

In [4]:
ca_data.head()


Out[4]:
Subbasin_N Year County Crop WaterUsagePerAcre Acre zipcode Precip EcnValue
0 CHOWCHILLA 2010 Madera Al Pist 3.344023 36958.464642 93638 14.15 7575.600000
1 CHOWCHILLA 2010 Madera Alfalfa 4.318180 34392.028150 93638 14.15 1442.000000
2 CHOWCHILLA 2010 Madera Corn 2.545571 6004.104783 93638 14.15 1278.095000
3 CHOWCHILLA 2010 Madera Cotton 3.152639 3535.633219 93638 14.15 227920.000000
4 CHOWCHILLA 2010 Madera Cucurb 1.605352 163.237815 93638 14.15 5715.333333

In [5]:
# First we are going to make analysis in year 2010 because the data in 2010 is the most compelete data
data2010 = ca_data[ca_data['Year'] == 2010]
data2010.head()


Out[5]:
Subbasin_N Year County Crop WaterUsagePerAcre Acre zipcode Precip EcnValue
0 CHOWCHILLA 2010 Madera Al Pist 3.344023 36958.464642 93638 14.15 7575.600000
1 CHOWCHILLA 2010 Madera Alfalfa 4.318180 34392.028150 93638 14.15 1442.000000
2 CHOWCHILLA 2010 Madera Corn 2.545571 6004.104783 93638 14.15 1278.095000
3 CHOWCHILLA 2010 Madera Cotton 3.152639 3535.633219 93638 14.15 227920.000000
4 CHOWCHILLA 2010 Madera Cucurb 1.605352 163.237815 93638 14.15 5715.333333

In [6]:
replacements = {
   'Crop': {
      r'Al Pist': 'Almond&Pistachio',
      r'Cucurb': 'Melon&Cucumber',
      r'On Gar': 'Onion&Garlic',
      r'Oth Dec': 'DeciduousPlant',
      r'Subtrop':'SubtropicalPlant',
      r'Vine': 'GrapeFruit',
      r'Oth Fld': 'FieldCrop',
      r'SgrBeet': 'SugarBeet',
      r'Safflwr': 'Safflower',
      r'Pr Tom': 'ProcessedTomato',
      r'Fr Tom': 'MarketTomato',
      r'Oth Trk': 'TropicalPlant'}
}

data2010.replace(replacements, regex=True, inplace=True)


/Users/yizhou/anaconda/lib/python2.7/site-packages/pandas/core/generic.py:3378: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  inplace=False, regex=regex)

In [7]:
data2010.head()


Out[7]:
Subbasin_N Year County Crop WaterUsagePerAcre Acre zipcode Precip EcnValue
0 CHOWCHILLA 2010 Madera Almond&Pistachio 3.344023 36958.464642 93638 14.15 7575.600000
1 CHOWCHILLA 2010 Madera Alfalfa 4.318180 34392.028150 93638 14.15 1442.000000
2 CHOWCHILLA 2010 Madera Corn 2.545571 6004.104783 93638 14.15 1278.095000
3 CHOWCHILLA 2010 Madera Cotton 3.152639 3535.633219 93638 14.15 227920.000000
4 CHOWCHILLA 2010 Madera Melon&Cucumber 1.605352 163.237815 93638 14.15 5715.333333

In [8]:
# get data for different county
name_list = np.unique(data2010['Subbasin_N'])
cont_pd = [0]*18
for n in range(len(name_list)):
    cont_pd[n] = pd.DataFrame(data2010[data2010['Subbasin_N'] == name_list[n]])
    cont_pd[n] = cont_pd[n].reset_index(drop=True)

In [9]:
cont_pd[0]


Out[9]:
Subbasin_N Year County Crop WaterUsagePerAcre Acre zipcode Precip EcnValue
0 CHOWCHILLA 2010 Madera Almond&Pistachio 3.344023 36958.464642 93638 14.15 7575.600000
1 CHOWCHILLA 2010 Madera Alfalfa 4.318180 34392.028150 93638 14.15 1442.000000
2 CHOWCHILLA 2010 Madera Corn 2.545571 6004.104783 93638 14.15 1278.095000
3 CHOWCHILLA 2010 Madera Cotton 3.152639 3535.633219 93638 14.15 227920.000000
4 CHOWCHILLA 2010 Madera Melon&Cucumber 1.605352 163.237815 93638 14.15 5715.333333
5 CHOWCHILLA 2010 Madera DryBean 1.940346 3.780712 93638 14.15 131776.000000
6 CHOWCHILLA 2010 Madera Grain 1.026442 23115.720015 93638 14.15 1018.426000
7 CHOWCHILLA 2010 Madera Onion&Garlic 2.918113 9.562978 93638 14.15 7614.750000
8 CHOWCHILLA 2010 Madera DeciduousPlant 3.471690 324.251682 93638 14.15 5564.693333
9 CHOWCHILLA 2010 Madera FieldCrop 2.519371 46.146930 93638 14.15 17880.060000
10 CHOWCHILLA 2010 Madera TropicalPlant 1.139580 1768.928584 93638 14.15 11736.666667
11 CHOWCHILLA 2010 Madera Pasture 4.207748 16459.219959 93638 14.15 0.000000
12 CHOWCHILLA 2010 Madera Potato 1.248577 13.566085 93638 14.15 7267.500000
13 CHOWCHILLA 2010 Madera ProcessedTomato 2.520835 919.602677 93638 14.15 NaN
14 CHOWCHILLA 2010 Madera Rice 4.885808 5.782266 93638 14.15 1772.320000
15 CHOWCHILLA 2010 Madera Safflower 2.104100 19.570746 93638 14.15 53000.000000
16 CHOWCHILLA 2010 Madera SubtropicalPlant 2.727559 58.489844 93638 14.15 3714.695000
17 CHOWCHILLA 2010 Madera GrapeFruit 2.531771 5483.367257 93638 14.15 4563.000000

In [10]:
print data2010['Crop'].unique()
x = ['Al Pist', 'Alfalfa','Corn', 'Cotton', 'Cucurb', 'DryBean', 'Grain', 'On Gar', 'Oth Dec', 'Oth Fld',
    'Oth Trk', 'Pasture', 'Potato', 'Pr Tom', 'Safflwr', 'Subtrop', 'Vine']


[u'Almond&Pistachio' u'Alfalfa' u'Corn' u'Cotton' u'Melon&Cucumber'
 u'DryBean' u'Grain' u'Onion&Garlic' u'DeciduousPlant' u'FieldCrop'
 u'TropicalPlant' u'Pasture' u'Potato' u'ProcessedTomato' u'Rice'
 u'Safflower' u'SubtropicalPlant' u'GrapeFruit']

In [11]:
names = ['CHOWCHILLA', 'MADERA', 'COSUMNES', 'EASTERN SAN JOAQUIN', 'TRACY', 'DELTA-MENDOTA', 'MODESTO', 
        'TURLOCK', 'KAWEAH', 'TULE', 'KERN COUNTY', 'WHITE WOLF', 'KETTLEMAN PLAIN', 'KINGS', 'TULARE LAKE',
        'MERCED', 'PLEASANT VALLEY', 'WESTSIDE']

In [12]:
# assigen data into subasion dataframe, add appropriate column and sort it.
cont = [0] * 18
for i in range(0,18):
    cont[i] = pd.DataFrame(data2010[data2010['Subbasin_N'] == name_list[i]])
    cont[i] = cont[i].reset_index(drop=True)
    
    num = cont[i][cont[i]['Crop'] == 'Pasture'].index
    cont[i] = cont[i].drop(cont[i].index[num])
    cont[i] = cont[i].reset_index(drop=True)
    
    approp = cont[i]['WaterUsagePerAcre']*12/cont[i]['Precip']
    cont[i]['Appropriate'] = pd.Series(approp,index = cont[i].index)
    cont[i] = cont[i].sort_values('Appropriate', axis=0)

In [13]:
# Combine with econ value
Top3_list = []
for i in range(0,18):
    a = list(cont[i][0:7].sort_values('EcnValue', axis=0, ascending = False)['Crop'][0:3])
    Top3_list.append(a)
Top3_list


Out[13]:
[[u'DryBean', u'Safflower', u'FieldCrop'],
 [u'DryBean', u'Safflower', u'Onion&Garlic'],
 [u'DryBean', u'Safflower', u'TropicalPlant'],
 [u'DryBean', u'Safflower', u'Onion&Garlic'],
 [u'Safflower', u'FieldCrop', u'TropicalPlant'],
 [u'Cotton', u'Safflower', u'FieldCrop'],
 [u'Cotton', u'DryBean', u'Safflower'],
 [u'Safflower', u'FieldCrop', u'TropicalPlant'],
 [u'DryBean', u'Safflower', u'FieldCrop'],
 [u'DryBean', u'Safflower', u'TropicalPlant'],
 [u'DryBean', u'Safflower', u'TropicalPlant'],
 [u'Cotton', u'DryBean', u'Safflower'],
 [u'DryBean', u'Safflower', u'Onion&Garlic'],
 [u'Safflower', u'FieldCrop', u'TropicalPlant'],
 [u'Cotton', u'FieldCrop', u'TropicalPlant'],
 [u'DryBean', u'Safflower', u'TropicalPlant'],
 [u'DryBean', u'Safflower', u'TropicalPlant'],
 [u'Cotton', u'TropicalPlant', u'Almond&Pistachio']]

In [14]:
Top3_df = pd.DataFrame(
    {'Subbasin': names,
     'Top3_Suggested_Crop': Top3_list,
    })
Top3_df['Top3_Suggested_Crop']


Out[14]:
0               [DryBean, Safflower, FieldCrop]
1            [DryBean, Safflower, Onion&Garlic]
2           [DryBean, Safflower, TropicalPlant]
3            [DryBean, Safflower, Onion&Garlic]
4         [Safflower, FieldCrop, TropicalPlant]
5                [Cotton, Safflower, FieldCrop]
6                  [Cotton, DryBean, Safflower]
7         [Safflower, FieldCrop, TropicalPlant]
8               [DryBean, Safflower, FieldCrop]
9           [DryBean, Safflower, TropicalPlant]
10          [DryBean, Safflower, TropicalPlant]
11                 [Cotton, DryBean, Safflower]
12           [DryBean, Safflower, Onion&Garlic]
13        [Safflower, FieldCrop, TropicalPlant]
14           [Cotton, FieldCrop, TropicalPlant]
15          [DryBean, Safflower, TropicalPlant]
16          [DryBean, Safflower, TropicalPlant]
17    [Cotton, TropicalPlant, Almond&Pistachio]
Name: Top3_Suggested_Crop, dtype: object

In [15]:
Lon_lat = pd.read_excel('18Sub_Lon_Lat.xlsx')
Lon_lat
combine_df = pd.merge(Lon_lat, Top3_df, left_on = "Subbasin_N", right_on = "Subbasin", how = "left")
del combine_df['Subbasin']

In [149]:
writer = pd.ExcelWriter('Lon_lat.xlsx', engine='xlsxwriter')
combine_df.to_excel(writer, sheet_name='Sheet1')
writer.save()

In [19]:
import requests
import requests_cache
requests_cache.install_cache("cache")
import plotly 
plotly.tools.set_credentials_file(username='xiyy0929', api_key='qxnzYGi5IRe09usYNkit')

In [20]:
import plotly.plotly as py
import plotly.graph_objs as go


trace = [0]*18
for i in range(0,18):
    trace[i] = go.Bar(
        x=cont[i]['Crop'],
        y=cont[i]['Appropriate'],
        name = names[i]
    )

data = [trace[0], trace[1], trace[2], trace[3], trace[4], trace[5], trace[6], trace[7], trace[8], trace[9], trace[10],
       trace[11], trace[12], trace[13], trace[14], trace[15], trace[16], trace[17]]
layout = go.Layout(
    barmode='group'
)

showtickprefix='first'

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='grouped-bar')


Out[20]:

In [22]:
import plotly.plotly as py
import plotly.graph_objs as go


trace = [0]*18
for i in range(0,18):
    trace[i] = go.Bar(
        x=cont[i]['Crop'],
        y=cont[i]['Appropriate'],
        name = names[i]
    )

data = [trace[0], trace[1], trace[2], trace[3], trace[4], trace[5], trace[6], trace[7], trace[8], trace[9], trace[10],
       trace[11], trace[12], trace[13], trace[14], trace[15], trace[16], trace[17]]
layout = go.Layout(
    barmode='group'
)

showtickprefix='first'

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='grouped-bar')


Out[22]:

In [24]:
# make my list into a dataframe
a = data2010['Crop']
b = data2010['EcnValue']
econ_df = pd.DataFrame({'Crop':a, 'EconValue': b})
econ_df = econ_df.drop_duplicates()
# drop pasture(value is zero) drop Pr Tom since cannot find the econ value
econ_df = econ_df.drop(econ_df.index[[11,13]])
econ_df = econ_df.reset_index(drop=True)
econ_df = econ_df.sort_values('EconValue',axis = 0, ascending = False)
print econ_df
sns.factorplot("Crop", 'EconValue',data = econ_df, kind = "bar", size=4, aspect=4)
sns.plt.title("Bar Plot for Economic Value for dofferent crops")
plt.show()


                Crop      EconValue
3             Cotton  227920.000000
5            DryBean  131776.000000
13         Safflower   53000.000000
9          FieldCrop   17880.060000
10     TropicalPlant   11736.666667
7       Onion&Garlic    7614.750000
0   Almond&Pistachio    7575.600000
11            Potato    7267.500000
4     Melon&Cucumber    5715.333333
8     DeciduousPlant    5564.693333
15        GrapeFruit    4563.000000
14  SubtropicalPlant    3714.695000
12              Rice    1772.320000
1            Alfalfa    1442.000000
2               Corn    1278.095000
6              Grain    1018.426000

In [ ]:
[u'Melon&Cucumber', u'DeciduousPlant', u'Potato' u'Pr Tom', u'SubtropicalPlant' u'GrapeFruit']

In [ ]:
u'Potato' u'Melon&Cucumber' u'DeciduousPlant'

In [ ]:


In [ ]:


In [25]:
def add(df):
    approp = df['WaterUsagePerAcre']*12/df['Precip']
    df['Appropriate'] = pd.Series(approp,index = df.index)
    df = df.sort_values('Appropriate', axis=0)
    df = df.reset_index(drop=True)
    return df

In [26]:
a = data2010[data2010['Crop'] == 'Potato']
b = data2010[data2010['Crop'] == 'Melon&Cucumber']
c = data2010[data2010['Crop'] == 'DeciduousPlant']
Cotton = add(a.copy())
DryBean = add(b.copy())
Safflwr = add(c.copy())

In [21]:
from plotly import tools
import plotly.plotly as py
import plotly.graph_objs as go

trace1 = go.Bar(
    x=Cotton['Subbasin_N'],
    y=Cotton['Appropriate'],
    name = 'Potato'
)

trace2 = go.Bar(
    x=DryBean['Subbasin_N'],
    y=DryBean['Appropriate'],
    name = 'Melon&Cucumber'
)

trace3 = go.Bar(
    x=Safflwr['Subbasin_N'],
    y=Safflwr['Appropriate'],
    name = 'DeciduousPlant'
)


fig = tools.make_subplots(rows=3, cols=1)

fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 2, 1)
fig.append_trace(trace3, 3, 1)


fig['layout']['xaxis1'].update(title='Subasin' )
fig['layout']['xaxis2'].update(title='Subasin' )
fig['layout']['xaxis3'].update(title='Subasin' )


fig['layout']['yaxis1'].update(title='Appropriateness',range=[0, 5])
fig['layout']['yaxis2'].update(title='Appropriateness', range=[0, 5])
fig['layout']['yaxis3'].update(title='Appropriateness', range=[0, 5])

fig['layout'].update(title='Other Interested Crops')

py.iplot(fig, filename='Other Three Instrested Crops')


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-21-3b8fc64c57f9> in <module>()
      4 
      5 trace1 = go.Bar(
----> 6     x=Cotton['Subbasin_N'],
      7     y=Cotton['Appropriate'],
      8     name = 'Potato'

NameError: name 'Cotton' is not defined

In [ ]:


In [ ]: