notebook.community

Edit and run



In [1]:

    
import os
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
matplotlib.style.use('ggplot')
import seaborn as sns



In [2]:

    
os.chdir('/Users/yizhou/Desktop/')



In [3]:

    
ca_data = pd.read_excel('SubB_Yr_Cnty_Crp_Wtr_Acr_Zip_Prcip_Ecn.xlsx')



In [4]:

    
ca_data.head()









    Out[4]:






  
    
      
      Subbasin_N
      Year
      County
      Crop
      WaterUsagePerAcre
      Acre
      zipcode
      Precip
      EcnValue
    
  
  
    
      0
      CHOWCHILLA
      2010
      Madera
      Al Pist
      3.344023
      36958.464642
      93638
      14.15
      7575.600000
    
    
      1
      CHOWCHILLA
      2010
      Madera
      Alfalfa
      4.318180
      34392.028150
      93638
      14.15
      1442.000000
    
    
      2
      CHOWCHILLA
      2010
      Madera
      Corn
      2.545571
      6004.104783
      93638
      14.15
      1278.095000
    
    
      3
      CHOWCHILLA
      2010
      Madera
      Cotton
      3.152639
      3535.633219
      93638
      14.15
      227920.000000
    
    
      4
      CHOWCHILLA
      2010
      Madera
      Cucurb
      1.605352
      163.237815
      93638
      14.15
      5715.333333



In [5]:

    
# First we are going to make analysis in year 2010 because the data in 2010 is the most compelete data
data2010 = ca_data[ca_data['Year'] == 2010]
data2010.head()









    Out[5]:






  
    
      
      Subbasin_N
      Year
      County
      Crop
      WaterUsagePerAcre
      Acre
      zipcode
      Precip
      EcnValue
    
  
  
    
      0
      CHOWCHILLA
      2010
      Madera
      Al Pist
      3.344023
      36958.464642
      93638
      14.15
      7575.600000
    
    
      1
      CHOWCHILLA
      2010
      Madera
      Alfalfa
      4.318180
      34392.028150
      93638
      14.15
      1442.000000
    
    
      2
      CHOWCHILLA
      2010
      Madera
      Corn
      2.545571
      6004.104783
      93638
      14.15
      1278.095000
    
    
      3
      CHOWCHILLA
      2010
      Madera
      Cotton
      3.152639
      3535.633219
      93638
      14.15
      227920.000000
    
    
      4
      CHOWCHILLA
      2010
      Madera
      Cucurb
      1.605352
      163.237815
      93638
      14.15
      5715.333333



In [6]:

    
replacements = {
   'Crop': {
      r'Al Pist': 'Almond&Pistachio',
      r'Cucurb': 'Melon&Cucumber',
      r'On Gar': 'Onion&Garlic',
      r'Oth Dec': 'DeciduousPlant',
      r'Subtrop':'SubtropicalPlant',
      r'Vine': 'GrapeFruit',
      r'Oth Fld': 'FieldCrop',
      r'SgrBeet': 'SugarBeet',
      r'Safflwr': 'Safflower',
      r'Pr Tom': 'ProcessedTomato',
      r'Fr Tom': 'MarketTomato',
      r'Oth Trk': 'TropicalPlant'}
}

data2010.replace(replacements, regex=True, inplace=True)









    



/Users/yizhou/anaconda/lib/python2.7/site-packages/pandas/core/generic.py:3378: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  inplace=False, regex=regex)



In [7]:

    
data2010.head()









    Out[7]:






  
    
      
      Subbasin_N
      Year
      County
      Crop
      WaterUsagePerAcre
      Acre
      zipcode
      Precip
      EcnValue
    
  
  
    
      0
      CHOWCHILLA
      2010
      Madera
      Almond&Pistachio
      3.344023
      36958.464642
      93638
      14.15
      7575.600000
    
    
      1
      CHOWCHILLA
      2010
      Madera
      Alfalfa
      4.318180
      34392.028150
      93638
      14.15
      1442.000000
    
    
      2
      CHOWCHILLA
      2010
      Madera
      Corn
      2.545571
      6004.104783
      93638
      14.15
      1278.095000
    
    
      3
      CHOWCHILLA
      2010
      Madera
      Cotton
      3.152639
      3535.633219
      93638
      14.15
      227920.000000
    
    
      4
      CHOWCHILLA
      2010
      Madera
      Melon&Cucumber
      1.605352
      163.237815
      93638
      14.15
      5715.333333



In [8]:

    
# get data for different county
name_list = np.unique(data2010['Subbasin_N'])
cont_pd = [0]*18
for n in range(len(name_list)):
    cont_pd[n] = pd.DataFrame(data2010[data2010['Subbasin_N'] == name_list[n]])
    cont_pd[n] = cont_pd[n].reset_index(drop=True)



In [9]:

    
cont_pd[0]









    Out[9]:






  
    
      
      Subbasin_N
      Year
      County
      Crop
      WaterUsagePerAcre
      Acre
      zipcode
      Precip
      EcnValue
    
  
  
    
      0
      CHOWCHILLA
      2010
      Madera
      Almond&Pistachio
      3.344023
      36958.464642
      93638
      14.15
      7575.600000
    
    
      1
      CHOWCHILLA
      2010
      Madera
      Alfalfa
      4.318180
      34392.028150
      93638
      14.15
      1442.000000
    
    
      2
      CHOWCHILLA
      2010
      Madera
      Corn
      2.545571
      6004.104783
      93638
      14.15
      1278.095000
    
    
      3
      CHOWCHILLA
      2010
      Madera
      Cotton
      3.152639
      3535.633219
      93638
      14.15
      227920.000000
    
    
      4
      CHOWCHILLA
      2010
      Madera
      Melon&Cucumber
      1.605352
      163.237815
      93638
      14.15
      5715.333333
    
    
      5
      CHOWCHILLA
      2010
      Madera
      DryBean
      1.940346
      3.780712
      93638
      14.15
      131776.000000
    
    
      6
      CHOWCHILLA
      2010
      Madera
      Grain
      1.026442
      23115.720015
      93638
      14.15
      1018.426000
    
    
      7
      CHOWCHILLA
      2010
      Madera
      Onion&Garlic
      2.918113
      9.562978
      93638
      14.15
      7614.750000
    
    
      8
      CHOWCHILLA
      2010
      Madera
      DeciduousPlant
      3.471690
      324.251682
      93638
      14.15
      5564.693333
    
    
      9
      CHOWCHILLA
      2010
      Madera
      FieldCrop
      2.519371
      46.146930
      93638
      14.15
      17880.060000
    
    
      10
      CHOWCHILLA
      2010
      Madera
      TropicalPlant
      1.139580
      1768.928584
      93638
      14.15
      11736.666667
    
    
      11
      CHOWCHILLA
      2010
      Madera
      Pasture
      4.207748
      16459.219959
      93638
      14.15
      0.000000
    
    
      12
      CHOWCHILLA
      2010
      Madera
      Potato
      1.248577
      13.566085
      93638
      14.15
      7267.500000
    
    
      13
      CHOWCHILLA
      2010
      Madera
      ProcessedTomato
      2.520835
      919.602677
      93638
      14.15
      NaN
    
    
      14
      CHOWCHILLA
      2010
      Madera
      Rice
      4.885808
      5.782266
      93638
      14.15
      1772.320000
    
    
      15
      CHOWCHILLA
      2010
      Madera
      Safflower
      2.104100
      19.570746
      93638
      14.15
      53000.000000
    
    
      16
      CHOWCHILLA
      2010
      Madera
      SubtropicalPlant
      2.727559
      58.489844
      93638
      14.15
      3714.695000
    
    
      17
      CHOWCHILLA
      2010
      Madera
      GrapeFruit
      2.531771
      5483.367257
      93638
      14.15
      4563.000000



In [10]:

    
print data2010['Crop'].unique()
x = ['Al Pist', 'Alfalfa','Corn', 'Cotton', 'Cucurb', 'DryBean', 'Grain', 'On Gar', 'Oth Dec', 'Oth Fld',
    'Oth Trk', 'Pasture', 'Potato', 'Pr Tom', 'Safflwr', 'Subtrop', 'Vine']









    



[u'Almond&Pistachio' u'Alfalfa' u'Corn' u'Cotton' u'Melon&Cucumber'
 u'DryBean' u'Grain' u'Onion&Garlic' u'DeciduousPlant' u'FieldCrop'
 u'TropicalPlant' u'Pasture' u'Potato' u'ProcessedTomato' u'Rice'
 u'Safflower' u'SubtropicalPlant' u'GrapeFruit']



In [11]:

    
names = ['CHOWCHILLA', 'MADERA', 'COSUMNES', 'EASTERN SAN JOAQUIN', 'TRACY', 'DELTA-MENDOTA', 'MODESTO', 
        'TURLOCK', 'KAWEAH', 'TULE', 'KERN COUNTY', 'WHITE WOLF', 'KETTLEMAN PLAIN', 'KINGS', 'TULARE LAKE',
        'MERCED', 'PLEASANT VALLEY', 'WESTSIDE']



In [12]:

    
# assigen data into subasion dataframe, add appropriate column and sort it.
cont = [0] * 18
for i in range(0,18):
    cont[i] = pd.DataFrame(data2010[data2010['Subbasin_N'] == name_list[i]])
    cont[i] = cont[i].reset_index(drop=True)
    
    num = cont[i][cont[i]['Crop'] == 'Pasture'].index
    cont[i] = cont[i].drop(cont[i].index[num])
    cont[i] = cont[i].reset_index(drop=True)
    
    approp = cont[i]['WaterUsagePerAcre']*12/cont[i]['Precip']
    cont[i]['Appropriate'] = pd.Series(approp,index = cont[i].index)
    cont[i] = cont[i].sort_values('Appropriate', axis=0)



In [13]:

    
# Combine with econ value
Top3_list = []
for i in range(0,18):
    a = list(cont[i][0:7].sort_values('EcnValue', axis=0, ascending = False)['Crop'][0:3])
    Top3_list.append(a)
Top3_list









    Out[13]:





[[u'DryBean', u'Safflower', u'FieldCrop'],
 [u'DryBean', u'Safflower', u'Onion&Garlic'],
 [u'DryBean', u'Safflower', u'TropicalPlant'],
 [u'DryBean', u'Safflower', u'Onion&Garlic'],
 [u'Safflower', u'FieldCrop', u'TropicalPlant'],
 [u'Cotton', u'Safflower', u'FieldCrop'],
 [u'Cotton', u'DryBean', u'Safflower'],
 [u'Safflower', u'FieldCrop', u'TropicalPlant'],
 [u'DryBean', u'Safflower', u'FieldCrop'],
 [u'DryBean', u'Safflower', u'TropicalPlant'],
 [u'DryBean', u'Safflower', u'TropicalPlant'],
 [u'Cotton', u'DryBean', u'Safflower'],
 [u'DryBean', u'Safflower', u'Onion&Garlic'],
 [u'Safflower', u'FieldCrop', u'TropicalPlant'],
 [u'Cotton', u'FieldCrop', u'TropicalPlant'],
 [u'DryBean', u'Safflower', u'TropicalPlant'],
 [u'DryBean', u'Safflower', u'TropicalPlant'],
 [u'Cotton', u'TropicalPlant', u'Almond&Pistachio']]



In [14]:

    
Top3_df = pd.DataFrame(
    {'Subbasin': names,
     'Top3_Suggested_Crop': Top3_list,
    })
Top3_df['Top3_Suggested_Crop']









    Out[14]:





0               [DryBean, Safflower, FieldCrop]
1            [DryBean, Safflower, Onion&Garlic]
2           [DryBean, Safflower, TropicalPlant]
3            [DryBean, Safflower, Onion&Garlic]
4         [Safflower, FieldCrop, TropicalPlant]
5                [Cotton, Safflower, FieldCrop]
6                  [Cotton, DryBean, Safflower]
7         [Safflower, FieldCrop, TropicalPlant]
8               [DryBean, Safflower, FieldCrop]
9           [DryBean, Safflower, TropicalPlant]
10          [DryBean, Safflower, TropicalPlant]
11                 [Cotton, DryBean, Safflower]
12           [DryBean, Safflower, Onion&Garlic]
13        [Safflower, FieldCrop, TropicalPlant]
14           [Cotton, FieldCrop, TropicalPlant]
15          [DryBean, Safflower, TropicalPlant]
16          [DryBean, Safflower, TropicalPlant]
17    [Cotton, TropicalPlant, Almond&Pistachio]
Name: Top3_Suggested_Crop, dtype: object



In [15]:

    
Lon_lat = pd.read_excel('18Sub_Lon_Lat.xlsx')
Lon_lat
combine_df = pd.merge(Lon_lat, Top3_df, left_on = "Subbasin_N", right_on = "Subbasin", how = "left")
del combine_df['Subbasin']



In [149]:

    
writer = pd.ExcelWriter('Lon_lat.xlsx', engine='xlsxwriter')
combine_df.to_excel(writer, sheet_name='Sheet1')
writer.save()



In [19]:

    
import requests
import requests_cache
requests_cache.install_cache("cache")
import plotly 
plotly.tools.set_credentials_file(username='xiyy0929', api_key='qxnzYGi5IRe09usYNkit')



In [20]:

    
import plotly.plotly as py
import plotly.graph_objs as go


trace = [0]*18
for i in range(0,18):
    trace[i] = go.Bar(
        x=cont[i]['Crop'],
        y=cont[i]['Appropriate'],
        name = names[i]
    )

data = [trace[0], trace[1], trace[2], trace[3], trace[4], trace[5], trace[6], trace[7], trace[8], trace[9], trace[10],
       trace[11], trace[12], trace[13], trace[14], trace[15], trace[16], trace[17]]
layout = go.Layout(
    barmode='group'
)

showtickprefix='first'

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='grouped-bar')









    Out[20]:



In [22]:

    
import plotly.plotly as py
import plotly.graph_objs as go


trace = [0]*18
for i in range(0,18):
    trace[i] = go.Bar(
        x=cont[i]['Crop'],
        y=cont[i]['Appropriate'],
        name = names[i]
    )

data = [trace[0], trace[1], trace[2], trace[3], trace[4], trace[5], trace[6], trace[7], trace[8], trace[9], trace[10],
       trace[11], trace[12], trace[13], trace[14], trace[15], trace[16], trace[17]]
layout = go.Layout(
    barmode='group'
)

showtickprefix='first'

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='grouped-bar')









    Out[22]:



In [24]:

    
# make my list into a dataframe
a = data2010['Crop']
b = data2010['EcnValue']
econ_df = pd.DataFrame({'Crop':a, 'EconValue': b})
econ_df = econ_df.drop_duplicates()
# drop pasture(value is zero) drop Pr Tom since cannot find the econ value
econ_df = econ_df.drop(econ_df.index[[11,13]])
econ_df = econ_df.reset_index(drop=True)
econ_df = econ_df.sort_values('EconValue',axis = 0, ascending = False)
print econ_df
sns.factorplot("Crop", 'EconValue',data = econ_df, kind = "bar", size=4, aspect=4)
sns.plt.title("Bar Plot for Economic Value for dofferent crops")
plt.show()









    



                Crop      EconValue
3             Cotton  227920.000000
5            DryBean  131776.000000
13         Safflower   53000.000000
9          FieldCrop   17880.060000
10     TropicalPlant   11736.666667
7       Onion&Garlic    7614.750000
0   Almond&Pistachio    7575.600000
11            Potato    7267.500000
4     Melon&Cucumber    5715.333333
8     DeciduousPlant    5564.693333
15        GrapeFruit    4563.000000
14  SubtropicalPlant    3714.695000
12              Rice    1772.320000
1            Alfalfa    1442.000000
2               Corn    1278.095000
6              Grain    1018.426000



In [ ]:

    
[u'Melon&Cucumber', u'DeciduousPlant', u'Potato' u'Pr Tom', u'SubtropicalPlant' u'GrapeFruit']



In [ ]:

    
u'Potato' u'Melon&Cucumber' u'DeciduousPlant'



In [ ]:



In [ ]:



In [25]:

    
def add(df):
    approp = df['WaterUsagePerAcre']*12/df['Precip']
    df['Appropriate'] = pd.Series(approp,index = df.index)
    df = df.sort_values('Appropriate', axis=0)
    df = df.reset_index(drop=True)
    return df



In [26]:

    
a = data2010[data2010['Crop'] == 'Potato']
b = data2010[data2010['Crop'] == 'Melon&Cucumber']
c = data2010[data2010['Crop'] == 'DeciduousPlant']
Cotton = add(a.copy())
DryBean = add(b.copy())
Safflwr = add(c.copy())



In [21]:

    
from plotly import tools
import plotly.plotly as py
import plotly.graph_objs as go

trace1 = go.Bar(
    x=Cotton['Subbasin_N'],
    y=Cotton['Appropriate'],
    name = 'Potato'
)

trace2 = go.Bar(
    x=DryBean['Subbasin_N'],
    y=DryBean['Appropriate'],
    name = 'Melon&Cucumber'
)

trace3 = go.Bar(
    x=Safflwr['Subbasin_N'],
    y=Safflwr['Appropriate'],
    name = 'DeciduousPlant'
)


fig = tools.make_subplots(rows=3, cols=1)

fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 2, 1)
fig.append_trace(trace3, 3, 1)


fig['layout']['xaxis1'].update(title='Subasin' )
fig['layout']['xaxis2'].update(title='Subasin' )
fig['layout']['xaxis3'].update(title='Subasin' )


fig['layout']['yaxis1'].update(title='Appropriateness',range=[0, 5])
fig['layout']['yaxis2'].update(title='Appropriateness', range=[0, 5])
fig['layout']['yaxis3'].update(title='Appropriateness', range=[0, 5])

fig['layout'].update(title='Other Interested Crops')

py.iplot(fig, filename='Other Three Instrested Crops')









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-21-3b8fc64c57f9> in <module>()
      4 
      5 trace1 = go.Bar(
----> 6     x=Cotton['Subbasin_N'],
      7     y=Cotton['Appropriate'],
      8     name = 'Potato'

NameError: name 'Cotton' is not defined



In [ ]:



In [ ]:

	Subbasin_N	Year	County	Crop	WaterUsagePerAcre	Acre	zipcode	Precip	EcnValue
0	CHOWCHILLA	2010	Madera	Al Pist	3.344023	36958.464642	93638	14.15	7575.600000
1	CHOWCHILLA	2010	Madera	Alfalfa	4.318180	34392.028150	93638	14.15	1442.000000
2	CHOWCHILLA	2010	Madera	Corn	2.545571	6004.104783	93638	14.15	1278.095000
3	CHOWCHILLA	2010	Madera	Cotton	3.152639	3535.633219	93638	14.15	227920.000000
4	CHOWCHILLA	2010	Madera	Cucurb	1.605352	163.237815	93638	14.15	5715.333333