In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
matplotlib.style.use('ggplot')
import seaborn as sns
In [2]:
os.chdir('/Users/yizhou/Desktop/')
In [3]:
ca_data = pd.read_excel('SubB_Yr_Cnty_Crp_Wtr_Acr_Zip_Prcip_Ecn.xlsx')
In [4]:
ca_data.head()
Out[4]:
In [5]:
# First we are going to make analysis in year 2010 because the data in 2010 is the most compelete data
data2010 = ca_data[ca_data['Year'] == 2010]
data2010.head()
Out[5]:
In [6]:
replacements = {
'Crop': {
r'Al Pist': 'Almond&Pistachio',
r'Cucurb': 'Melon&Cucumber',
r'On Gar': 'Onion&Garlic',
r'Oth Dec': 'DeciduousPlant',
r'Subtrop':'SubtropicalPlant',
r'Vine': 'GrapeFruit',
r'Oth Fld': 'FieldCrop',
r'SgrBeet': 'SugarBeet',
r'Safflwr': 'Safflower',
r'Pr Tom': 'ProcessedTomato',
r'Fr Tom': 'MarketTomato',
r'Oth Trk': 'TropicalPlant'}
}
data2010.replace(replacements, regex=True, inplace=True)
In [7]:
data2010.head()
Out[7]:
In [8]:
# get data for different county
name_list = np.unique(data2010['Subbasin_N'])
cont_pd = [0]*18
for n in range(len(name_list)):
cont_pd[n] = pd.DataFrame(data2010[data2010['Subbasin_N'] == name_list[n]])
cont_pd[n] = cont_pd[n].reset_index(drop=True)
In [9]:
cont_pd[0]
Out[9]:
In [10]:
print data2010['Crop'].unique()
x = ['Al Pist', 'Alfalfa','Corn', 'Cotton', 'Cucurb', 'DryBean', 'Grain', 'On Gar', 'Oth Dec', 'Oth Fld',
'Oth Trk', 'Pasture', 'Potato', 'Pr Tom', 'Safflwr', 'Subtrop', 'Vine']
In [11]:
names = ['CHOWCHILLA', 'MADERA', 'COSUMNES', 'EASTERN SAN JOAQUIN', 'TRACY', 'DELTA-MENDOTA', 'MODESTO',
'TURLOCK', 'KAWEAH', 'TULE', 'KERN COUNTY', 'WHITE WOLF', 'KETTLEMAN PLAIN', 'KINGS', 'TULARE LAKE',
'MERCED', 'PLEASANT VALLEY', 'WESTSIDE']
In [12]:
# assigen data into subasion dataframe, add appropriate column and sort it.
cont = [0] * 18
for i in range(0,18):
cont[i] = pd.DataFrame(data2010[data2010['Subbasin_N'] == name_list[i]])
cont[i] = cont[i].reset_index(drop=True)
num = cont[i][cont[i]['Crop'] == 'Pasture'].index
cont[i] = cont[i].drop(cont[i].index[num])
cont[i] = cont[i].reset_index(drop=True)
approp = cont[i]['WaterUsagePerAcre']*12/cont[i]['Precip']
cont[i]['Appropriate'] = pd.Series(approp,index = cont[i].index)
cont[i] = cont[i].sort_values('Appropriate', axis=0)
In [13]:
# Combine with econ value
Top3_list = []
for i in range(0,18):
a = list(cont[i][0:7].sort_values('EcnValue', axis=0, ascending = False)['Crop'][0:3])
Top3_list.append(a)
Top3_list
Out[13]:
In [14]:
Top3_df = pd.DataFrame(
{'Subbasin': names,
'Top3_Suggested_Crop': Top3_list,
})
Top3_df['Top3_Suggested_Crop']
Out[14]:
In [15]:
Lon_lat = pd.read_excel('18Sub_Lon_Lat.xlsx')
Lon_lat
combine_df = pd.merge(Lon_lat, Top3_df, left_on = "Subbasin_N", right_on = "Subbasin", how = "left")
del combine_df['Subbasin']
In [149]:
writer = pd.ExcelWriter('Lon_lat.xlsx', engine='xlsxwriter')
combine_df.to_excel(writer, sheet_name='Sheet1')
writer.save()
In [19]:
import requests
import requests_cache
requests_cache.install_cache("cache")
import plotly
plotly.tools.set_credentials_file(username='xiyy0929', api_key='qxnzYGi5IRe09usYNkit')
In [20]:
import plotly.plotly as py
import plotly.graph_objs as go
trace = [0]*18
for i in range(0,18):
trace[i] = go.Bar(
x=cont[i]['Crop'],
y=cont[i]['Appropriate'],
name = names[i]
)
data = [trace[0], trace[1], trace[2], trace[3], trace[4], trace[5], trace[6], trace[7], trace[8], trace[9], trace[10],
trace[11], trace[12], trace[13], trace[14], trace[15], trace[16], trace[17]]
layout = go.Layout(
barmode='group'
)
showtickprefix='first'
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='grouped-bar')
Out[20]:
In [22]:
import plotly.plotly as py
import plotly.graph_objs as go
trace = [0]*18
for i in range(0,18):
trace[i] = go.Bar(
x=cont[i]['Crop'],
y=cont[i]['Appropriate'],
name = names[i]
)
data = [trace[0], trace[1], trace[2], trace[3], trace[4], trace[5], trace[6], trace[7], trace[8], trace[9], trace[10],
trace[11], trace[12], trace[13], trace[14], trace[15], trace[16], trace[17]]
layout = go.Layout(
barmode='group'
)
showtickprefix='first'
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='grouped-bar')
Out[22]:
In [24]:
# make my list into a dataframe
a = data2010['Crop']
b = data2010['EcnValue']
econ_df = pd.DataFrame({'Crop':a, 'EconValue': b})
econ_df = econ_df.drop_duplicates()
# drop pasture(value is zero) drop Pr Tom since cannot find the econ value
econ_df = econ_df.drop(econ_df.index[[11,13]])
econ_df = econ_df.reset_index(drop=True)
econ_df = econ_df.sort_values('EconValue',axis = 0, ascending = False)
print econ_df
sns.factorplot("Crop", 'EconValue',data = econ_df, kind = "bar", size=4, aspect=4)
sns.plt.title("Bar Plot for Economic Value for dofferent crops")
plt.show()
In [ ]:
[u'Melon&Cucumber', u'DeciduousPlant', u'Potato' u'Pr Tom', u'SubtropicalPlant' u'GrapeFruit']
In [ ]:
u'Potato' u'Melon&Cucumber' u'DeciduousPlant'
In [ ]:
In [ ]:
In [25]:
def add(df):
approp = df['WaterUsagePerAcre']*12/df['Precip']
df['Appropriate'] = pd.Series(approp,index = df.index)
df = df.sort_values('Appropriate', axis=0)
df = df.reset_index(drop=True)
return df
In [26]:
a = data2010[data2010['Crop'] == 'Potato']
b = data2010[data2010['Crop'] == 'Melon&Cucumber']
c = data2010[data2010['Crop'] == 'DeciduousPlant']
Cotton = add(a.copy())
DryBean = add(b.copy())
Safflwr = add(c.copy())
In [21]:
from plotly import tools
import plotly.plotly as py
import plotly.graph_objs as go
trace1 = go.Bar(
x=Cotton['Subbasin_N'],
y=Cotton['Appropriate'],
name = 'Potato'
)
trace2 = go.Bar(
x=DryBean['Subbasin_N'],
y=DryBean['Appropriate'],
name = 'Melon&Cucumber'
)
trace3 = go.Bar(
x=Safflwr['Subbasin_N'],
y=Safflwr['Appropriate'],
name = 'DeciduousPlant'
)
fig = tools.make_subplots(rows=3, cols=1)
fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 2, 1)
fig.append_trace(trace3, 3, 1)
fig['layout']['xaxis1'].update(title='Subasin' )
fig['layout']['xaxis2'].update(title='Subasin' )
fig['layout']['xaxis3'].update(title='Subasin' )
fig['layout']['yaxis1'].update(title='Appropriateness',range=[0, 5])
fig['layout']['yaxis2'].update(title='Appropriateness', range=[0, 5])
fig['layout']['yaxis3'].update(title='Appropriateness', range=[0, 5])
fig['layout'].update(title='Other Interested Crops')
py.iplot(fig, filename='Other Three Instrested Crops')
In [ ]:
In [ ]: