En el siguiente ejercicio generaremos algunas variables de gran utilidad en la agricultura. La base de datos que vamos a utilizar está disponible en la carpeta data con el nombre de data_course_aguascalientes.csv. Esta base de datos contiene la siguiente información:
Vamos a generar las siguientes variables
tmed = (tmax + tmin) / 2
dpoint = pow((humr / 100),0.125) * (112 + (0.9 * tmed )) + (0.1 * tmed) - 112
uc = tmed - tbase (en este ejercicio tbase = 10)
In [34]:
#librerias
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
In [35]:
# Adjust the path to match the directory Jupyter Notebook is launched from.
data = pd.read_csv(filepath_or_buffer='../data/data_course_aguascalientes.csv')
In [36]:
data.head()
Out[36]:
In [37]:
data.info()
In [38]:
data.describe()
Out[38]:
In [39]:
# Total number of records before any cleaning. len() counts every row, whereas
# counting a single column would leave out the rows where that column is null
# and understate the total used for the retention percentage.
Registros_totales = len(data)
Registros_totales
Out[39]:
In [40]:
# Drop every row that has a null value in at least one column.
data = data.dropna(axis=0, how='any')
In [41]:
# Records that remain in `data`, counted as non-null entries of 'number'.
Registros_SinNulos = data['number'].notna().sum()
Registros_SinNulos
Out[41]:
In [42]:
# Share of the initial records that is still present, as a percentage.
100 * (Registros_SinNulos / Registros_totales)
Out[42]:
In [43]:
# Mean temperature: midpoint between the maximum and minimum temperatures.
data['tmed'] = data['tmax'].add(data['tmin']).div(2)
In [44]:
data.head()
Out[44]:
In [45]:
# dpoint (presumably the dew point — confirm) computed from humr and tmed.
# humr is divided by 100 first, so it is presumably stored as a percentage.
data['dpoint'] = (
    (data['humr'] / 100) ** 0.125 * (112 + 0.9 * data['tmed'])
    + 0.1 * data['tmed']
    - 112
)
In [46]:
data.head()
Out[46]:
In [47]:
# Heat units (uc): mean temperature minus the base temperature.
# The base temperature is named so it can be changed in one place.
tbase = 10
data['uc'] = data['tmed'] - tbase
In [48]:
# Keep only the rows with positive heat units. The explicit copy makes `data`
# an independent frame, so the columns added to it later are not written to a
# slice of the previous frame (which triggers SettingWithCopyWarning).
# NOTE(review): this drops whole rows, so every later statistic computed on
# `data` (including the rain counts) only sees rows with uc > 0 — confirm
# that this is intended.
data = data.loc[data['uc'] > 0].copy()
In [49]:
data.head()
Out[49]:
In [50]:
# Heat units accumulated per (year, month, name) group. Selecting 'uc' before
# summing aggregates only that column instead of every column in the frame.
data.groupby(['year', 'month', 'name'])['uc'].sum()
Out[50]:
In [51]:
# Heat units accumulated per (year, month, name) group for the 'CEPAB' rows.
# 'uc' is selected before summing so only that column is aggregated.
data.loc[data['name'] == 'CEPAB'].groupby(['year', 'month', 'name'])['uc'].sum()
Out[51]:
In [52]:
# Bar chart of the yearly heat-unit totals for the 'CEPAB' rows.
# 'uc' is selected before summing so only that column is aggregated.
data.loc[data['name'] == 'CEPAB'].groupby(['year', 'name'])['uc'].sum().plot.bar(figsize=(20, 4))
Out[52]:
In [53]:
# Bar chart of the monthly heat-unit totals for the 'CEPAB' rows of 2016.
# 'uc' is selected before summing so the other columns (the text column
# 'name' among them) are not aggregated and then thrown away.
data.loc[(data['name'] == 'CEPAB') & (data['year'] == 2016)].groupby(['year', 'month'])['uc'].sum().plot.bar(figsize=(20, 4))
Out[53]:
In [54]:
# Heat units accumulated per (year, month, name) group for the 2016 rows.
# 'uc' is selected before summing so only that column is aggregated.
data.loc[data['year'] == 2016].groupby(['year', 'month', 'name'])['uc'].sum()
Out[54]:
In [55]:
# Histogram of the heat-unit values using 20 bins.
data.loc[:, 'uc'].hist(bins=20)
Out[55]:
In [56]:
# Average of the heat-unit values over the rows currently in `data`.
data.loc[:, 'uc'].mean()
Out[56]:
In [57]:
# Flag rows with rain: 1 when rain > 0, otherwise 0. The comparison is done
# on the whole column at once instead of looping over it in Python; the cast
# keeps the flags as 64-bit integers so they can be summed later.
data['rainDay'] = (data['rain'] > 0.0).astype('int64')
In [58]:
data.head()
Out[58]:
In [59]:
# Flag rows without rain: 1 when rain == 0, otherwise 0. The comparison is
# done on the whole column at once instead of looping over it in Python; the
# cast keeps the flags as 64-bit integers so they can be summed later.
data['noRainDay'] = (data['rain'] == 0.0).astype('int64')
In [60]:
data.head()
Out[60]:
In [61]:
#%% Aggregation spec: each column is summed. The one-element lists make
# pandas label the result columns with (column, function) pairs.
aggregations = {column: ['sum'] for column in ('rainDay', 'noRainDay', 'rain')}
#%% Totals of rain flags and rain per (year, month, name) group
group2 = data.groupby(['year', 'month', 'name']).agg(aggregations)
In [62]:
# Flatten the two-level column labels into single names such as 'rain_sum'.
# Iterating the column index directly already yields the (column, function)
# tuples; the extra Index.ravel() call added nothing and its return type has
# changed between pandas releases.
group2.columns = ["_".join(levels) for levels in group2.columns]
In [63]:
# Save the per-group totals; the index (year, month, name) is written too,
# so those keys become ordinary columns in the file.
group2.to_csv(path_or_buf='../data/data_course_aguascalientes_indice_lluvia.csv')
In [64]:
# Load the saved per-group totals back as a flat table.
data2 = pd.read_csv(filepath_or_buffer='../data/data_course_aguascalientes_indice_lluvia.csv')
In [65]:
data2.head()
Out[65]:
In [66]:
# Average of each numeric column per month. data2 also holds the text column
# 'name', which cannot be averaged: pandas 2.0+ raises a TypeError for it
# unless the aggregation is restricted to numeric columns.
data2.groupby(['month']).mean(numeric_only=True)
Out[66]:
In [ ]:
In [67]:
data.head()
Out[67]:
In [91]:
# Bar chart of the monthly rain totals for the 'CEPAB' rows of 2016.
# 'rain' is selected before summing so the other columns (the text column
# 'name' among them) are not aggregated and then thrown away.
data.loc[(data['name'] == 'CEPAB') & (data['year'] == 2016)].groupby(['month'])['rain'].sum().plot.bar()
Out[91]:
In [ ]: