In [1]:
import pandas as pd
import numpy as np
import pylab as plt
import statsmodels.formula.api as sm
%matplotlib inline
# Notebook-wide pandas display settings: show floats with 4 digits of
# precision and allow up to 999 columns before the output is truncated.
pd.options.display.precision = 4
pd.options.display.max_columns = 999
In [2]:
# Load sheet "Tabelle1" of the workbook into `df`.
# The workbook is opened in a `with` block so the underlying file handle is
# released as soon as the sheet has been parsed; `ZR` stays bound afterwards
# (as a closed ExcelFile) for any later cell that refers to it by name.
with pd.ExcelFile('ZR_Daten_DDR_1976_1990.xlsx') as ZR:
    df = ZR.parse("Tabelle1")
In [3]:
# Keep only rows whose yield lies strictly between 100 and 600; rows with a
# missing yield fail both comparisons and are dropped as well.
# .copy() makes `df` an independent frame, so the in-place column updates in
# later cells modify real data instead of a slice of the loaded frame
# (which would raise pandas' SettingWithCopyWarning).
df = df[(df['ERTRAG (dt/ha)'] > 100) & (df['ERTRAG (dt/ha)'] < 600)].copy()
In [4]:
# Summary statistics for every numeric column of the filtered frame.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(df.describe())
In [5]:
# Pairwise correlation matrix of the frame; only the column of correlations
# against the yield is printed.
# print(...) with a single argument behaves the same on Python 2 and 3.
corr = df.corr()
print(corr['ERTRAG (dt/ha)'])
In [6]:
# Two-column frame for the formula API: Y is the yield, X is ERNTEJAHR.
data = pd.DataFrame(dict(Y=df['ERTRAG (dt/ha)'], X=df['ERNTEJAHR']))
In [7]:
# Ordinary least squares fit of Y on X (intercept added by the formula).
model = sm.ols("Y ~ X", data=data).fit()
In [8]:
# Fitted coefficients (Intercept and X) of the Y ~ X regression.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(model.params)
In [9]:
# Full statsmodels regression report for `model`, printed as plain text.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(model.summary())
In [10]:
# Subtract a linear trend in ERNTEJAHR from the yield, using 1976 as the
# reference year (rows with ERNTEJAHR == 1976 are left unchanged).
# NOTE(review): 2.867 is presumably the X coefficient of `model` fitted
# above -- confirm against the printed parameters.
# WARNING: this overwrites the yield column, so running the cell a second
# time subtracts the trend again.
df['ERTRAG (dt/ha)'] = df['ERTRAG (dt/ha)'] - 2.867 * (df['ERNTEJAHR'] - 1976)
In [11]:
# Summary statistics of the yield column as it stands after the
# ERNTEJAHR-based adjustment applied in the preceding cell.
df['ERTRAG (dt/ha)'].describe()
Out[11]:
In [12]:
# Two-column frame for the second regression: Y is the (already adjusted)
# yield, X is TAG_AUSSAAT.
data1 = pd.DataFrame(dict(Y=df['ERTRAG (dt/ha)'], X=df['TAG_AUSSAAT']))
In [13]:
# Ordinary least squares fit of the adjusted yield on TAG_AUSSAAT, followed
# by its fitted coefficients (Intercept and X).
# print(...) with a single argument behaves the same on Python 2 and 3.
model1 = sm.ols(formula="Y ~ X", data=data1).fit()
print(model1.params)
In [14]:
# Full statsmodels regression report for `model1`, printed as plain text.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(model1.summary())
In [15]:
# Subtract a linear effect of TAG_AUSSAAT from the yield, using day 75 as
# the reference (rows with TAG_AUSSAAT == 75 are left unchanged).
# Author's caveat: this correction is wrong due to errors in the sowing date.
# Any row with a missing TAG_AUSSAAT ends up with a NaN yield after this
# step, and re-running the cell applies the subtraction again.
df['ERTRAG (dt/ha)'] = df['ERTRAG (dt/ha)'] - 0.752 * (df['TAG_AUSSAAT'] - 75)
In [16]:
# Summary statistics of the yield column as it stands after the
# TAG_AUSSAAT-based adjustment applied in the preceding cell.
df['ERTRAG (dt/ha)'].describe()
Out[16]:
In [17]:
# Distribution of the adjusted yield in 50 bins; missing values are removed
# before binning.
plt.hist(x=df['ERTRAG (dt/ha)'].dropna(), bins=50)
plt.grid(True)
plt.xlabel('ERTRAG (dt/ha)')
plt.show()
In [18]:
# Distribution of the 'Mittlere Ackerzahl' column in 50 bins; missing values
# are removed before binning.
plt.hist(x=df['Mittlere Ackerzahl'].dropna(), bins=50)
plt.grid(True)
plt.xlabel('Mittlere Ackerzahl')
plt.show()
In [19]:
# Distribution of the row-wise total LT_6 + LT_7 + LT_8 + LT_9 in 50 bins.
# A row with any missing component sums to NaN; those rows are dropped
# before binning, matching the other histogram cells, so NaNs can neither
# break the bin-range computation nor be silently miscounted.
plt.hist((df['LT_6'] + df['LT_7'] + df['LT_8'] + df['LT_9']).dropna(), bins=50)
plt.xlabel('LT6..LT9')
plt.grid(True)
plt.show()
In [20]:
# Distribution of the row-wise total NI_6 + NI_7 + NI_8 + NI_9 in 50 bins.
# A row with any missing component sums to NaN; those rows are dropped
# before binning, matching the other histogram cells, so NaNs can neither
# break the bin-range computation nor be silently miscounted.
plt.hist((df['NI_6'] + df['NI_7'] + df['NI_8'] + df['NI_9']).dropna(), bins=50)
plt.xlabel('NI_6..NI_9')
plt.grid(True)
plt.show()
In [20]:
In [20]: