In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
% matplotlib inline
In [2]:
# Read the 2013 wheat CSV and strip the columns named in drop_cols in a
# single expression, then preview the first rows of the result.
drop_cols = ['CountyName', 'State', 'Date']
df = pd.read_csv('data/wheat-2013-supervised.csv').drop(drop_cols, axis=1)
df.head()
Out[2]:
In [3]:
# Dimensions (rows, columns) of the frame at this point in the notebook.
df.shape
Out[3]:
In [4]:
# Number of missing values in each column.
df.isnull().sum()
Out[4]:
In [5]:
# Print the variance of every column from position 5 onward, prefixing the
# line with a warning banner when that variance is exactly zero.
for col in df.columns[5:]:
    # Compute once and reuse for both the comparison and the message.
    col_var = df[col].var()
    if col_var == 0:
        # Single-argument print(...) produces the same output on Python 2 and 3.
        print('*****LOW VARIANCE WARNING***** ==> {} ==> var:{}'.format(col, col_var))
    else:
        print('{} ==> var:{}'.format(col, col_var))
In [6]:
# Bar chart of how often each value of precipTypeIsOther occurs, followed by
# removal of that column from the frame (in place).
# NOTE(review): presumably dropped because of the variance check in the
# previous cell — confirm against that cell's output.
df['precipTypeIsOther'].value_counts().plot(kind='bar')
df.drop(labels=['precipTypeIsOther'], axis='columns', inplace=True)
In [7]:
# Remove, in place, every row that contains at least one missing value.
df.dropna(inplace=True)
In [8]:
# Write the edited frame to a new CSV next to the original input file.
# NOTE(review): to_csv writes the row index as the first column by default;
# confirm that whatever reads this file expects that extra column.
df.to_csv('data/wheat-2013-supervised-edited.csv')