In [45]:
# Load libraries
import datetime
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas
from sklearn import model_selection
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

try:
    # Supported location of scatter_matrix in current pandas releases.
    from pandas.plotting import scatter_matrix
except ImportError:
    # Legacy location; only reachable on old pandas versions that predate
    # the `pandas.plotting` module.
    from pandas.tools.plotting import scatter_matrix

In [46]:
# Column labels to apply to the CSV. Because `names` is passed explicitly,
# read_csv treats the first line of the file as data, not as a header row.
names = ['subject', 'month', 'date', 'day', 'hour']

# The notebook imports the package as `pandas` (no `pd` alias is defined),
# so the reader is called through the full module name to avoid a NameError
# on a fresh kernel.
df = pandas.read_csv("data.csv", names=names)

# Show the first five rows as a quick sanity check of the parsed columns.
df.head(5)


Out[46]:
subject month date day hour
0 3.0 September 21.0 Thursday 16.0
1 4.0 September 21.0 Thursday 16.0
2 3.0 September 21.0 Thursday 16.0
3 4.0 September 21.0 Thursday 16.0
4 1.0 September 21.0 Thursday 16.0

In [47]:
# Count how many rows fall under each distinct value of `subject`
# and print the resulting table.
rows_per_subject = df.groupby('subject').size()
print(rows_per_subject)


subject
1.0     7
2.0     7
3.0     7
4.0     5
5.0     9
6.0     1
7.0     1
11.0    2
dtype: int64

In [48]:
# Histogram of the `subject` column, drawn in semi-transparent black.
subject_values = df["subject"]
subject_values.hist(alpha=0.75, color='k')
plt.show()



In [50]:
# Box-and-whisker plots: one panel per plotted column, arranged on a
# 2x2 grid, with x and y axes not shared between panels.
df.plot.box(subplots=True, layout=(2, 2), sharex=False, sharey=False)
plt.show()



In [51]:
# Histograms for the DataFrame's columns, one subplot each.
histogram_axes = df.hist()
plt.show()



In [53]:
# Scatter plot matrix: pairwise scatter plots of the DataFrame's columns.
# The function is reached through `pandas.plotting`, the supported public
# location; the older `pandas.tools.plotting` module no longer exists in
# current pandas releases, so a name bound from it cannot be relied on.
pandas.plotting.scatter_matrix(df)
plt.show()



In [ ]: