In [1]:
import os

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
matplotlib.style.use("ggplot")

In [4]:
# Directory that holds the course datasets. Set the DAT210X_DATA_DIR
# environment variable to point at a local copy; when it is unset the
# original location is used, so existing setups keep working unchanged.
file_path = os.environ.get(
    "DAT210X_DATA_DIR", "/Users/szabolcs/dev/git/DAT210x/Module3/Datasets/"
)
file_name = "students.data"

# os.path.join accepts the directory with or without a trailing separator.
df = pd.read_csv(os.path.join(file_path, file_name))

# NOTE(review): in the rows shown by head() the "id" column repeats the
# default integer index; consider read_csv(..., index_col=0) -- confirm
# against the full file before changing.
print(df.columns)
df.head()


Index(['id', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu',
       'traveltime', 'studytime', 'failures', 'schoolsup', 'famsup', 'paid',
       'activities', 'nursery', 'higher', 'internet', 'romantic', 'famrel',
       'freetime', 'goout', 'Dalc', 'Walc', 'health', 'absences', 'G1', 'G2',
       'G3'],
      dtype='object')
Out[4]:
id sex age address famsize Pstatus Medu Fedu traveltime studytime ... famrel freetime goout Dalc Walc health absences G1 G2 G3
0 0 1 18 0 1 0 4 4 2 2 ... 4 3 4 1 1 3 4 0 11 11
1 1 1 17 0 1 1 1 1 1 2 ... 5 3 3 1 1 3 2 9 11 11
2 2 1 15 0 0 1 1 1 1 2 ... 4 3 2 2 3 3 6 12 13 12
3 3 1 15 0 1 1 4 2 1 3 ... 3 2 2 1 1 5 0 14 14 14
4 4 1 16 0 1 1 3 3 1 2 ... 4 3 2 1 2 5 0 11 13 13

5 rows × 29 columns


In [11]:
# Column pairs to compare; one scatter plot is drawn per (x, y) pair.
scatter_pairs = [
    ("G1", "G3"),
    ("G1", "G2"),
    ("failures", "age"),
    ("failures", "absences"),
]

# A loop replaces the copy-pasted calls; each figure gets a title so it can
# be read on its own. Because the cell no longer ends in a bare expression,
# no Axes repr is echoed after the figures.
for x_col, y_col in scatter_pairs:
    df.plot.scatter(x=x_col, y=y_col, title="{} vs {}".format(y_col, x_col))


<matplotlib.figure.Figure at 0x1121b63c8>
<matplotlib.figure.Figure at 0x11208fb00>
<matplotlib.figure.Figure at 0x112ca9390>

In [ ]: