In [1]:
import os

import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
# Global plot styling: enlarge the default font for every figure.
font = {'size': 20}
matplotlib.rc('font', **font)

# Directory the notebook is run from; data.csv is looked up relative to it.
# os.getcwd() replaces the `!pwd` shell escape, which only works inside IPython
# on systems that provide a `pwd` command. `curdir` stays a one-element list so
# any code that indexes `curdir[0]` keeps working.
curdir = [os.getcwd()]
rootdir = os.path.abspath(curdir[0])
def get_columns(columns, path=None):
    """Yield the requested columns of a comma-separated file, one row at a time.

    Parameters
    ----------
    columns : iterable of int
        1-based column numbers to extract, in the order they should appear
        in each yielded row.
    path : str, optional
        File to read. Defaults to ``data.csv`` inside ``rootdir``.

    Yields
    ------
    list of str
        The selected fields of one line, as raw strings.

    Raises
    ------
    IndexError
        If a line has fewer fields than a requested column number.
    """
    if path is None:
        path = os.path.join(rootdir, 'data.csv')
    # Convert to 0-based offsets once instead of on every line.
    offsets = [i - 1 for i in columns]
    with open(path) as f:
        for line in f:
            # Drop the line terminator so it does not leak into the last field.
            line = line.rstrip('\r\n')
            if not line.strip():
                # Blank lines (e.g. a trailing empty line) carry no data.
                continue
            fields = line.split(',')  # split once per line, not once per column
            yield [fields[k] for k in offsets]
In [2]:
# Load columns 1 and 2 of data.csv into a float array, one row per file line.
# The builtin `float` replaces `np.float`, an alias that was deprecated in
# NumPy 1.20 and removed in 1.24 (where it raises AttributeError).
data = np.array(list(get_columns([1, 2])), dtype=float)
print("Top 10 rows\n")
print(data[:10, :])
In [3]:
def histogram(col):
    """Plot a 20-bin histogram of one column of data.csv.

    Parameters
    ----------
    col : int
        1-based number of the column to plot; also used in the x-axis label.
    """
    # Builtin float: the `np.float` alias was removed in NumPy 1.24.
    values = np.array(list(get_columns([col])), dtype=float)
    # Explicit Axes instead of the implicit pyplot "current axes" state.
    fig, ax = plt.subplots(figsize=(15, 8))
    ax.grid(lw=2)
    ax.hist(values, bins=20)
    ax.set_xlabel("Column %d" % col)
    plt.show()
In [4]:
# Distribution of the values in column 1 of data.csv.
histogram(1)
In [5]:
# Distribution of the values in column 2 of data.csv.
histogram(2)
In [6]:
def timeseries(col):
    """Plot one column of data.csv against its row index as blue dots.

    Parameters
    ----------
    col : int
        1-based number of the column to plot; also used in the x-axis label.
    """
    # Builtin float: the `np.float` alias was removed in NumPy 1.24.
    values = np.array(list(get_columns([col])), dtype=float)
    # Explicit Axes instead of the implicit pyplot "current axes" state.
    fig, ax = plt.subplots(figsize=(15, 8))
    ax.grid(lw=2)
    # Only y values are given, so matplotlib uses 0..N-1 (file order) for x.
    ax.plot(values, 'bo')
    ax.set_xlabel("Column %d" % col)
    plt.show()
In [7]:
# Column 1 values in file order (the x axis is the row index).
timeseries(1)
In [8]:
# Column 2 values in file order (the x axis is the row index).
timeseries(2)
In [9]:
def scatter(x, y):
    """Plot one column of data.csv against another as blue dots.

    Parameters
    ----------
    x : int
        1-based number of the column drawn on the horizontal axis.
    y : int
        1-based number of the column drawn on the vertical axis.
    """
    # Builtin float: the `np.float` alias was removed in NumPy 1.24.
    points = np.array(list(get_columns([x, y])), dtype=float)
    # Explicit Axes instead of the implicit pyplot "current axes" state.
    fig, ax = plt.subplots(figsize=(15, 8))
    ax.grid(lw=2)
    # get_columns keeps the requested order: field 0 is column x, field 1 is column y.
    ax.plot(points[:, 0], points[:, 1], 'bo')
    ax.set_xlabel("Column %d" % x)
    ax.set_ylabel("Column %d" % y)
    plt.show()
In [10]:
# Column 1 (x axis) against column 2 (y axis).
scatter(1, 2)
In [10]: