Data Visualisation


In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

Chart Attributes

Pyplot - Size, Title, Axes Labels, Axes Range


In [34]:
# Open a new figure that is 15 wide by 5 tall (figsize units are inches);
# the pyplot calls below all act on this current figure.
plt.figure(figsize=(15, 5))

# Text decorations: a title plus one label per axis.
plt.title('My plot')
plt.xlabel('X values')
plt.ylabel('Count of values')

# Pin the visible range of each axis as (lower bound, upper bound).
# The last call is left unsuppressed, so the cell echoes the y-limits.
plt.xlim(0, 10)
plt.ylim(0, 5)


Out[34]:
(0, 5)

Axes - Set Range

ax.set_ylim([0, 1])  # limit the y-axis of the Axes object `ax` to the range 0..1
ax.set_xlim([0, 1])  # same for the x-axis; `ax` is not defined at this point -- presumably an Axes returned by a plotting call such as sns.barplot

Count of Each Label in Column of DataFrame (Histogram)


In [7]:
# Toy data: two short columns of small integers, used here as category labels.
data_dict = {
    'x': [0, 1, 1, 2, 1, 0, 0, 1, 2, 1, 1],
    'y': [1, 1, 1, 1, 1, 0, 0, 0, 1, 2, 1]
}

df = pd.DataFrame(data=data_dict)

# Draw one bar per distinct value of column 'x', with bar height = number of rows.
# The column is passed by keyword: a bare positional Series is deprecated in
# older seaborn releases and is interpreted as `data` (not `x`) in newer ones.
ax = sns.countplot(data=df, x='x')

# Decorate AFTER plotting, directly on the returned Axes. Some seaborn versions
# write their own axis labels while plotting, which would replace labels that
# were set beforehand through plt.xlabel / plt.ylabel.
ax.set_title('My plot')
ax.set_xlabel('X values')
ax.set_ylabel('Count of values');


Percentage (as a Fraction of 1) of Each Label in Column of DataFrame


In [22]:
# Toy data: two short columns of small integers, used here as category labels.
data_dict = dict(
    x=[0, 1, 1, 2, 1, 0, 0, 1, 2, 1, 1],
    y=[1, 1, 1, 1, 1, 0, 0, 0, 1, 2, 1],
)
df = pd.DataFrame(data_dict)

# Relative frequency (fraction of rows, 0..1) of every distinct value in 'x',
# ordered by the value itself rather than by how often it occurs.
percentage_dist = (
    df['x']
    .value_counts(normalize=True)
    .sort_index()
)

# One bar per distinct value; the y-axis is fixed to the full 0..1 range so
# the bars read as shares of the whole.
ax = sns.barplot(
    x=percentage_dist.index,
    y=percentage_dist.values,
)
ax.set_ylim(0, 1);


Count of (Continuous) Values in Column of DataFrame


In [45]:
mu, sigma = 0, 0.1  # mean and standard deviation

# Draw the sample from a dedicated, seeded generator so the figure is the same
# on every run and the global NumPy random state is left untouched.
rng = np.random.RandomState(42)
data_dict = {
    'x': rng.normal(mu, sigma, 1000)
}

df = pd.DataFrame(data=data_dict)

# Bin edges from -0.4 to 0.4 in steps of 0.05.
# np.arange(start, stop, step) leaves out `stop`, so the edges would end at
# 0.35: the bins would be lopsided around the mean and samples above 0.35
# would be dropped. np.linspace includes both end points.
bin_min, bin_max, bin_width = -0.4, 0.4, 0.05
n_bins = int(round((bin_max - bin_min) / bin_width))
bin_edges = np.linspace(bin_min, bin_max, n_bins + 1)

plt.hist(df['x'], bins=bin_edges);