In [15]:
# First we do all the imports
%pylab inline
import numpy as np
import seaborn as sns
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
# Default figure size (width, height) in inches for the plots in this notebook
plt.rcParams['figure.figsize'] = (20.0, 8.0)
#import ....
In [13]:
# Synthetic dataset: 5 traces of 20 samples each, drawn from a normal
# distribution with zero mean and unit standard deviation
np.random.seed(42)  # fixed seed so the draws are the same on every run
data = np.random.normal(loc=0, scale=1, size=(5, 20))
data.shape
Out[13]:
In [16]:
# Individual traces (one per row of `data`) with their across-trace mean on top.
# `sns.tsplot` was deprecated in seaborn 0.8 and removed in 0.9, so only call it
# where it still exists and rebuild the same picture with `sns.lineplot` otherwise.
if hasattr(sns, "tsplot"):
    sns.tsplot(data, err_style='unit_traces')
else:
    n_units, n_times = data.shape
    # Long-form table with one row per (unit, time) sample, as lineplot expects
    traces_long = pd.DataFrame({
        "unit": np.repeat(np.arange(n_units), n_times),
        "time": np.tile(np.arange(n_times), n_units),
        "value": data.ravel(),
    })
    # units + estimator=None draws every trace separately instead of aggregating
    ax = sns.lineplot(x="time", y="value", units="unit", estimator=None,
                      data=traces_long, alpha=0.4)
    # Mean across traces at each time point, drawn thicker on the same axes
    ax.plot(data.mean(axis=0), linewidth=2.5)
Out[16]:
In [17]:
# Same traces with plain matplotlib: plot() draws one line per column of a
# 2-D array, so transpose to get one line per trace
plt.plot(np.transpose(data))
Out[17]:
In [18]:
# Mean across the traces at each time point (axis=0 collapses the trace axis),
# with green lines one standard deviation below and above it
data_mean = np.mean(data, axis=0)
data_std = np.std(data, axis=0)
plt.plot(data_mean)
plt.plot(data_mean - data_std, 'g')
plt.plot(data_mean + data_std, 'g')
Out[18]:
In [19]:
# Summary statistics per trace: axis=1 reduces along each row of `data`.
# Result is a tuple (a named tuple; its `minmax` and `variance` fields are read in the next cells)
description = stats.describe(data, axis=1)
In [20]:
# Minimum and maximum of each trace
description.minmax
Out[20]:
In [21]:
# Standard deviation of each trace, from the variance reported by stats.describe.
# NOTE: stats.describe defaults to ddof=1 (unbiased variance), whereas NumPy's
# std() defaults to ddof=0, so these values differ slightly from data.std(axis=1)
np.sqrt(description.variance)
Out[21]:
In [22]:
# Summary statistics over all samples pooled together, ignoring which trace they came from
stats.describe(np.ravel(data))
Out[22]:
Something similar can be achieved with pandas, using `DataFrame.describe()`:
In [23]:
# 1000 observations of five standard-normal variables, one per column
frame = pd.DataFrame(
    np.random.normal(loc=0, scale=1, size=(1000, 5)),
    columns=list('abcde'),
)
frame.describe()
Out[23]:
Sometimes you need to characterize each time series on its own, rather than computing statistics across a set of time series.
In [24]:
# Sampling grid: 100 points over 0-15 s, i.e. a step of 15/99 s (about 6.6 samples per second).
# NOTE: generate_timeseries below puts row i at frequency i * f_baseline, so rows
# with i >= 4 lie above the ~3.3 Hz Nyquist limit of this grid and appear aliased.
time = np.linspace(0, 15, 100) # [s]
# Base frequency of the generated sinusoids
f_baseline = 1.0 # [s^-1]
# Number of time series to generate
N = 10
# List comprehension works for creating arrays too :)
def generate_timeseries(N, t, f):
    '''Generate N sinusoids whose amplitude and frequency both grow with the row index.

    Row ``i`` holds ``i * sin(2*pi * (f*i) * t)``, so row 0 is identically zero.

    Parameters
    ----------
    N : int
        Number of series (rows) to generate.
    t : array_like
        Sample times. Lists and tuples are accepted as well as arrays.
    f : float
        Base frequency, in the reciprocal of the unit of ``t``.

    Returns
    -------
    numpy.ndarray
        Array of N x length(t).
    '''
    # Convert once so plain Python sequences work: float * list raises TypeError
    t = np.asarray(t)
    d = np.array([i * np.sin(2*np.pi*f*i*t) for i in range(N)])
    return d
# NOTE: rebinds `data`: from here on it holds the N generated sinusoids
# (one per row), no longer the random traces created at the top of the notebook
data = generate_timeseries(N, time, f_baseline)
In [25]:
# One panel per generated series in a 2 x 5 grid, with red guide lines at
# plus and minus one standard deviation of that series
r, c = 2, 5
plt.figure(figsize=(20, 10))
n_samples = len(time)
for k in range(r*c):
    series = data[k, :]
    sigma = series.std()
    plt.subplot(r, c, k+1)
    plt.plot(series)
    for level in (sigma, -sigma):
        plt.plot(np.full(n_samples, level), 'r', alpha=0.9)
    # Same vertical range in every panel so amplitudes are comparable
    plt.ylim([-N, N])
In [26]:
sns.set(context="paper", font="monospace")

# Load the dataset of correlations between cortical brain networks
df = sns.load_dataset("brain_networks", header=[0, 1, 2], index_col=0)
corrmat = df.corr()

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(12, 9))

# Draw the heatmap using seaborn, explicitly on the axes created above
sns.heatmap(corrmat, vmax=.8, square=True, ax=ax)

# Use matplotlib directly to emphasize known networks: draw a white line
# wherever the network label changes between neighbouring columns
networks = corrmat.columns.get_level_values("network")
# Older seaborn flips the rows and keeps the y axis ascending, newer seaborn
# inverts the y axis instead. Pick the y coordinate that matches what heatmap
# did, otherwise the horizontal dividers end up mirrored.
y_inverted = ax.yaxis_inverted()
for i, network in enumerate(networks):
    if i and network != networks[i - 1]:
        ax.axhline(i if y_inverted else len(networks) - i, c="w")
        ax.axvline(i, c="w")
f.tight_layout()
In [27]:
# Row 0 of `data` is identically zero (its amplitude factor is i = 0), so its
# variance is zero and np.corrcoef would return NaN for that row and column.
# Leave it out, as the later correlation cell already does.
cc = np.corrcoef(data[1:, :])
f, ax = plt.subplots(figsize=(12, 9))
# Label the ticks with the original row indices, since row 0 was dropped
row_labels = list(range(1, data.shape[0]))
sns.heatmap(cc, xticklabels=row_labels, yticklabels=row_labels, ax=ax)
f.tight_layout()
In [34]:
# Inspect the numeric values of the correlation matrix
cc
Out[34]:
In [28]:
def generate_correlated_timeseries(N, t, f):
    '''Generate N scaled copies of a single sinusoid.

    Row ``i`` holds ``i * sin(2*pi*f*t)``: every row has the same frequency and
    phase and differs only in amplitude, so row 0 is identically zero.

    Parameters
    ----------
    N : int
        Number of series (rows) to generate.
    t : array_like
        Sample times. Lists and tuples are accepted as well as arrays.
    f : float
        Frequency of the sinusoid, in the reciprocal of the unit of ``t``.

    Returns
    -------
    numpy.ndarray
        Array of N x length(t).
    '''
    # Convert once so plain Python sequences work: float * list raises TypeError
    t = np.asarray(t)
    # The sinusoid does not depend on the row index: compute it once, then scale
    base = np.sin(2*np.pi*f*t)
    d = np.array([i * base for i in range(N)])
    return d
# NOTE: rebinds `data` again: it now holds N copies of the same sinusoid that
# differ only in amplitude
data = generate_correlated_timeseries(N, time, f_baseline)
In [29]:
# One panel per series in a 2 x 5 grid, with red guide lines at plus and minus
# one standard deviation of that series
r, c = 2, 5
plt.figure(figsize=(20, 10))
n_samples = len(time)
for k in range(r*c):
    series = data[k, :]
    sigma = series.std()
    plt.subplot(r, c, k+1)
    plt.plot(series)
    for level in (sigma, -sigma):
        plt.plot(np.full(n_samples, level), 'r', alpha=0.9)
    # Same vertical range in every panel so amplitudes are comparable
    plt.ylim([-N, N])
In [111]:
# Correlation between the series, skipping row 0: it is identically zero, so
# its correlation with anything is undefined
cc = np.corrcoef(data[1:])
f, ax = plt.subplots(figsize=(12, 9))
# Narrow colour range (0.9 to 1.0) for the heatmap
sns.heatmap(cc, vmin=0.9, vmax=1.0, square=True)
f.tight_layout()
In [2]:
# Reset seaborn's styling to its defaults (an earlier cell switched to the
# "paper" context with a monospace font)
sns.set()
# Load a predefined dataset into a pandas dataframe
# NOTE: rebinds `df`, which an earlier cell used for the brain-networks data
df = sns.load_dataset("iris")
It seems that sepal and petal sizes tend to be related; that is, bigger flowers are bigger in every dimension! In addition, there might be a systematic effect of species ...
In this dataset we have 4 numerical variables (features) and one categorical variable, the species, with 3 categories (labels).
In [9]:
# Pairwise relationship plot
# Pairwise relationship plot: one panel for every pair of numerical columns,
# with the points coloured by the `species` column
sns.pairplot(df, hue="species")
Out[9]:
What are we actually seeing here?
In [33]:
# Record the versions of the libraries used by this notebook
# (requires the third-party `version_information` IPython extension)
%load_ext version_information
%version_information numpy, matplotlib, scipy, seaborn, pandas
Out[33]: