In [68]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import numpy as np
In [69]:
# Load the "anscombe" example dataset through seaborn. Later cells read its
# "dataset", "x" and "y" columns.
# NOTE(review): load_dataset presumably fetches the data on first use, so a
# fresh environment may need network access — confirm.
anscombe = sns.load_dataset("anscombe")
In [70]:
# Show the loaded frame as this cell's rich output.
anscombe
Out[70]:
In [71]:
# Summary statistics from describe(), computed separately for each value of
# the "dataset" column.
anscombe.groupby("dataset").describe()
Out[71]:
In [72]:
# Correlation matrix of the two numeric columns. They are selected explicitly
# because "dataset" holds string labels: recent pandas versions raise on
# non-numeric columns instead of silently dropping them.
anscombe[["x", "y"]].corr()
Out[72]:
In [73]:
# Fit a least-squares line to every dataset and print its equation.
# After the loop, slope / intercept / r_val / p_val / slope_std_error hold the
# values of the last dataset only.
for data_set in anscombe["dataset"].unique():
    df = anscombe.loc[anscombe["dataset"] == data_set]
    slope, intercept, r_val, p_val, slope_std_error = stats.linregress(df["x"], df["y"])
    print('y={:.2f}x+{:.2f}'.format(slope, intercept))
In [74]:
# Draw one regression plot per dataset, titled with that dataset's own fit.
for data_set in anscombe.dataset.unique():
    df = anscombe[anscombe.dataset == data_set]
    # Refit inside the loop so the title describes the subset being plotted,
    # rather than fit values left in the kernel by an earlier cell.
    fit = stats.linregress(x=df.x, y=df.y)
    sns.lmplot(x="x", y="y", data=df)
    # linregress reports the correlation coefficient r; square it for R^2.
    plt.title("Data set {}: y={:.2f}x+{:.2f} (p: {:.2f}, R^2: {:.2f})".format(
        data_set, fit.slope, fit.intercept, fit.pvalue, fit.rvalue ** 2))
In [ ]: