Official documentation: https://seaborn.pydata.org/index.html
In [ ]:
import math

import numpy as np
import pandas as pd
import seaborn as sns
In [ ]:
# Display the installed seaborn version.
sns.__version__
In [ ]:
# Select the 'talk' plotting context for the figures drawn after this call.
sns.set_context('talk')
# Alternative context, left disabled:
#sns.set_context('poster')
In [ ]:
# Load seaborn's "fmri" example dataset and preview its first rows.
df = sns.load_dataset("fmri")
df.head()
In [ ]:
# Line plot of "signal" against "timepoint" for the loaded dataset.
sns.relplot(
    data=df,
    x="timepoint",
    y="signal",
    kind="line",
);
In [ ]:
# Same line plot, with an explicit figure height and aspect ratio.
sns.relplot(
    data=df,
    x="timepoint",
    y="signal",
    kind="line",
    height=6,
    aspect=2,
);
In [ ]:
# Simulate 100 runs of two noisy lines y = a * x + noise (slopes a = 1 and
# a = 3), each sampled at x = 0..9, and collect one row per sample.
rows = []
for run in range(100):
    for a in (1., 3.):
        for x in range(10):
            # Gaussian noise scaled by 10 around the exact line.
            y = a * x + 10. * np.random.normal()
            rows.append([x, y, a, run])
df = pd.DataFrame(rows, columns=["x", "y", "a", "run"])
df.head()
In [ ]:
# Point plot of y against x, with one series per value of "a".
sns.catplot(
    data=df,
    x="x",
    y="y",
    hue="a",
    kind="point",
    height=6,
    aspect=2,
);
In [ ]:
# Same point plot, keeping the returned grid so its legend can be retitled.
g = sns.catplot(
    data=df,
    x="x",
    y="y",
    hue="a",
    kind="point",
    height=6,
    aspect=2,
)
# Replace the legend title with a descriptive one.
g._legend.set_title("Slope")
In [ ]:
# Load seaborn's "tips" example dataset and preview its first rows.
tips = sns.load_dataset("tips")
tips.head()
In [ ]:
# Figure-level plot of tip against total bill.
sns.relplot(
    data=tips,
    x="total_bill",
    y="tip",
);
In [ ]:
# Axes-level scatter plot of the same two columns.
sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
);
In [ ]:
# Map further columns to hue, size and style, and facet by sex (rows) and
# smoker (columns).
sns.relplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="size",
    size="day",
    style="time",
    row="sex",
    col="smoker",
);
Official documentation: https://seaborn.pydata.org/tutorial/relational.html#aggregation-and-representing-uncertainty
"The default behavior in seaborn is to aggregate the multiple measurements at each x value by plotting the mean and the 95% confidence interval around the mean."
In [ ]:
# Simulate 1000 runs of pure Gaussian noise (mean 0, standard deviation
# `sigma`) sampled at 100 evenly spaced x positions in [-10, 10].
sigma = 1.
# The x grid is identical for every run, so build it once.
x_grid = np.linspace(-10, 10, 100)
rows = []
for run in range(1000):
    for x in x_grid:
        rows.append([x, np.random.normal(loc=0., scale=sigma), run])
df = pd.DataFrame(rows, columns=["x", "y", "run"])
df.head()
In [ ]:
# Line plot of y aggregated at each x value (seaborn's default estimator).
g = sns.relplot(x="x", y="y", kind="line", data=df,
                height=6, aspect=2)
# Draw the reference line and legend on the grid's own axes; the notebook
# never imports a `plt` name, so `plt.axhline` would raise NameError.
g.ax.axhline(0, color="r", linestyle=":", label="Actual mean")
g.ax.legend();
In [ ]:
# With `units="run"` and `estimator=None`, draw one faint line per run
# instead of aggregating the runs at each x value.
g = sns.relplot(x="x", y="y", kind="line", data=df,
                height=6, aspect=2,
                units="run", estimator=None, alpha=0.1)
# Draw the reference line and legend on the grid's own axes; the notebook
# never imports a `plt` name, so `plt.axhline` would raise NameError.
g.ax.axhline(0, color="r", linestyle=":", label="Actual mean")
g.ax.legend();
In [ ]:
# Scatter plot of every sample (no `kind` given, so relplot uses its default).
# `estimator` is dropped: it only applies to line plots and is not a valid
# scatter keyword.
g = sns.relplot(x="x", y="y", data=df,
                height=6, aspect=2, marker=".",
                alpha=0.15)
# Reference lines at the true mean and at +/- 2 sigma, drawn on the grid's
# own axes because the notebook never imports a `plt` name. The two sigma
# lines get distinct labels so the legend entries can be told apart.
g.ax.axhline(2. * sigma, color="k", linestyle=":", label=r"$2 \sigma$")
g.ax.axhline(0, color="r", linestyle=":", label="Actual mean")
g.ax.axhline(-2. * sigma, color="k", linestyle=":", label=r"$-2 \sigma$")
g.ax.legend();
In [ ]:
# Line plot that aggregates y at each x value with the median.
g = sns.relplot(x="x", y="y", kind="line", data=df,
                height=6, aspect=2,
                estimator=np.median)
# Draw the reference line and legend on the grid's own axes; the notebook
# never imports a `plt` name, so `plt.axhline` would raise NameError.
g.ax.axhline(0, color="r", linestyle=":", label="Actual median")
g.ax.legend();
In [ ]:
# Simulate 100 runs of noisy sin(x) and cos(x) curves sampled at 100 evenly
# spaced x positions in [-10, 10]; the "func" column records which curve a
# row belongs to.
# The x grid is identical for every run and curve, so build it once.
x_grid = np.linspace(-10, 10, 100)
rows = []
for run in range(100):
    # Pair each label with its function instead of re-testing the label
    # string for every sample.
    for func, curve in (("sin", math.sin), ("cos", math.cos)):
        for x in x_grid:
            # Add standard normal noise to the exact curve value.
            rows.append([x, curve(x) + np.random.normal(), func, run])
df = pd.DataFrame(rows, columns=["x", "y", "func", "run"])
df.head()
In [ ]:
# One aggregated line per value of "func".
sns.relplot(
    data=df,
    x="x",
    y="y",
    hue="func",
    kind="line",
    height=6,
    aspect=2,
);
In [ ]:
# Load seaborn's "fmri" example dataset and preview its first rows.
fmri = sns.load_dataset("fmri")
fmri.head()
In [ ]:
# relplot with no explicit `kind`: signal against timepoint.
sns.relplot(
    data=fmri,
    x="timepoint",
    y="signal",
    height=6,
    aspect=2,
);
In [ ]:
# Categorical plot of the same columns, using a wide aspect ratio.
sns.catplot(
    data=fmri,
    x="timepoint",
    y="signal",
    aspect=3,
);
In [ ]:
# Line version of the signal-against-timepoint plot.
sns.relplot(
    data=fmri,
    x="timepoint",
    y="signal",
    kind="line",
    height=6,
    aspect=2,
);
In [ ]:
# Simulate 100 runs of two noisy lines y = a * x + noise (slopes a = 1 and
# a = 3), each sampled at x = 0..9, and collect one row per sample.
rows = []
for run in range(100):
    for a in (1., 3.):
        for x in range(10):
            # Gaussian noise scaled by 10 around the exact line.
            y = a * x + 10. * np.random.normal()
            rows.append([x, y, a, run])
df = pd.DataFrame(rows, columns=["x", "y", "a", "run"])
df.head()
In [ ]:
# Line plot of y against x, coloured by the slope column "a".
sns.relplot(
    data=df,
    x="x",
    y="y",
    hue="a",
    kind="line",
    height=6,
    aspect=2,
);
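The legend is misleading because relplot() treats every variable as numeric (continuous), including the "hue" variable, even though "a" takes only two values here. For this kind of categorical grouping, catplot() is better suited.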
In [ ]:
# Simulate 100 runs of two noisy lines y = a * x + noise (slopes a = 1 and
# a = 3), each sampled at x = 0..9, and collect one row per sample.
rows = []
for run in range(100):
    for a in (1., 3.):
        for x in range(10):
            # Gaussian noise scaled by 10 around the exact line.
            y = a * x + 10. * np.random.normal()
            rows.append([x, y, a, run])
df = pd.DataFrame(rows, columns=["x", "y", "a", "run"])
df.head()
In [ ]:
# Point plot of y against x, with one series per value of "a".
sns.catplot(
    data=df,
    x="x",
    y="y",
    hue="a",
    kind="point",
    height=6,
    aspect=2,
);
In [ ]:
# Same point plot with customised markers, line style and error-bar caps.
# NOTE(review): `scale` appears to be deprecated in recent seaborn releases;
# confirm against the installed version.
sns.catplot(
    data=df,
    x="x",
    y="y",
    hue="a",
    kind="point",
    height=6,
    aspect=2,
    markers=".",
    linestyles=":",
    scale=0.7,
    capsize=0.1,
);
In [ ]:
# Load seaborn's "iris" example dataset and preview its first rows.
# Tutorial reference:
# https://seaborn.pydata.org/tutorial/distributions.html#visualizing-pairwise-relationships-in-a-dataset
iris = sns.load_dataset("iris")
iris.head()
In [ ]:
# Pairwise relationships between the iris columns, coloured by species.
sns.pairplot(
    data=iris,
    hue="species",
);
In [ ]:
# Load seaborn's "titanic" example dataset and preview its first rows.
# Tutorial reference:
# https://seaborn.pydata.org/tutorial/distributions.html#visualizing-pairwise-relationships-in-a-dataset
titanic = sns.load_dataset("titanic")
titanic.head()
In [ ]:
# Pairwise relationships between three titanic columns, coloured by
# "survived".
sns.pairplot(
    data=titanic,
    vars=["survived", "pclass", "fare"],
    hue="survived",
);