In [2]:
import numpy as np
import pandas as pd
from scipy import stats, integrate
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme with color-code remapping enabled.
sns.set(color_codes=True)
# Seed NumPy's global RNG with a fixed integer derived from the characters of
# the section name, so the random samples drawn below are reproducible.
np.random.seed(sum(ord(ch) for ch in "distributions"))
In [3]:
# Draw 100 samples from a standard normal distribution.
x = np.random.normal(size=100)

# Univariate distribution: a density-normalized histogram with a kernel
# density estimate (KDE) curve on top. `distplot` is deprecated, so the
# histogram is drawn with `histplot`; `cut=3` extends the KDE curve past the
# extreme data points the way the old function did.
sns.histplot(x, kde=True, stat="density", kde_kws=dict(cut=3))
plt.show()

# Histogram of counts without the density curve, plus a small vertical tick
# at every observation. The rug is drawn with `rugplot` on the same axes.
sns.histplot(x)
sns.rugplot(x)
plt.show()

# Same plot, with the histogram split into 20 bins.
sns.histplot(x, bins=20)
sns.rugplot(x)
plt.show()
In [4]:
# KDE curve of the samples in `x`, with the area under the curve filled.
# `fill` is the current name of the deprecated `shade` keyword.
sns.kdeplot(x, fill=True)
plt.show()
In [5]:
sns.set_style("whitegrid")
# Draw 200 samples from a gamma distribution with shape parameter 6.
x = np.random.gamma(6, size=200)

# Density-normalized histogram, so the fitted PDF below is on the same scale.
# (`distplot(..., fit=...)` is deprecated and `histplot` has no `fit`
# argument, so the parametric fit is drawn explicitly.)
ax = sns.histplot(x, stat="density")

# Fit a gamma distribution to the samples and overlay its PDF.
gamma_params = stats.gamma.fit(x)
grid = np.linspace(x.min(), x.max(), 200)
ax.plot(grid, stats.gamma.pdf(grid, *gamma_params), color="k")
plt.show()
In [6]:
# The most familiar way to visualize a bivariate distribution is a scatter
# plot, where each observation is shown as a point at its x and y values.
# This is analogous to a rug plot in two dimensions. A scatter plot can be
# drawn with matplotlib's plt.scatter, and it is also the default kind of
# plot shown by jointplot().
sns.set()
mean, cov = [0, 1], [(1, .5), (.5, 1)]
# 200 samples from a bivariate normal with the mean and covariance above.
data = np.random.multivariate_normal(mean, cov, 200)
df = pd.DataFrame(data, columns=["x", "y"])
# Preview only the first rows instead of printing the whole 200-row frame.
print(df.head())
sns.jointplot(x="x", y="y", data=df)
plt.show()
In [7]:
# The bivariate analogue of a histogram is known as a "hexbin" plot, because
# it shows the counts of observations that fall within hexagonal bins. This
# plot works best with relatively large datasets. It is available through
# the matplotlib plt.hexbin function and as a style in jointplot(). It looks
# best with a white background.
x, y = np.random.multivariate_normal(mean, cov, 1000).T
# The joint plot must be created inside the `with` block so that the
# temporary "white" axes style applies to it.
with sns.axes_style("white"):
    sns.jointplot(x=x, y=y, kind="hex", color="k")
plt.show()

# A KDE can be drawn for the two-dimensional data as well.
sns.jointplot(x="x", y="y", data=df, kind="kde")
plt.show()
In [8]:
# Load the iris example dataset and draw a pair plot of it.
iris = sns.load_dataset("iris")
sns.pairplot(data=iris)
plt.show()
In [9]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
# Theme and seed for the categorical-plot section.
sns.set(style="whitegrid", color_codes=True)
np.random.seed(sum(ord(ch) for ch in "categorical"))

# Example datasets used by the cells below, loaded in the same order as before.
titanic, tips, iris = (
    sns.load_dataset(name) for name in ("titanic", "tips", "iris")
)

# Categorical scatter plot: total bill for each day.
sns.stripplot(data=tips, x="day", y="total_bill")
plt.show()
In [10]:
# Same strip plot, with jitter requested explicitly.
sns.stripplot(data=tips, x="day", y="total_bill", jitter=True)
plt.show()
In [11]:
# Swarm plot: points are spread out as much as possible.
sns.swarmplot(data=tips, x="day", y="total_bill")
plt.show()

# Add a second categorical variable through the `hue` parameter.
sns.swarmplot(data=tips, x="day", y="total_bill", hue="sex")
plt.show()
In [12]:
# Seaborn's categorical plotting functions generally try to infer the order
# of the categories from the data. For a pandas Categorical column, the
# default order can be set on the dtype itself. For other dtypes,
# string-typed categories are plotted in the order they appear in the
# DataFrame, while categories that look numerical are sorted.
sns.swarmplot(data=tips, x="size", y="total_bill", hue="sex")
plt.show()

# The orientation can be changed by swapping which variable goes on which axis.
sns.swarmplot(data=tips, x="total_bill", y="day", hue="sex")
plt.show()
In [13]:
# Box plot of total bill per day, split by meal time.
sns.boxplot(data=tips, x="day", y="total_bill", hue="time")
plt.show()
In [14]:
# Violin plot with total bill on the x axis and day on the y axis.
sns.violinplot(data=tips, x="total_bill", y="day", hue="time")
plt.show()

# Total bill on the y axis, with split=True passed for the `time` hue.
sns.violinplot(data=tips, x="day", y="total_bill", hue="time", split=True)
plt.show()

# inner="stick" replaces the inner box plot with a line per observation.
sns.violinplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    split=True,
    inner="stick",
    palette="Set3",
)
plt.show()

# Plots can be combined: a violin plot with no interior (the default `inner`
# is a box plot) and a swarm plot drawn over it.
sns.violinplot(data=tips, x="day", y="total_bill", inner=None)
sns.swarmplot(data=tips, x="day", y="total_bill", color="w", alpha=.5)
plt.show()
In [15]:
# Number of observations in each `deck` category of the titanic dataset.
sns.countplot(data=titanic, x="deck", palette="Greens_d")
plt.show()

# Point plot of `survived` by sex, one line per class; the vertical bars
# indicate confidence intervals.
sns.pointplot(data=titanic, x="sex", y="survived", hue="class")
plt.show()

# Same variables with the roles of class and sex swapped, and an explicit
# color, marker and line style for each hue level.
sns.pointplot(
    data=titanic,
    x="class",
    y="survived",
    hue="sex",
    palette={"male": "g", "female": "m"},
    markers=["^", "o"],
    linestyles=["-", "--"],
)
plt.show()
In [16]:
# Horizontal box plots of the iris DataFrame, passed whole without x/y.
sns.boxplot(orient="h", data=iris)
plt.show()
In [30]:
# Load the dataset before it is used, so this cell does not depend on a
# `tips` variable left over from an earlier cell.
tips = sns.load_dataset("tips")
print(tips.describe())

# Bar plot with error bars. `catplot` is the current name of the deprecated
# `factorplot` function.
sns.catplot(x="day", y="total_bill", hue="smoker", data=tips, kind="bar")
plt.show()
In [18]:
# Swarm plots faceted into one column per value of `time`. `catplot` is the
# current name of the deprecated `factorplot` function.
sns.catplot(x="day", y="total_bill", hue="smoker",
            col="time", data=tips, kind="swarm")
plt.show()
In [ ]: