In [ ]:
# Summary statistics for the Series (for categorical data this includes the
# most frequent value, etc.).
series.describe()
# Divide the value range into 5 bins and count how many values fall in each.
# normalize=True puts all counts on the same scale (0-1, relative frequency).
# value_counts ignores null values by default.
series.value_counts(bins=5, normalize=True)
# Distinct values present in the Series.
series.unique()
# Produce a histogram of the data distribution (buckets such as 0-10, 10-20).
# ML: many techniques assume a normal distribution.
# Changing `bins` changes the bucket sizes.
series.hist(bins=40)
# A Series can be plotted directly (so can a DataFrame).
series.plot()  # line plot by default (not a scatter plot)
# Box plot: maximum, 3rd quartile (top of box), median,
# 1st quartile (bottom of box), minimum.
series.plot(kind='box')

In [1]:
# Correlations between datasets
# - The correlation coefficient r satisfies -1 <= r <= 1:
#   r near -1 -> strong negative correlation, r near +1 -> strong positive
#    - strongly correlated inputs can be useless for ML
#      (presumably because they add little new information)
# - r = 0 -> no correlation
series1.corr(series2)

Exploring Data Through Visualizations


In [ ]:


In [2]:
# colorbrewer2 - colorblind safe
# Edward Tufte - visualization

# Matplotlib
# Pandas/Pyplot
# Seaborn
# Altair
# Yellowbrick

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplot inline


ERROR:root:Line magic function `%matplot` not found.

In [ ]:
# go through the lectures

In [ ]:
# The default matplotlib styling is ugly, so switch style sheets.
# Styles in the list are applied first to last: 'dark_background' (also ugly)
# and then 'ggplot' on top of it.
plt.style.use(['dark_background', 'ggplot'])
barchart = data.plot.bar()

In [ ]:
# Advanced visualizations
import seaborn as sns
# NOTE(review): `tips` is not defined in this cell; presumably it is seaborn's
# example dataset (e.g. tips = sns.load_dataset("tips")) -- confirm.
# The original `<data>` / `<target>` placeholders were not valid Python, so the
# examples below use `tips`; substitute your own data and target column.

# Distribution of a single variable.
# NOTE: distplot is deprecated in newer seaborn releases; histplot / displot
# are the replacements there.
ax = sns.distplot(tips["total_bill"])
# Pairwise plots of the columns, coloured by a categorical target.
g = sns.pairplot(tips, hue="smoker")  # compare categories
# Regression analysis.
# regplot() does not accept `hue` (passing it raises TypeError); lmplot() is
# the figure-level variant that fits one regression per hue level.
g = sns.lmplot(x="total_bill", y="tip", hue="smoker", data=tips)
# Compare combinations of different variables: one histogram of total_bill
# per (smoker, time) combination.
g = sns.FacetGrid(tips, col="time", row="smoker")
g = g.map(plt.hist, "total_bill")

In [ ]:
# Interactive visualizations
# Options noted: either money (presumably paid tools) or D3.js
# Bokeh is the library used here.
from bokeh.plotting import output_notebook
# Presumably configures Bokeh to render its output inside the notebook --
# see the Bokeh docs to confirm.
output_notebook()

In [ ]:
# Method chaining