In [26]:
from IPython.lib.display import YouTubeVideo
In [27]:
# Embed the YouTube video with id "FytuB8nFHPQ" as a 400x300 player in the cell output.
YouTubeVideo("FytuB8nFHPQ", width=400, height=300)
Out[27]:
In [1]:
from __future__ import absolute_import, division, print_function
%matplotlib inline
import matplotlib.pyplot as plt
In [2]:
import seaborn as sns

# Global seaborn look for every figure in this notebook.
sns.set_context('poster')
sns.set_style('whitegrid')  # alternative look: sns.set_style('darkgrid')

# Default figure size as (width, height).
plt.rcParams['figure.figsize'] = (12, 8)
In [4]:
import warnings

import numpy as np
import pandas as pd

# `pandas.tools.plotting` was deprecated in pandas 0.20 and later removed;
# `pandas.plotting` is the supported location of scatter_matrix. The legacy
# path is kept only as a fallback for very old pandas installs.
try:
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix

# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases; this import needs an older scikit-learn -- confirm the environment.
from sklearn.datasets import load_boston

# Silence every warning category for the rest of the session so library
# deprecation messages do not clutter the cell outputs.
warnings.filterwarnings('ignore')
This section uses the cleaned data produced by the Data Cleaning notebook; see that notebook for details.
In [5]:
# Load the cleaned coal production CSV; the path is relative to the notebook's working directory.
df = pd.read_csv("../data/coal_prod_cleaned.csv")
In [6]:
# Preview the first rows of the loaded frame.
df.head()
Out[6]:
In [7]:
# Employees vs. labor hours: one point per row of `df`.
plt.scatter(
    x=df['Average_Employees'],
    y=df['Labor_Hours'],
)
plt.xlabel("Number of Employees")
plt.ylabel("Total Hours Worked");
The examples here are shamelessly stolen from this amazing notebook: https://github.com/maartenbreddels/jupytercon-2017/blob/master/jupytercon2017-widgets.ipynb
In [54]:
from IPython.display import YouTubeVideo
In [56]:
# Embed the YouTube video with id "uHPcshgTotE" as a 560x315 player in the cell output.
YouTubeVideo("uHPcshgTotE", width=560, height=315)
Out[56]:
In [8]:
import bqplot as bq
In [16]:
# Draw 100 rows for the interactive plot. The sample is seeded so that a
# re-run yields the same rows; a later cell selects points by their position
# in this sample, which is only meaningful if the sample is reproducible.
sample_df = df.sample(100, random_state=42)
In [17]:
# Linear scales shared by the axes and the mark.
x_sc = bq.LinearScale()
y_sc = bq.LinearScale()

ax_x = bq.Axis(scale=x_sc, label='Number of Employees', grid_lines='solid')
ax_y = bq.Axis(scale=y_sc, label='Total Hours Worked',
               orientation='vertical', grid_lines='solid')

# Despite its name, `line` is a Scatter mark; clicking a point selects it.
line = bq.Scatter(
    x=sample_df['Average_Employees'],
    y=sample_df['Labor_Hours'],
    scales={'x': x_sc, 'y': y_sc},
    interactions={'click': 'select'},
    selected_style={'opacity': 1.0, 'fill': 'DarkOrange', 'stroke': 'Red'},
    unselected_style={'opacity': 0.5},
)

fig = bq.Figure(marks=[line], axes=[ax_x, ax_y], title='BQPlot Example')
fig
In [18]:
# Read the `selected` attribute of the Scatter mark (set by clicking points or by assignment).
line.selected
Out[18]:
In [19]:
# Set the selection from Python; presumably these are positions within sample_df -- TODO confirm.
line.selected = [23, 3]
In [20]:
import bqplot.pyplot as plt
import numpy as np
x = np.linspace(0, 2, 50)
y = x**2
fig = plt.figure()
scatter = plt.scatter(x, y)
plt.show()
In [26]:
fig.animation_duration = 5000
scatter.y = x**.5
In [27]:
scatter.selected_style = {'stroke':'red', 'fill': 'orange'}
plt.brush_selector();
In [28]:
# Read the `selected` attribute of the scatter mark.
scatter.selected
Out[28]:
In [29]:
# Set the selection programmatically; presumably indices into the 50 plotted points -- TODO confirm.
scatter.selected = [1,2,10,40]
In [ ]:
In [31]:
import ipyvolume as ipv
import numpy as np
In [32]:
# Run ipyvolume's `example_ylm` demo.
ipv.example_ylm()
In [33]:
# Number of random points to draw.
N = 1000
# One row of uniform [0, 1) samples per coordinate.
x, y, z = np.random.random(size=(3, N))
In [34]:
# Create an ipyvolume figure, add the x/y/z points with the 'box' marker, then display it.
# Note: `fig` and `scatter` are rebound here; earlier cells used the same names for bqplot objects.
fig = ipv.figure()
scatter = ipv.scatter(x, y, z, marker='box')
ipv.show()
In [35]:
# Shift every x coordinate by 0.1. This reads and rewrites scatter.x, so each re-run shifts again.
scatter.x = scatter.x + 0.1
In [36]:
# Update the existing scatter's color and size attributes.
scatter.color = "green"
scatter.size = 5
In [37]:
# Assign an (N, 3) array of random values; presumably read as one RGB color per point -- TODO confirm.
scatter.color = np.random.random((N,3))
In [38]:
# Set the scatter's size attribute to 2.
scatter.size = 2
In [50]:
# Fetch ipyvolume's animated_stream dataset and keep its `.data` attribute.
ex = ipv.datasets.animated_stream.fetch().data
In [52]:
# Inspect the shape of the fetched data.
ex.shape
Out[52]:
In [53]:
# Shape after keeping every 4th entry along the third axis (same slice the quiver cell uses).
ex[:, ::, ::4].shape
Out[53]:
In [ ]:
In [47]:
ipv.figure()
# Use the dark style while building the figure.
ipv.style.use('dark')
# Unpack the fetched data, keeping every 4th entry along the third axis,
# as the positional arguments to quiver.
quiver = ipv.quiver(
    *ipv.datasets.animated_stream.fetch().data[:, :, ::4],
    size=5
)
ipv.animation_control(quiver, interval=200)
ipv.show()
# Switch the style to light once the figure has been shown.
ipv.style.use('light')
In [40]:
# Select ipyvolume's light style.
ipv.style.use('light')
In [41]:
# Set the quiver's `geo` attribute to "cat"; presumably this swaps the glyph shape -- TODO confirm.
quiver.geo = "cat"
In [42]:
# One million points this time.
N = 1000 * 1000
# Uniform [0, 1) coordinates, converted to 32-bit floats.
x, y, z = np.random.random(size=(3, N)).astype(np.float32)
In [43]:
# New ipyvolume figure with the large point set at size 0.2, then display it.
ipv.figure()
s = ipv.scatter(x, y, z, size=0.2)
ipv.show()
In [48]:
# Save via ipyvolume's `save`; despite the file name, this is an ipyvolume call, not bqplot.
ipv.save("bqplot.html")
In [49]:
# Shell escape that opens the saved page.
# NOTE(review): `open` is a macOS command; this cell will not work as-is on Linux or Windows.
!open bqplot.html
In [12]:
# Request as many palette colors as there are distinct values in df.Year.
colors = sns.color_palette(n_colors=df.Year.nunique())
In [13]:
# Pair each year (ascending) with the palette color at the same position.
color_dict = dict(zip(sorted(df.Year.unique()), colors))
In [9]:
# Display the year -> color mapping.
color_dict
Out[9]:
In [10]:
# Production vs. labor hours for three years (the first, third and last
# entries of df.Year.unique()), each drawn in its own color from color_dict.
for year in sorted(df.Year.unique()[[0, 2, -1]]):
    # Filter once per year rather than once per plotted column.
    year_rows = df[df.Year == year]
    plt.scatter(year_rows.Labor_Hours,
                year_rows.Production_short_tons,
                # `color=` instead of `c=`: a bare RGB tuple passed as `c` is
                # ambiguous with a sequence of values to be colormapped.
                color=color_dict[year],
                s=50,
                label=year,
                )
plt.xlabel("Total Hours Worked")
plt.ylabel("Total Amount Produced")
plt.legend()
# Write the finished figure to ex1.png in the working directory.
plt.savefig("ex1.png")
In [11]:
import matplotlib as mpl
In [12]:
# List the style names that can be passed to style.use().
plt.style.available
Out[12]:
In [13]:
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later dropped the old names. Prefer the original name when it is available
# and fall back to the renamed one otherwise.
style_name = 'seaborn-colorblind'
if style_name not in mpl.style.available:
    style_name = 'seaborn-v0_8-colorblind'
mpl.style.use(style_name)
In [15]:
# Same three-year scatter as before, but without explicit colors so the
# active matplotlib style decides them. The figure is not saved here.
for year in sorted(df.Year.unique()[[0, 2, -1]]):
    year_rows = df[df.Year == year]
    plt.scatter(
        year_rows.Labor_Hours,
        year_rows.Production_short_tons,
        s=50,
        label=year,
    )
plt.xlabel("Total Hours Worked")
plt.ylabel("Total Amount Produced")
plt.legend();
In [16]:
# NOTE(review): load_boston is not available in recent scikit-learn releases --
# confirm the installed version still provides it.
df_dict = load_boston()

# Feature columns take their names from the dataset; the target goes in a
# single-column frame named MEDV.
features = pd.DataFrame(df_dict.data, columns=df_dict.feature_names)
target = pd.DataFrame(df_dict.target, columns=['MEDV'])

# `df` is rebound here: from this cell on it holds the Boston features plus
# MEDV, no longer the coal production data loaded earlier.
df = pd.concat([features, target], axis='columns')
df.head()
Out[16]:
In [17]:
# Target variable: density curve plus rug marks for MEDV.
fig, ax = plt.subplots(figsize=(6, 4))
# `sns.distplot` is deprecated in seaborn; with hist=False and rug=True it
# drew exactly a KDE curve and a rug, so call those two functions directly.
sns.kdeplot(df.MEDV, ax=ax, legend=False)
# Label the x axis with the column name, as distplot did for a named Series.
ax.set_xlabel("MEDV")
sns.rugplot(df.MEDV, ax=ax)
Out[17]:
In [18]:
fig, ax = plt.subplots(figsize=(10, 7))
# Two-variable KDE of LSTAT against MEDV, drawn on the axes created above.
# NOTE(review): newer seaborn releases expect x=/y= keywords for the two
# variables -- confirm against the installed version.
sns.kdeplot(df['LSTAT'], df['MEDV'], ax=ax)
Out[18]:
In [19]:
fig, ax = plt.subplots(figsize=(10, 10))
# Pairwise scatter plots for MEDV and five features, histograms on the diagonal.
scatter_matrix(
    df[['MEDV', 'LSTAT', 'CRIM', 'RM', 'NOX', 'DIS']],
    ax=ax,
    diagonal='hist',
    alpha=0.2,
);
In [20]:
# Seaborn pairplot over the same six columns, with small translucent markers.
sns.pairplot(
    df,
    vars=['MEDV', 'LSTAT', 'CRIM', 'RM', 'NOX', 'DIS'],
    plot_kws=dict(s=20, alpha=0.5),
);
In [21]:
# Load the gzip-compressed raw players CSV; the path is relative to the notebook's working directory.
players = pd.read_csv("../data/raw_players.csv.gz", compression='gzip')
In [22]:
# Preview the first rows of the players frame.
players.head()
Out[22]:
In [23]:
# Labels for the weight bins, from lightest to heaviest.
weight_categories = [
    "vlow_weight", "low_weight", "mid_weight", "high_weight", "vhigh_weight",
]

# Quantile-bin `weight` into one bin per label and store the label in a new
# `weightclass` column on `players`.
players['weightclass'] = pd.qcut(
    players['weight'],
    q=len(weight_categories),
    labels=weight_categories,
)
In [24]:
# Preview again; the frame now includes the `weightclass` column added in the previous cell.
players.head()
Out[24]:
In [ ]: