In [1]:
import insightex.plotting as ex
In [ ]:
In [1]:
from __future__ import print_function, division
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import textwrap
import os
import sys
import warnings
warnings.filterwarnings('ignore')
# special things
from pivottablejs import pivot_ui
from ipywidgets import FloatSlider, interactive, IntSlider
from scipy import interpolate
# sql
%load_ext sql_magic
import sqlalchemy
import sqlite3
from sqlalchemy import create_engine
sqlite_engine = create_engine('sqlite://')
# autoreload
%load_ext autoreload
%autoreload 1
# %aimport module_to_reload
# ehh...
# import bqplot.pyplot as plt
import ipyvolume as ipv
import altair as alt
from vega_datasets import data
import seaborn as sns
sns.set_context('poster', font_scale=1.3)
import qgrid
Interactive pandas dataframes: https://github.com/quantopian/qgrid
In [2]:
df = pd.read_csv("../data/coal_prod_cleaned.csv")
df.head()
Out[2]:
In [3]:
df.shape
Out[3]:
In [4]:
df.columns
Out[4]:
In [5]:
qgrid_widget = qgrid.show_grid(
df[["Year", "Mine_State", "Labor_Hours", "Production_short_tons"]],
show_toolbar=True,
)
qgrid_widget
In [8]:
df2 = df[df['Mine_State'] != "Wyoming"].groupby('Mine_State').sum()
In [9]:
df3 = df.groupby('Mine_State').sum()
In [ ]:
# have to run this from the home dir of this repo
# cd insight/
# python setup.py develop
In [10]:
%aimport insight.plotting
In [12]:
insight.plotting.plot_prod_vs_hours(df3, color_index=1)
# insight.plotting.plot_prod_vs_hours(df2, color_index=1)
In [ ]:
In [13]:
def plot_prod_vs_hours(
df, color_index=0, output_file="../img/production-vs-hours-worked.png"
):
fig, ax = plt.subplots(figsize=(10, 8))
sns.regplot(
df["Labor_Hours"],
df["Production_short_tons"],
ax=ax,
color=sns.color_palette()[color_index],
)
ax.set_xlabel("Labor Hours Worked")
ax.set_ylabel("Total Amount Produced")
x = ax.set_xlim(-9506023.213266129, 204993853.21326613)
y = ax.set_ylim(-51476801.43653282, 746280580.4034251)
fig.tight_layout()
fig.savefig(output_file)
In [14]:
plot_prod_vs_hours(df2, color_index=0)
In [ ]:
plot_prod_vs_hours(df3, color_index=1)
In [ ]:
# make a change via qgrid
df3 = qgrid_widget.get_changed_df()
In [ ]:
In [ ]:
qgrid_widget = qgrid.show_grid(
df2[["Year", "Labor_Hours", "Production_short_tons"]],
show_toolbar=True,
)
qgrid_widget