In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import textwrap
import os
import sys
import warnings
warnings.filterwarnings('ignore')
# special things
from pivottablejs import pivot_ui
from ipywidgets import FloatSlider, interactive, IntSlider
from scipy import interpolate
# sql
%load_ext sql_magic
import sqlalchemy
import sqlite3
from sqlalchemy import create_engine
sqlite_engine = create_engine('sqlite://')
%config SQL.conn_name = "sqlite_engine"
%config SQL.output_result = False
# autoreload
%load_ext autoreload
%autoreload 1
# %aimport module_to_reload
import ipyvolume as ipv
import altair as alt
from vega_datasets import data
import seaborn as sns
sns.set_context('poster', font_scale=1.3)
import qgrid
In [2]:
coal = pd.read_csv("../data/coal_prod_cleaned.csv")
coal.head()
Out[2]:
In [3]:
coal.to_sql('coal', con=sqlite_engine, if_exists='append', index=False)
In [4]:
%%read_sql df
SELECT
Mine_State,
Labor_Hours,
sum(Production_short_tons) as Production_short_tons
FROM coal
WHERE Mine_State != "Wyoming"
GROUP BY Mine_State
In [5]:
df.head()
Out[5]:
In [6]:
df = pd.read_csv("../data/coal_prod_cleaned.csv")
df.shape
Out[6]:
In [7]:
df.columns
Out[7]:
In [8]:
qgrid_widget = qgrid.show_grid(
df[["Year", "Mine_State", "Labor_Hours", "Production_short_tons"]],
show_toolbar=True,
)
qgrid_widget
In [9]:
df2 = df[df['Mine_State'] != "Wyoming"].groupby('Mine_State').sum()
In [10]:
df3 = df.groupby('Mine_State').sum()
In [11]:
# have to run this from the home dir of this repo
# cd insight/
# python setup.py develop
In [12]:
%aimport insight.plotting
In [13]:
insight.plotting.plot_prod_vs_hours(df3, color_index=1)
# insight.plotting.plot_prod_vs_hours(df2, color_index=1)
In [ ]:
In [14]:
def plot_prod_vs_hours(
df, color_index=0, output_file="../img/production-vs-hours-worked.png"
):
fig, ax = plt.subplots(figsize=(10, 8))
sns.regplot(
df["Labor_Hours"],
df["Production_short_tons"],
ax=ax,
color=sns.color_palette()[color_index],
)
ax.set_xlabel("Labor Hours Worked")
ax.set_ylabel("Total Amount Produced")
x = ax.set_xlim(-9506023.213266129, 204993853.21326613)
y = ax.set_ylim(-51476801.43653282, 746280580.4034251)
fig.tight_layout()
fig.savefig(output_file)
In [15]:
plot_prod_vs_hours(df2, color_index=0)
In [16]:
plot_prod_vs_hours(df3, color_index=1)
In [ ]:
# make a change via qgrid
df3 = qgrid_widget.get_changed_df()
In [ ]:
In [ ]:
qgrid_widget = qgrid.show_grid(
df2[["Year", "Labor_Hours", "Production_short_tons"]],
show_toolbar=True,
)
qgrid_widget
In [ ]:
In [ ]: