In [1]:
import pandas as pd
import sys
sys.path.append("../utils")
import loaders
Note: `loaders` is a custom module that handles the most common data-loading operations in these analyses. It is available here.
In [2]:
# Load the employer records through the project's `loaders` helper.
_employers = loaders.load_employers()

# Columns carried forward into the analysis table.
_employer_columns = [
    "ER_EIN",
    "ER_LEGAL_NAME",
    "ER_TRADE_NAME",
    "ER_CITY",
    "ER_STATE_ID",
]

# Index by CASE_ID and keep an independent copy of just those columns.
employer_basics = _employers.set_index("CASE_ID")[_employer_columns].copy()
In [3]:
# Load the case records through the project's `loaders` helper.
_cases = loaders.load_cases()

# Columns carried forward into the analysis table.
_case_columns = [
    "DATE_CONCLUDED",
    "DATE_REGISTERED",
    "AMT_BW_ASSESSED",
    "UNDUP_EES_VIOLATED",
    "INVEST_TOOL_DESC",
]

# Index by CASE_ID and keep only those columns.
case_basics = _cases.set_index("CASE_ID")[_case_columns]
In [4]:
# Attach the case columns to each employer row. `DataFrame.join` matches on
# the index (both frames are indexed by CASE_ID) and keeps every row of the
# left frame; `how="left"` is the default, spelled out here for clarity.
table = employer_basics.join(case_basics, how="left")
In [5]:
# Select every case whose legal name or trade name matches the pattern below
# (case-insensitive), ordered by DATE_CONCLUDED.
_name_pattern = r"SUPER.* FORE"

# Missing names are replaced with "" so that `str.contains` yields a clean
# boolean mask (no NaN entries) for each column.
_legal_name_matches = (
    table["ER_LEGAL_NAME"].fillna("").str.contains(_name_pattern, case=False)
)
_trade_name_matches = (
    table["ER_TRADE_NAME"].fillna("").str.contains(_name_pattern, case=False)
)

# `DataFrame.sort` was removed from pandas (deprecated in 0.17, dropped in
# 0.20); `sort_values` is the supported replacement and returns a new frame.
superior_cases = table[_legal_name_matches | _trade_name_matches].sort_values(
    "DATE_CONCLUDED"
)
In [6]:
# Headline figures for the selected cases: row count and summed back wages.
_total_cases = len(superior_cases)
_total_back_wages = superior_cases["AMT_BW_ASSESSED"].sum()

print("Total cases: {0}".format(_total_cases))
print("Total back wages: ${0:,.2f}".format(_total_back_wages))
In [7]:
# Make the column headers readable by swapping underscores for spaces.
# NOTE: this renames the columns of `superior_cases` itself, so any cell that
# looks up an underscore name (e.g. "AMT_BW_ASSESSED") will fail if it is
# re-run after this one.
superior_cases.columns = superior_cases.columns.map(
    lambda label: label.replace("_", " ")
)

# Show the table with missing values rendered as blanks (display only; the
# result of `fillna` is not assigned back).
superior_cases.fillna("")
Out[7]: