In [1]:
# Make plots inline
%matplotlib inline
# Make inline plots vector graphics instead of raster graphics
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('pdf', 'svg')
# import modules for plotting and data analysis
import matplotlib.pyplot # as plt
import numpy as np
import pandas
import functools
def add_bottle_id_column(data, key_name):
data['bottleID'] = np.repeat(key_name, data.shape[0])
return data
def load_flint_water_data():
flint_water_data = pandas.read_excel(
io = "https://github.com/ComputationalModeling/flint-water-data/raw/f6093bba145b1745b68bac2964b341fa30f3a08a/Flint%20Lead%20Kits%20ICP%20Data.xlsx",
sheetname = [
"Sub_B1-8.15",
"Sub_B2-8.15",
"Sub_B3-8.15",
"Sub_B1-3.16",
"Sub_B2-3.16",
"Sub_B3-3.16",
"Sub_B1-7.16",
"Sub_B2-7.16",
"Sub_B3-7.16"],
header = 0,
skiprows = 3,
names = [
"Sample",
"208Pb",
"",
"23Na",
"25Mg",
"27Al",
"28Si",
"31P",
"PO4",
"34S",
"35Cl",
"39K",
"43Ca",
"47Ti",
"51V",
"52Cr",
"54Fe",
"55Mn",
"59Co",
"60Ni",
"65Cu",
"66Zn",
"75As",
"78Se",
"88Sr",
"95Mo",
"107Ag",
"111Cd",
"112Sn",
"137Ba",
"238U"
]
)
data_with_id = [
add_bottle_id_column(value, key)
for key, value
in flint_water_data.items()]
# collapse dataframes into one long dataframe
flint_water_data = functools.reduce(lambda x,y: x.append(y), data_with_id)
return flint_water_data
def add_date_and_bottle_number(flint_data):
flint_data['bottle_number'] = flint_data['bottleID'].apply(lambda x: x.split('-')[0])
flint_data['date_collected'] = flint_data['bottleID'].apply(lambda x: x.split('-')[1])
return(flint_data)
bottle_map = {
'Sub_B1': 'bottle1',
'Sub_B2': 'bottle2',
'Sub_B3': 'bottle3'
}
date_map = {
'8.15': '2015-08-01',
'3.16': '2016-03-01',
'7.16': '2016-07-01'
}
flint_data = load_flint_water_data()
flint_data = add_date_and_bottle_number(flint_data)
flint_data = flint_data.replace(
{'bottle_number': bottle_map,
'date_collected': date_map })
flint_data['date_collected'] = pandas.DatetimeIndex(flint_data['date_collected'])
flint_data = flint_data.drop('bottleID', axis = 1)
In [ ]:
flint_data[flint_data['date_collected'] == '2016-07-01'].head()
In [4]:
flint_data.to_csv("flint_water_data_2016-09-01.csv")
In [5]: