In [18]:
import numpy as np
import pandas as pd
from scipy import stats

In [19]:
data = pd.read_csv('~/Dropbox/gw_data.csv')

In [20]:
mw1_arsenic = data[(data['location_id'].isin(['MW-1']) & data['param_name'].isin(['Arsenic, dissolved']))]

In [21]:
mw1_arsenic


Out[21]:
location_id sample_date analysis_result lt_measure default_unit param_name
35 MW-1 2008-10-28 1.90 ug/L Arsenic, dissolved
406 MW-1 2007-12-20 10.00 < ug/L Arsenic, dissolved
625 MW-1 2012-08-22 2.63 ug/L Arsenic, dissolved
677 MW-1 2008-12-16 1.80 ug/L Arsenic, dissolved
1065 MW-1 2010-04-13 1.70 ug/L Arsenic, dissolved
1316 MW-1 2008-08-26 1.70 ug/L Arsenic, dissolved
1479 MW-1 2008-06-18 10.00 < ug/L Arsenic, dissolved
1826 MW-1 2009-04-29 1.30 ug/L Arsenic, dissolved
2130 MW-1 2008-04-24 10.00 < ug/L Arsenic, dissolved
2255 MW-1 2012-04-03 1.93 ug/L Arsenic, dissolved
2539 MW-1 2009-02-25 1.70 ug/L Arsenic, dissolved
2899 MW-1 2008-02-20 10.00 < ug/L Arsenic, dissolved
3247 MW-1 2009-10-21 1.50 ug/L Arsenic, dissolved
3568 MW-1 2012-06-20 2.40 ug/L Arsenic, dissolved
3706 MW-1 2012-10-03 2.27 ug/L Arsenic, dissolved
4192 MW-1 2011-05-18 1.10 ug/L Arsenic, dissolved
4400 MW-1 2010-11-02 1.60 ug/L Arsenic, dissolved
5391 MW-1 2011-10-05 1.50 ug/L Arsenic, dissolved
6384 MW-1 2012-11-14 2.48 ug/L Arsenic, dissolved
6546 MW-1 2013-04-10 5.35 ug/L Arsenic, dissolved
7528 MW-1 2013-10-02 4.65 ug/L Arsenic, dissolved

In [22]:
stats.kstest(mw1_arsenic['analysis_result'], 'lognorm', stats.lognorm.fit(mw1_arsenic['analysis_result']))


Out[22]:
KstestResult(statistic=0.1424598588650321, pvalue=0.77956103342550875)

In [17]:
stats.lognorm.fit(mw1_arsenic['analysis_result'])


Out[17]:
(1.2632192051513607, 1.0334867609792653, 1.2701669010844197)

In [30]:
mw1_arsenic['analysis_result'] = mw1_arsenic['analysis_result'].transform(np.log)


/home/jentjr/anaconda3/envs/enviropy/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.

In [31]:
mw1_arsenic


Out[31]:
location_id sample_date analysis_result lt_measure default_unit param_name
35 MW-1 2008-10-28 0.641854 ug/L Arsenic, dissolved
406 MW-1 2007-12-20 2.302585 < ug/L Arsenic, dissolved
625 MW-1 2012-08-22 0.966984 ug/L Arsenic, dissolved
677 MW-1 2008-12-16 0.587787 ug/L Arsenic, dissolved
1065 MW-1 2010-04-13 0.530628 ug/L Arsenic, dissolved
1316 MW-1 2008-08-26 0.530628 ug/L Arsenic, dissolved
1479 MW-1 2008-06-18 2.302585 < ug/L Arsenic, dissolved
1826 MW-1 2009-04-29 0.262364 ug/L Arsenic, dissolved
2130 MW-1 2008-04-24 2.302585 < ug/L Arsenic, dissolved
2255 MW-1 2012-04-03 0.657520 ug/L Arsenic, dissolved
2539 MW-1 2009-02-25 0.530628 ug/L Arsenic, dissolved
2899 MW-1 2008-02-20 2.302585 < ug/L Arsenic, dissolved
3247 MW-1 2009-10-21 0.405465 ug/L Arsenic, dissolved
3568 MW-1 2012-06-20 0.875469 ug/L Arsenic, dissolved
3706 MW-1 2012-10-03 0.819780 ug/L Arsenic, dissolved
4192 MW-1 2011-05-18 0.095310 ug/L Arsenic, dissolved
4400 MW-1 2010-11-02 0.470004 ug/L Arsenic, dissolved
5391 MW-1 2011-10-05 0.405465 ug/L Arsenic, dissolved
6384 MW-1 2012-11-14 0.908259 ug/L Arsenic, dissolved
6546 MW-1 2013-04-10 1.677097 ug/L Arsenic, dissolved
7528 MW-1 2013-10-02 1.536867 ug/L Arsenic, dissolved

In [ ]:
def dunnettk(n, df=n-1, k, m, method, tail_type, conf_level):
    alpha = 1-conf_level