In [191]:
# Scratch list of parameter names used to try out list filtering.
L = [
    'CPHL_INTEG',
    'CPHL_INTEG_CALC',
    'CPHL_BTL',
    'SALT',
]

In [192]:
# Strip the two integrated-chlorophyll names from L, keeping the same list object.
# A filter replaces the original list.remove() calls inside a comprehension: that form
# built a throwaway list of None values and raised ValueError when the cell was run a
# second time (the names were already gone). Slice assignment keeps the mutation in place.
L[:] = [param for param in L if param not in ('CPHL_INTEG', 'CPHL_INTEG_CALC')]
L


Out[192]:
['CPHL_BTL', 'SALT']

In [1]:
import pandas as pd
import numpy as np
import timeit

In [6]:
# Small hand-made frame for testing NaN handling across the three chlorophyll columns:
# rows 0-4 carry only CHL_BTL, rows 5-6 only CHL_INTEG, rows 7-8 only CHL_INTEG_CALC
# (and an empty STATN).
df = pd.DataFrame(
    {
        'STATN': ['A'] * 5 + ['B'] * 2 + [''] * 2,
        'VISS': ['a'] * 5 + ['b'] * 2 + ['a'] * 2,
        'CHL_BTL': [1, 2, 3, 4, 5] + [np.nan] * 4,
        'CHL_INTEG': [np.nan] * 5 + [2, 2.5] + [np.nan] * 2,
        'DEPH': [0, 5, 10, 0, 5] + [np.nan] * 4,
        'MXDEP': [np.nan] * 5 + [10] * 4,
        'CHL_INTEG_CALC': [np.nan] * 7 + [5, 7],
    }
)

In [7]:
# Columns that are checked together when deciding whether a row carries any value.
indicator_parameter = [
    'CHL_BTL',
    'CHL_INTEG',
    'CHL_INTEG_CALC',
]

In [8]:
# Keep the rows that have a value in at least one of the indicator columns
# (same selection as dropna(subset=..., how='all')).
df[df[indicator_parameter].notna().any(axis=1)]


Out[8]:
CHL_BTL CHL_INTEG CHL_INTEG_CALC DEPH MXDEP STATN VISS
0 1.0 NaN NaN 0.0 NaN A a
1 2.0 NaN NaN 5.0 NaN A a
2 3.0 NaN NaN 10.0 NaN A a
3 4.0 NaN NaN 0.0 NaN A a
4 5.0 NaN NaN 5.0 NaN A a
5 NaN 2.0 NaN NaN 10.0 B b
6 NaN 2.5 NaN NaN 10.0 B b
7 NaN NaN 5.0 NaN 10.0 a
8 NaN NaN 7.0 NaN 10.0 a

In [9]:
# Rows with VISS == 'a' that have a value in at least one indicator column.
df[(df['VISS'] == 'a') & df[indicator_parameter].notna().any(axis=1)]


Out[9]:
CHL_BTL CHL_INTEG CHL_INTEG_CALC DEPH MXDEP STATN VISS
0 1.0 NaN NaN 0.0 NaN A a
1 2.0 NaN NaN 5.0 NaN A a
2 3.0 NaN NaN 10.0 NaN A a
3 4.0 NaN NaN 0.0 NaN A a
4 5.0 NaN NaN 5.0 NaN A a
7 NaN NaN 5.0 NaN 10.0 a
8 NaN NaN 7.0 NaN 10.0 a

In [10]:
# Indicator columns for the VISS == 'a' rows, after dropping rows where all of them are NaN.
wb_df = (
    df[df['VISS'] == 'a']
    .dropna(subset=indicator_parameter, how='all')
    .loc[:, indicator_parameter]
)
# One boolean per indicator column: True when that column is entirely NaN in wb_df.
col1 = wb_df.isna().all(axis=0)

In [11]:
# Display the per-column "entirely NaN" flags computed from wb_df.
col1


Out[11]:
CHL_BTL           False
CHL_INTEG          True
CHL_INTEG_CALC    False
dtype: bool

In [12]:
# Names of the indicator columns that are NOT entirely NaN.
# A boolean mask selects them directly; the original col1[np.where(~col1)[0]] passed integer
# positions to Series[...] on a string-labelled index, which relies on the positional
# fallback that pandas has deprecated.
col1[~col1].index.tolist()


Out[12]:
['CHL_BTL', 'CHL_INTEG_CALC']

In [13]:
# Show only the indicator columns that are not entirely NaN (per col1).
# The column names come from a boolean mask on col1 rather than from integer positions fed
# to Series[...], which relies on pandas' deprecated positional fallback for label indexes.
df[col1[~col1].index.tolist()]


Out[13]:
CHL_BTL CHL_INTEG_CALC
0 1.0 NaN
1 2.0 NaN
2 3.0 NaN
3 4.0 NaN
4 5.0 NaN
5 NaN NaN
6 NaN NaN
7 NaN 5.0
8 NaN 7.0

In [14]:
# Names of the indicator columns that ARE entirely NaN.
# col1 is itself the boolean mask, so it can select from its own index; this avoids handing
# integer positions to Series[...] on a string-labelled index (deprecated positional fallback).
col1[col1].index.tolist()


Out[14]:
['CHL_INTEG']

In [15]:
# Drop the rows of df that are NaN in every indicator column that has data (per col1).
# The subset is built with a boolean mask on col1 instead of integer positions passed to
# Series[...], which relies on pandas' deprecated positional fallback for label indexes.
df.dropna(subset=col1[~col1].index.tolist(), how='all')


Out[15]:
CHL_BTL CHL_INTEG CHL_INTEG_CALC DEPH MXDEP STATN VISS
0 1.0 NaN NaN 0.0 NaN A a
1 2.0 NaN NaN 5.0 NaN A a
2 3.0 NaN NaN 10.0 NaN A a
3 4.0 NaN NaN 0.0 NaN A a
4 5.0 NaN NaN 5.0 NaN A a
7 NaN NaN 5.0 NaN 10.0 a
8 NaN NaN 7.0 NaN 10.0 a

In [58]:
# Plain Python lists of the station and VISS columns, used by the list-based timing test.
statn = df['STATN'].tolist()
pos = df['VISS'].tolist()

In [59]:
%%timeit
# Timed variant 1: patch the plain Python lists, then write the list back as the STATN column.
# Every empty string in statn is replaced by the entry of pos at the same position.
# NOTE(review): statn is mutated in place, so once an empty entry has been replaced by a
# non-empty pos value later timing loops only measure the scan plus the column assignment.
for i, x in enumerate(statn): 
    if x == "":
        statn[i] = pos[i]
df['STATN'] = statn


137 µs ± 1e+03 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)

In [52]:
%%timeit
# Timed variant 2: do the same replacement row by row through df.loc.
# Each row whose STATN is the empty string gets that row's VISS value written into STATN.
# NOTE(review): df is mutated in place, so once an empty STATN has been replaced by a
# non-empty VISS value later timing loops only measure the .loc reads.
for i in df.index: 
    if df.loc[i, 'STATN'] == "":
        df.loc[i, 'STATN'] = df.loc[i, 'VISS']


2.03 ms ± 38.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

In [ ]:
# Convert STATN to floats, giving NaN for entries that are empty.
# Two fixes over the original apply/lambda: the column label is the string 'STATN' (the bare
# name STATN is not defined in this notebook and raised NameError), and pd.to_numeric with
# errors='coerce' also yields NaN for non-numeric codes such as 'A', where float() would
# raise ValueError.
pd.to_numeric(df['STATN'], errors='coerce')

In [51]:
# Look up the STATN value of the row labelled i (i is whatever a previous loop left behind).
df.at[i, 'STATN']


Out[51]:
'B'

In [38]:
# Inspect the station list.
# NOTE(review): the recorded output has 7 entries although df has 9 rows, so it appears to
# come from an earlier kernel state — re-run to confirm.
statn


Out[38]:
['A', 'A', 'A', 'A', 'A', 'a', 'a']

In [28]:
# Probe for a leftover loop variable; in the recorded run it was not defined (NameError).
i


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-28-372e25f23b5a> in <module>()
----> 1 i

NameError: name 'i' is not defined

In [39]:
# VISS column as a plain Python list.
df['VISS'].tolist()


Out[39]:
['a', 'a', 'a', 'a', 'a', 'b', 'b', 'a', 'a']

In [17]:
# First candidate name that is also one of the chlorophyll parameter names.
next(
    p
    for p in ('CPHL_INTEG', 'BIOV_CONC_ALL')
    if p in ('CPHL_BTL', 'CPHL_INTEG', 'CHL_INTEG_CALC')
)


Out[17]:
'CPHL_INTEG'

In [183]:
# Hand-made frame with three stations (A, B, C) and two missing WADEP values
# (row 3 for station A, row 8 for station C), used to try out filling the gaps.
df = pd.DataFrame(
    {
        'STATN': list('AAAAABBCC'),
        'YEAR': [2011, 2011, 2012, 2012, 2012, 2011, 2012, 2012, 2012],
        'WINTER_YEAR': [2012] * 9,
        'SDATE': [
            '2011-11-15', '2011-12-15', '2012-01-15', '2012-02-15', '2012-03-15',
            '2011-12-15', '2012-02-15',
            '2012-02-15', '2012-03-15',
        ],
        'DIP': [2, 2, 4, 4, 4, 2, 4, 4, 4],
        'WADEP': [7, 8, 7.5, np.nan, 7, 15, 15, 20, np.nan],
    }
)

In [181]:
# Display the whole test frame.
df


Out[181]:
DIP SDATE STATN WADEP WINTER_YEAR YEAR
0 2 2011-11-15 A 7.0 2012 2011
1 2 2011-12-15 A 8.0 2012 2011
2 4 2012-01-15 A 7.5 2012 2012
3 4 2012-02-15 A NaN 2012 2012
4 4 2012-03-15 A 7.0 2012 2012
5 2 2011-12-15 B 15.0 2012 2011
6 4 2012-02-15 B 15.0 2012 2012
7 4 2012-02-15 C 20.0 2012 2012
8 4 2012-03-15 C NaN 2012 2012

In [74]:
#df.groupby(['STATN','SDATE','YEAR']).mean().reset_index()

In [75]:
#df.groupby(['STATN','WINTER_YEAR']).mean().reset_index()

In [184]:
# Collect the index labels of every row whose WADEP is missing, visiting the data one
# (STATN, SDATE) group at a time, and show those rows.
#
# For each group with a gap, the station-wide mean WADEP is computed and written into a
# scratch copy of the group (group_df).
# NOTE(review): group_df is replaced on every iteration and never merged back, so df itself
# is left unchanged by this cell — confirm whether the fill was meant to be applied to df.
index_list = []
for name, group in df.groupby(['STATN', 'SDATE']):
    # Look at every row of the group, not only the first one, so that a gap is not missed
    # when a group holds more than one row.
    nan_labels = group.index[group.WADEP.isnull().values].tolist()
    if not nan_labels:
        continue
    # Mean over all rows of the same station; Series.mean() skips NaN values.
    mean_WADEP = df.loc[df.STATN == group.STATN.values[0], 'WADEP'].mean()
    group_df = group.copy()
    group_df.loc[nan_labels, 'WADEP'] = mean_WADEP
    index_list.extend(nan_labels)
# index_list holds index *labels*, so select with .loc; positional .iloc only gave the same
# rows because df happens to have a default 0..n-1 index.
df.loc[index_list]


Out[184]:
DIP SDATE STATN WADEP WINTER_YEAR YEAR
3 4 2012-02-15 A NaN 2012 2012
8 4 2012-03-15 C NaN 2012 2012

In [169]:
# Inspect the group left over from the last iteration of the groupby loop.
# NOTE(review): the recorded output shows WADEP 20.0 for row 8, so it appears to come from
# a run in which that gap had already been filled — re-run to confirm.
group


Out[169]:
DIP SDATE STATN WADEP WINTER_YEAR YEAR
8 4 2012-03-15 C 20.0 2012 2012

In [170]:
# Display the whole frame; in the recorded output rows 3 and 8 still have WADEP NaN.
df


Out[170]:
DIP SDATE STATN WADEP WINTER_YEAR YEAR
0 2 2011-11-15 A 7.0 2012 2011
1 2 2011-12-15 A 8.0 2012 2011
2 4 2012-01-15 A 7.5 2012 2012
3 4 2012-02-15 A NaN 2012 2012
4 4 2012-03-15 A 7.0 2012 2012
5 2 2011-12-15 B 15.0 2012 2011
6 4 2012-02-15 B 15.0 2012 2012
7 4 2012-02-15 C 20.0 2012 2012
8 4 2012-03-15 C NaN 2012 2012

In [152]:
# Display the whole frame.
# NOTE(review): the recorded output shows the WADEP gaps filled with 7.375 (row 3) and 20.0
# (row 8). This presumably came from an earlier version of the fill step that assigned
# into df — verify.
df


Out[152]:
DIP SDATE STATN WADEP WINTER_YEAR YEAR
0 2 2011-11-15 A 7.000 2012 2011
1 2 2011-12-15 A 8.000 2012 2011
2 4 2012-01-15 A 7.500 2012 2012
3 4 2012-02-15 A 7.375 2012 2012
4 4 2012-03-15 A 7.000 2012 2012
5 2 2011-12-15 B 15.000 2012 2011
6 4 2012-02-15 B 15.000 2012 2012
7 4 2012-02-15 C 20.000 2012 2012
8 4 2012-03-15 C 20.000 2012 2012

In [115]:
# WADEP values of every row that belongs to the same station as the current group.
df.loc[df['STATN'] == group['STATN'].values[0], 'WADEP']


Out[115]:
7    20.0
8    20.0
Name: WADEP, dtype: float64

In [194]:
# Column labels of df as a plain Python list.
list(df.columns)


Out[194]:
['DIP', 'SDATE', 'STATN', 'WADEP', 'WINTER_YEAR', 'YEAR']

In [166]:
# Tiny frame with one string column and one integer column, for trying out scalar assignment.
dfc = pd.DataFrame(
    {
        'A': ['aaa', 'bbb', 'ccc'],
        'B': [1, 2, 3],
    }
)
dfc


Out[166]:
A B
0 aaa 1
1 bbb 2
2 ccc 3

In [167]:
# Overwrite the string in row 0 of column A with the integer 11.
dfc.at[0, 'A'] = 11

In [168]:
# Overwrite row 0 of column A again, this time with the integer 12.
dfc.at[0, 'A'] = 12

In [199]:
# True when at least one of the candidate numbers appears in the reference list.
any(y in (12, 1, 2, 3, 4, 5) for y in (6, 7, 8, 3))


Out[199]:
True

In [197]:
# One True per candidate number that appears in the reference list.
[
    True
    for y in (6, 7, 8, 4)
    if y in (12, 1, 2, 3, 4, 5)
]


Out[197]:
[True]

In [200]:
# Probe how NaN compares with a number: the comparison is False, so nothing is printed.
if 9 > np.nan:
    print(9)

In [ ]: