In [1]:
import pandas as pd

In [2]:
# File names, relative to the data root defined below.
so_far = '2017-06-24_submission_stripped.csv'  # submission built so far
train = 'Train/train.csv'                      # training-set counts table

# Data root; the other paths in this notebook are built by plain string
# concatenation, so the trailing slash is required.
my_dir = "/Volumes/dax/seals/Kaggle-NOAA-SeaLions/"

In [3]:
# Load the training table from <my_dir><train> and preview its first rows.
t_d = pd.read_csv(filepath_or_buffer=my_dir + train)
t_d.head(n=5)


Out[3]:
train_id adult_males subadult_males adult_females juveniles pups
0 0 62 12 486 42 344
1 1 2 20 0 12 0
2 2 2 0 38 20 0
3 3 8 5 41 7 38
4 4 6 9 2 0 0

In [4]:
# Column-wise mean of the training table (one value per column).
t_m = t_d.mean(axis=0)

In [5]:
# Display the per-column means computed from the training table.
t_m


Out[5]:
train_id          473.500000
adult_males         5.687764
subadult_males      4.583333
adult_females      39.595992
juveniles          21.221519
pups               17.178270
dtype: float64

In [6]:
# Load the submission produced so far and discard its `test_id` column,
# then preview the first rows.
sf_d = pd.read_csv(filepath_or_buffer=my_dir + so_far).drop('test_id', axis=1)
sf_d.head(n=5)


Out[6]:
adult_males subadult_males adult_females juveniles pups
0 6 3 62 28 117
1 38 10 199 95 534
2 324 27 821 401 543
3 69 8 201 157 926
4 215 36 441 235 711

In [7]:
# Per-row total across all columns of the submission frame.
sums = sf_d.sum(axis='columns')

In [8]:
# Inspect the second row (position 1) of the submission frame.
sf_d.iloc[1, :]


Out[8]:
adult_males        38
subadult_males     10
adult_females     199
juveniles          95
pups              534
Name: 1, dtype: int64

In [9]:
# Inspect the last five row totals.
sums.tail(5)


Out[9]:
18631    0
18632    0
18633    0
18634    0
18635    0
dtype: int64

In [10]:
# Back-fill every row whose counts sum to zero with the per-class means of
# the training table (`t_m`).
#
# `DataFrame.set_value` was deprecated in pandas 0.21 and removed in 1.0, so
# the assignment is done with a boolean-mask `.loc` instead. The mask is
# aligned on the frame's index, so this no longer relies on a hand-maintained
# positional counter matching the index labels.
count_cols = ['adult_males', 'subadult_males', 'adult_females', 'juveniles', 'pups']
empty_rows = sums == 0

for col in count_cols:
    # The means are floats; truncate explicitly so the columns stay integer
    # and the result matches the values recorded in this notebook's output
    # (e.g. 5.69 -> 5).
    sf_d.loc[empty_rows, col] = int(t_m[col])

In [11]:
# Check the last five rows of the submission frame after the back-fill.
sf_d.tail(5)


Out[11]:
adult_males subadult_males adult_females juveniles pups
18631 5 4 39 21 17
18632 5 4 39 21 17
18633 5 4 39 21 17
18634 5 4 39 21 17
18635 5 4 39 21 17

In [12]:
# Write the patched submission next to the input data.
# NOTE(review): the row index is written as an unnamed first column, and the
# `test_id` column was deleted from this frame when it was loaded — confirm
# the resulting header is what the consumer of this file expects.
sf_d.to_csv(path_or_buf=my_dir + 'hail_mary.csv')

In [ ]: