In [1]:
%matplotlib inline
# Work against the small sample by default; point `path` at
# "data/galaxy/" (second assignment) to run on the full dataset.
path = "data/galaxy/sample/"
#path = "data/galaxy/"

# Standard directory layout derived from the dataset root.
train_path, valid_path, test_path, results_path, model_path = [
    path + name + '/' for name in ('train', 'valid', 'test', 'results', 'model')]
In [2]:
from utils import *
In [3]:
# Global hyper-parameters.
# NOTE(review): neither appears to be used below — the iterators
# hardcode batch_size=4 and fit_generator hardcodes nb_epoch=5.
# Consider wiring these through or removing them.
batch_size, num_epoch = 32, 1
In [4]:
import pandas as pd
# Regression targets, one row per image: presumably a GalaxyID column
# followed by the answer-probability columns (the submission cells below
# use features[2:], i.e. they treat the first two columns as non-targets).
df = pd.read_csv(path+ "train.csv")
df_val = pd.read_csv(path+ "valid.csv")
In [5]:
# custom iterator for regression
# NOTE: `reload` here is the Python 2 builtin (Python 3: importlib.reload).
import Iterator; reload(Iterator)
from Iterator import DirectoryIterator
# No augmentation for now; the commented-out generator below is an
# augmentation recipe kept for later experiments.
imgen = image.ImageDataGenerator()
# imgen = image.ImageDataGenerator(samplewise_center=0,
# rotation_range=360,
# width_shift_range=0.05,
# height_shift_range=0.05,
# zoom_range=[0.9,1.2],
# horizontal_flip=True,
# channel_shift_range=0.1,
# dim_ordering='tf')
# Training batches: images from train_path with per-image targets looked
# up in `df`. target_size matches conv1()'s (3, 128, 128) input below.
# NOTE(review): batch_size=4 ignores the batch_size=32 constant defined
# earlier — confirm which value is intended.
batches = DirectoryIterator(train_path, imgen,
class_mode=None,
dataframe=df,
batch_size=4,
target_size=(128,128))
# Validation uses an un-augmented generator, as it should.
val_imgen = image.ImageDataGenerator()
val_batches = DirectoryIterator(valid_path, val_imgen,
class_mode=None,
dataframe=df_val,
batch_size=4,
target_size=(128,128))
In [6]:
# Pull one batch to sanity-check what the iterator yields: images plus
# their target vectors; display the shape of the first image.
imgs, target = next(batches)
imgs[0].shape
Out[6]:
In [7]:
# Visual sanity check of the sampled batch (plots() presumably comes
# from the wildcard utils import).
plots(imgs)
In [35]:
def conv1():
    """Small convnet baseline for the galaxy regression task.

    Returns a compiled Keras model that maps (3, 128, 128) images to a
    37-dimensional output (linear final layer), optimized with Adam at
    lr=0.0001 under mean-squared-error loss.
    """
    model = Sequential()
    # BatchNorm on the raw input stands in for manual pixel normalization.
    model.add(BatchNormalization(axis=1, input_shape=(3, 128, 128)))
    model.add(Convolution2D(32, 3, 3, activation='relu'))
    model.add(BatchNormalization(axis=1))
    model.add(MaxPooling2D((3, 3)))
    model.add(Flatten())
    model.add(Dense(200, activation='relu'))
    model.add(BatchNormalization())
    # Linear output: targets are probabilities regressed with MSE.
    model.add(Dense(37))
    model.compile(Adam(lr=0.0001), loss='mse')
    return model
In [36]:
# Build a fresh conv1 model (random weights).
model = conv1()
In [37]:
model.summary()
In [39]:
# Keras 1 signature: fit_generator(generator, samples_per_epoch, nb_epoch, ...),
# i.e. one full pass of nb_sample images per epoch, for 5 epochs.
# NOTE(review): nb_epoch=5 ignores the num_epoch=1 constant defined above.
model.fit_generator(batches, batches.nb_sample, nb_epoch=5, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)
Out[39]:
In [40]:
# Persist the trained weights so the model can be reloaded later.
model.save_weights(model_path+'conv1.h5')
In [41]:
train_files = batches.filenames
# NOTE(review): if the custom DirectoryIterator shuffles (typical for a
# training iterator), the row order of predict_generator's output will
# NOT line up with batches.filenames — confirm its shuffle behaviour
# before trusting the GalaxyID alignment in the next cell.
train_out = model.predict_generator(batches, batches.nb_sample)
In [42]:
features = list(df.columns.values)
# Derive each image's GalaxyID from its filename alone: take the base
# name and strip the extension. This replaces the previous a[0][7:]
# slice, which silently assumed a 7-character directory prefix and
# would produce wrong IDs (or a ValueError) for any other layout.
train_ids = [os.path.splitext(os.path.basename(f))[0] for f in train_files]
# Prediction table: GalaxyID first, then the target columns
# (features[:2] are treated as non-target columns, as elsewhere).
submission = pd.DataFrame(train_out, columns=features[2:])
submission.insert(0, 'GalaxyID', [int(i) for i in train_ids])
submission.head()
Out[42]:
In [43]:
# Spot-check: compare the predicted row shown above against the
# ground-truth targets for the same GalaxyID.
df.loc[df['GalaxyID'] == 924379]
Out[43]:
In [23]:
val_files = val_batches.filenames
# NOTE(review): as with the training predictions, this assumes the
# validation iterator yields images in filename order (no shuffling) —
# confirm against the custom DirectoryIterator.
val_out = model.predict_generator(val_batches, val_batches.nb_sample)
In [27]:
features = list(df_val.columns.values)
# Derive each image's GalaxyID from its filename alone: take the base
# name and strip the extension. This replaces the previous a[0][7:]
# slice, which silently assumed a 7-character directory prefix and
# would produce wrong IDs (or a ValueError) for any other layout.
val_ids = [os.path.splitext(os.path.basename(f))[0] for f in val_files]
# Prediction table: GalaxyID first, then the target columns.
submission = pd.DataFrame(val_out, columns=features[2:])
submission.insert(0, 'GalaxyID', [int(i) for i in val_ids])
submission.head()
Out[27]:
In [28]:
# Spot-check: compare the predicted validation row shown above against
# its ground-truth targets.
df_val.loc[df_val['GalaxyID'] == 546684]
Out[28]:
In [12]:
# Test-set batches for prediction. shuffle=False keeps the yield order
# identical to test_batches.filenames, so predict_generator's rows line
# up with the GalaxyIDs extracted below; with a shuffling default the
# submission rows would be scrambled.
# NOTE(review): confirm get_batches (from utils) accepts/forwards a
# shuffle keyword to the underlying generator.
test_batches = get_batches(test_path, shuffle=False, batch_size=64, target_size=(128,128))
In [13]:
test_files = test_batches.filenames
# Predict over the whole test set, then cache the raw predictions to
# disk (save_array presumably comes from utils) so they can be reloaded
# without re-running the model.
test_out = model.predict_generator(test_batches, test_batches.nb_sample)
save_array(results_path+'test_out.dat', test_out)
In [14]:
features = list(df.columns.values)
# Derive each image's GalaxyID from its filename alone: take the base
# name and strip the extension. This replaces the previous a[0][7:]
# slice, which silently assumed a 7-character directory prefix and
# would produce wrong IDs (or a ValueError) for any other layout.
test_ids = [os.path.splitext(os.path.basename(f))[0] for f in test_files]
# Submission table: GalaxyID first, then the target columns.
submission = pd.DataFrame(test_out, columns=features[2:])
submission.insert(0, 'GalaxyID', [int(i) for i in test_ids])
submission.head()
Out[14]:
In [15]:
# Write the Kaggle submission file (no index column) and expose a
# download link in the notebook.
subm_name = results_path+'subm.csv'
submission.to_csv(subm_name, index=False)
FileLink(subm_name)
Out[15]:
In [ ]:
# Augmentation experiment: horizontal flips only.
imgen_aug = image.ImageDataGenerator(horizontal_flip=True)
# FIX: pass target_size explicitly, as the original training iterator
# does — conv1() is compiled for (3, 128, 128) inputs, so relying on the
# iterator's default size would break the Flatten/Dense dimensions.
batches = DirectoryIterator(train_path, imgen_aug,
                            class_mode=None,
                            dataframe=df,
                            batch_size=4,
                            target_size=(128,128))
# Fresh model so each augmentation experiment starts from scratch.
model = conv1()
model.fit_generator(batches, batches.nb_sample, nb_epoch=5, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)
Out[ ]:
In [ ]:
# Augmentation experiment: full-circle rotations only.
imgen_aug = image.ImageDataGenerator(rotation_range=360)
# FIX: pass target_size explicitly, as the original training iterator
# does — conv1() is compiled for (3, 128, 128) inputs, so relying on the
# iterator's default size would break the Flatten/Dense dimensions.
batches = DirectoryIterator(train_path, imgen_aug,
                            class_mode=None,
                            dataframe=df,
                            batch_size=4,
                            target_size=(128,128))
# Fresh model so each augmentation experiment starts from scratch.
model = conv1()
model.fit_generator(batches, batches.nb_sample, nb_epoch=5, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)
In [ ]:
# Augmentation experiment: small horizontal shifts only.
imgen_aug = image.ImageDataGenerator(width_shift_range=0.05)
# FIX: pass target_size explicitly, as the original training iterator
# does — conv1() is compiled for (3, 128, 128) inputs, so relying on the
# iterator's default size would break the Flatten/Dense dimensions.
batches = DirectoryIterator(train_path, imgen_aug,
                            class_mode=None,
                            dataframe=df,
                            batch_size=4,
                            target_size=(128,128))
# Fresh model so each augmentation experiment starts from scratch.
model = conv1()
model.fit_generator(batches, batches.nb_sample, nb_epoch=5, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)
In [9]:
# Augmentation experiment: channel (color) shifts only.
imgen_aug = image.ImageDataGenerator(channel_shift_range=20)
# FIX: pass target_size explicitly, as the original training iterator
# does — conv1() is compiled for (3, 128, 128) inputs, so relying on the
# iterator's default size would break the Flatten/Dense dimensions.
batches = DirectoryIterator(train_path, imgen_aug,
                            class_mode=None,
                            dataframe=df,
                            batch_size=4,
                            target_size=(128,128))
# Fresh model so each augmentation experiment starts from scratch.
model = conv1()
model.fit_generator(batches, batches.nb_sample, nb_epoch=5, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)
Out[9]:
In [10]:
# Augmentation experiment: combine the individually-tested transforms.
imgen_aug = image.ImageDataGenerator(horizontal_flip=True,
                                     rotation_range=180,
                                     width_shift_range=0.05,
                                     channel_shift_range=20)
# FIX: pass target_size explicitly, as the original training iterator
# does — conv1() is compiled for (3, 128, 128) inputs, so relying on the
# iterator's default size would break the Flatten/Dense dimensions.
batches = DirectoryIterator(train_path, imgen_aug,
                            class_mode=None,
                            dataframe=df,
                            batch_size=4,
                            target_size=(128,128))
# Fresh model so each augmentation experiment starts from scratch.
model = conv1()
model.fit_generator(batches, batches.nb_sample, nb_epoch=5, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)
Out[10]:
In [12]:
# Continue training at a lower learning rate.
# BUG FIX: plain attribute assignment (model.optimizer.lr = 0.0001)
# rebinds `lr` to a Python float, but the compiled update graph still
# references the original backend variable, so the effective learning
# rate never changed. Update the variable's value in place instead.
# (This is the Theano-backend form, consistent with the channels-first
# input_shape used above; for the TensorFlow backend use
# K.set_value(model.optimizer.lr, 0.0001).)
model.optimizer.lr.set_value(0.0001)
model.fit_generator(batches, batches.nb_sample, nb_epoch=5,
                    validation_data=val_batches, nb_val_samples=val_batches.nb_sample)
Out[12]: