In [1]:
%matplotlib inline
import os
import warnings
import os.path as op
import numpy as np
import pandas as pd
# Silence all warnings for cleaner notebook output.
# NOTE(review): a blanket "ignore" also hides deprecation warnings that
# signal upcoming pandas API removals — consider narrowing the filter.
warnings.filterwarnings("ignore")
In [27]:
import socket

# Per-host data/result paths: pick directories based on the machine name.
hn = socket.gethostname()
if hn == 'AyerdiBorja':
    # NOTE(review): the original never set root_dir on this host, so the
    # op.join(root_dir, ...) lines below raised NameError. Derived here from
    # the parent of the data directory — confirm this matches the intended layout.
    root_dir = '/Users/ayerdi/Desktop'
    datadir = '/Users/ayerdi/Desktop/data'
    resultDir = '/Users/ayerdi/Desktop/Dropbox (Neurita)/IJCRS15_code'
elif hn == 'calm.local':
    root_dir = op.expanduser('~/data/coal_mining')
    datadir = op.join(root_dir, 'data')
    resultDir = op.join(root_dir, 'results')
else:
    # Fail fast with a clear message instead of an obscure NameError downstream.
    raise RuntimeError('Unknown host {!r}: add its data paths to this cell.'.format(hn))

# Derived paths shared by the rest of the notebook.
hdf_file = op.join(root_dir, 'coal_mining_data.hdf')
test_csv_file = op.join(datadir, 'testData.csv')

print('Root working dir: {}'.format(root_dir))
In [28]:
# Total Warnings: 1429
# Total Warnings 1: 80
# Total Warnings 2: 1208
# Total Warnings 3: 141
# Total Normal: 118571
# Acc. all normal: 0.988091666667
In [29]:
def read_csv_test_data(filepath=None):
    """Read the test-data CSV into a DataFrame with no header row.

    Parameters
    ----------
    filepath : str or file-like, optional
        Path (or buffer) of the CSV to read. Defaults to the module-level
        ``test_csv_file``. The lookup is deferred to call time so this cell
        can be defined even on hosts where the path-configuration cell did
        not define ``test_csv_file`` (the original default was evaluated at
        def time and raised NameError in that case).

    Returns
    -------
    pd.DataFrame
        Raw comma-separated values, columns labeled 0..N-1.
    """
    if filepath is None:
        filepath = test_csv_file
    return pd.read_csv(filepath, sep=',', header=None)
In [30]:
def read_csv_training_data(n_chunks=4):
    """Load and stack the numbered training CSV chunks from ``datadir``.

    Reads ``trainingData{i}.csv`` and ``trainingLabels{i}.csv`` for
    i = 1..n_chunks and concatenates them into one feature frame and one
    label frame.

    Parameters
    ----------
    n_chunks : int, optional
        Number of trainingData/trainingLabels file pairs to read
        (default 4, matching the original hard-coded ``range(1, 5)``).

    Returns
    -------
    (pd.DataFrame, pd.DataFrame)
        ``X``: stacked feature rows with a fresh 0..N-1 index.
        ``Y``: three 0/1 columns (labeled 0, 1, 2), one per label channel,
        with 1 wherever the raw label string equals ``'warning'``.
    """
    sample_frames = []
    label_frames = []
    for i in range(1, n_chunks + 1):
        train_file = 'trainingData' + str(i) + '.csv'
        labels_file = 'trainingLabels' + str(i) + '.csv'
        print('Reading data from {}'.format(train_file))
        # Load training data.
        train = pd.read_csv(os.path.join(datadir, train_file), sep=',', header=None)
        # Load labels and binarize each of the three label columns.
        labels = pd.read_csv(os.path.join(datadir, labels_file), sep=',', header=None)
        y = pd.concat([(labels[c] == 'warning') * 1 for c in range(3)], axis=1)
        sample_frames.append(train)
        label_frames.append(y)
    # Concatenate once (the original grew the frames with pd.concat inside
    # the loop, which is quadratic) and renumber rows. reset_index(drop=True)
    # replaces the positional-axis df.drop('index', 1), which was removed
    # in pandas 2.0.
    X = pd.concat(sample_frames).reset_index(drop=True)
    Y = pd.concat(label_frames).reset_index(drop=True)
    return X, Y
In [33]:
#save to hdf file
# you need to install PyTables for this: !pip install tables
def convert_csv_to_hdf(hdf_filepath=hdf_file):
    """Read the CSV test/training data and cache all three frames in one HDF5 file.

    Stores them under the keys 'test_data', 'samples' and 'labels'
    (the same keys read back by ``read_data_from_hdf``).
    Requires PyTables: ``pip install tables``.
    """
    print('Reading test data.')
    test_data = read_csv_test_data()
    print('Reading training data.')
    train_samples, train_labels = read_csv_training_data()
    print('Saving data into {}'.format(hdf_filepath))
    # Write each frame under its key, in the same order as before.
    for frame, hdf_key in ((test_data, 'test_data'),
                           (train_samples, 'samples'),
                           (train_labels, 'labels')):
        frame.to_hdf(hdf_filepath, key=hdf_key)
def read_data_from_hdf(hdf_filepath=hdf_file):
    """Load the cached frames back from the HDF5 store.

    Returns
    -------
    (pd.DataFrame, pd.DataFrame, pd.DataFrame)
        test_data, train_samples, train_labels — read from the keys
        'test_data', 'samples' and 'labels' written by ``convert_csv_to_hdf``.
    """
    frames = [pd.read_hdf(hdf_filepath, key=k)
              for k in ('test_data', 'samples', 'labels')]
    test_data, train_samples, train_labels = frames
    return test_data, train_samples, train_labels
In [24]:
#remove hdf file
#!rm $hdf_file
#convert_csv_to_hdf()
In [35]:
# Read the cached test/train frames back from the HDF5 store
# (requires PyTables and a prior run of convert_csv_to_hdf()).
test_data, train_samples, train_labels = read_data_from_hdf(hdf_filepath=hdf_file)
In [34]:
# We now have all the data in X and Y; time to get creative with the analysis.
In [36]:
train_samples.shape
Out[36]:
In [ ]: