In [1]:
from exploringShipLogbooks.config import non_slave_ships
from exploringShipLogbooks.classification import LogbookClassifier
In [2]:
# Build the logbook classifier, selecting the "Naive Bayes" algorithm by name.
cl = LogbookClassifier(
    classification_algorithm="Naive Bayes",
)
In [3]:
# Run the classifier's combined load / clean / classify step.
# NOTE(review): `fuzz` and `export_csv` are passed straight through; presumably
# they toggle fuzzy matching and CSV export of the results -- confirm against
# LogbookClassifier.
cl.load_clean_and_classify(
    fuzz=False,
    export_csv=True,
)
In [4]:
# Preview the logs that were classified, i.e. those whose class was unknown
# before classification.
cl.unclassified_logs.head(n=5)
Out[4]:
In [5]:
# Preview validation set 2: the 20% of the slave voyage logs used for validation.
cl.validation_set_2.head(n=5)
Out[5]:
In [6]:
# Preview validation set 1: logs in the cliwoc data set that mention slaves,
# used for validation.
cl.validation_set_1.head(n=5)
Out[6]:
In [7]:
# Preview the data the classifier was trained on.
cl.training_data.head(n=5)
Out[7]:
In [8]:
import exploringShipLogbooks
import os.path as op
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
import pandas as pd
In [9]:
# Load the un-cleaned slave voyage logs from the 'data' directory that ships
# inside the exploringShipLogbooks package.
data_path = op.join(exploringShipLogbooks.__path__[0], 'data')
# Build the file path with op.join instead of a hard-coded '/' separator so the
# path is assembled the same way as data_path and stays platform-independent.
file_name = op.join(data_path, 'tastdb-exp-2010')
# NOTE(review): unpickling can execute arbitrary code; only load this file
# when it comes from a trusted source.
slave_voyage_logs = pd.read_pickle(file_name)
In [10]:
# Histogram of the 'Year' column for validation set 2 combined with the
# training data, saved as 'slave_voyage_years.png'.
slave_voyage_years = pd.concat(
    [cl.validation_set_2, cl.training_data], ignore_index=True
)['Year']

fig1, ax1 = plt.subplots()
ax1.hist(slave_voyage_years)
ax1.set_xlabel('Year', fontsize=30)
ax1.set_ylabel('Counts', fontsize=30)
ax1.set_xlim(1750, 1850)
# Set tick-label size through tick_params: the per-tick `tick.label` attribute
# is deprecated and missing from recent matplotlib releases, and tick_params
# also applies to ticks created after this call.
ax1.tick_params(axis='both', which='major', labelsize=26)
fig1.set_size_inches(10, 8)
# Save via the figure object rather than pyplot's implicit "current figure".
fig1.savefig('slave_voyage_years.png')
In [11]:
# Histogram of the 'Year' column for validation set 1 combined with the
# unclassified logs, saved as 'cliwoc_years.jpeg'.
cliwoc_years = pd.concat(
    [cl.validation_set_1, cl.unclassified_logs], ignore_index=True
)['Year']

fig2, ax2 = plt.subplots()
ax2.hist(cliwoc_years)
ax2.set_xlabel('Year', fontsize=30)
ax2.set_ylabel('Counts', fontsize=30)
ax2.set_xlim(1750, 1850)
# Set tick-label size through tick_params: the per-tick `tick.label` attribute
# is deprecated and missing from recent matplotlib releases, and tick_params
# also applies to ticks created after this call.
ax2.tick_params(axis='both', which='major', labelsize=26)
fig2.set_size_inches(11, 8)
# Save via the figure object rather than pyplot's implicit "current figure".
fig2.savefig('cliwoc_years.jpeg')
In [12]:
# Pie chart of the 'national' column of the slave voyage logs, keeping only
# values that account for more than 1% of the records; saved as
# 'slave_voyages_nats.png'.
fract_dict = dict(slave_voyage_logs['national'].value_counts(normalize=True))
nats = [nat for nat, fraction in fract_dict.items() if fraction > 0.01]
fractions = [fract_dict[nat] for nat in nats]
# Pull every wedge slightly away from the centre.
explode = [0.05] * len(fractions)

# Use fresh figure/axes names so the histogram figure held in fig2/ax2 is not
# overwritten by this cell.
fig3, ax3 = plt.subplots()
fig3.set_size_inches(10, 10)
# Draw through the Axes method (no reliance on the matplotlib.pylab submodule
# having been imported) and set the label font size per call instead of
# mutating the global rcParams.
ax3.pie(fractions, labels=nats, explode=explode, textprops={'fontsize': 30})
fig3.savefig('slave_voyages_nats.png')
In [13]:
# Pie chart of the 'Nationality' column of the full cliwoc data, keeping only
# values that account for more than 1% of the records; saved as
# 'cliwoc_nats.png'.
fract_dict = dict(cl.cliwoc_data_all['Nationality'].value_counts(normalize=True))
nats = [nat for nat, fraction in fract_dict.items() if fraction > 0.01]
fractions = [fract_dict[nat] for nat in nats]
# Pull every wedge slightly away from the centre.
explode = [0.05] * len(fractions)

# Use fresh figure/axes names so figures created by earlier cells are not
# overwritten by this cell.
fig4, ax4 = plt.subplots()
fig4.set_size_inches(10, 10)
# Draw through the Axes method (no reliance on the matplotlib.pylab submodule
# having been imported) and set the label font size per call instead of
# mutating the global rcParams.
ax4.pie(fractions, labels=nats, explode=explode, textprops={'fontsize': 30})
fig4.savefig('cliwoc_nats.png')
In [ ]: