https://www.sfdatainstitute.org/
In [1]:
import tensorflow as tf
# tf.logging is the TensorFlow 1.x logging API; this notebook targets TF 1.x
tf.logging.set_verbosity(tf.logging.ERROR)
print(tf.__version__)
In [0]:
# https://en.wikipedia.org/wiki/List_of_busiest_airports_by_passenger_traffic
airports = {
    # description tokens and annual passengers (millions), from the Wikipedia list above
    'HAM': ["germany europe regional", 18],
    'TXL': ["germany europe regional", 21],
    'FRA': ["germany europe hub", 70],
    'MUC': ["germany europe hub", 46],
    'CPH': ["denmark capital scandinavia europe hub", 29],
    'ARN': ["sweden capital scandinavia europe regional", 27],
    'BGO': ["norway scandinavia europe regional", 6],
    'OSL': ["norway capital scandinavia europe regional", 29],
    'LHR': ["gb capital europe hub", 80],
    'CDG': ["france capital europe hub", 72],
    'SFO': ["usa california regional", 58],
    'IAD': ["usa capital regional", 21],
    'AUS': ["usa texas regional", 16],
    'EWR': ["usa new_jersey hub", 46],
    'JFK': ["usa new_york hub", 62],
    'ATL': ["usa georgia hub", 110],
    'STL': ["usa missouri regional", 16],
    'LAX': ["usa california hub", 88]
}
In [0]:
airport_names = list(airports.keys())
airport_numbers = list(range(0, len(airports)))
airport_to_number = dict(zip(airport_names, airport_numbers))
number_to_airport = dict(zip(airport_numbers, airport_names))
airport_descriptions = [value[0] for value in list(airports.values())]
airport_passengers = [value[1] for value in list(airports.values())]
In [4]:
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(airport_descriptions)
description_matrix = tokenizer.texts_to_matrix(airport_descriptions, mode='freq')
airport_count, word_count = description_matrix.shape
dictionary_size = word_count
airport_count, word_count
Out[4]:
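The tokenizer turns each description into a bag-of-words row: one column per vocabulary entry, with mode='freq' giving relative word frequencies. An optional inspection cell (not part of the original notebook) makes this concrete; word_index and the reserved column 0 are standard Keras Tokenizer behaviour.
In [ ]:
# optional: inspect the learned vocabulary and one encoded description
print(tokenizer.word_index)       # word -> integer index (index 0 is reserved)
print(airport_descriptions[0])    # the first airport's description
print(description_matrix[0])      # its row in the frequency matrix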
In [0]:
x = airport_numbers
Y = description_matrix
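x holds one integer id per airport and Y the corresponding bag-of-words rows; the model below is trained to reconstruct each description from the id alone. A quick shape check (optional, not in the original notebook):
In [ ]:
# optional: confirm that inputs and targets line up
print(len(x), Y.shape)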
In [6]:
%%time
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Flatten, GlobalAveragePooling1D, Dense, LSTM, GRU, SimpleRNN, Bidirectional, Embedding
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.initializers import glorot_normal
seed = 3
input_dim = len(airports)
embedding_dim = 2
model = Sequential()
# the embedding layer maps each airport id to a 2-dimensional vector
model.add(Embedding(name='embedding',
                    input_dim=input_dim,
                    output_dim=embedding_dim,
                    input_length=1,
                    embeddings_initializer=glorot_normal(seed=seed)))
model.add(GlobalAveragePooling1D())
model.add(Dense(units=50, activation='relu', bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed)))
model.add(Dense(units=dictionary_size, name='output', activation='softmax', bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed)))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
EPOCHS=1000
BATCH_SIZE=2
%time history = model.fit(x, Y, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)
plt.yscale('log')
plt.plot(history.history['loss'])
In [7]:
loss, accuracy = model.evaluate(x, Y)
loss, accuracy
Out[7]:
In [8]:
embedding_layer = model.get_layer('embedding')
embedding_model = Model(inputs=model.input, outputs=embedding_layer.output)
embeddings_2d = embedding_model.predict(airport_numbers).reshape(-1, 2)
# for printing only
# plt.figure(dpi=600)
plt.axis('off')
plt.scatter(embeddings_2d[:, 0], embeddings_2d[:, 1])
for name, x_pos, y_pos in zip(airport_names, embeddings_2d[:, 0], embeddings_2d[:, 1]):
    print(name, (x_pos, y_pos))
    plt.annotate(name, (x_pos, y_pos))
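If the embedding has captured the structure of the descriptions, airports with similar descriptions should land close together. As an optional sanity check that is not part of the original notebook, one can look at Euclidean nearest neighbours in the 2D space; the helper closest_airports below is hypothetical and purely illustrative.
In [ ]:
import numpy as np
# optional: nearest neighbours of a few airports in the learned 2D space
def closest_airports(name, k=3):
    idx = airport_to_number[name]
    distances = np.linalg.norm(embeddings_2d - embeddings_2d[idx], axis=1)
    neighbours = np.argsort(distances)[1:k + 1]   # skip the airport itself
    return [(number_to_airport[i], round(float(distances[i]), 3)) for i in neighbours]

for name in ['FRA', 'SFO', 'OSL']:
    print(name, closest_airports(name))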
In [9]:
seed = 3
input_dim = len(airports)
embedding_dim = 1
model = Sequential()
# same architecture as before, but each airport is now embedded into a single dimension
model.add(Embedding(name='embedding',
                    input_dim=input_dim,
                    output_dim=embedding_dim,
                    input_length=1,
                    embeddings_initializer=glorot_normal(seed=seed)))
model.add(GlobalAveragePooling1D())
model.add(Dense(units=50, activation='relu', bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed)))
model.add(Dense(units=dictionary_size, name='output', activation='softmax', bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed)))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
EPOCHS=1500
BATCH_SIZE=2
%time history = model.fit(x, Y, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)
plt.yscale('log')
plt.plot(history.history['loss'])
Out[9]:
In [16]:
import numpy as np
embedding_layer = model.get_layer('embedding')
embedding_model = Model(inputs=model.input, outputs=embedding_layer.output)
embeddings_1d = embedding_model.predict(airport_numbers).reshape(-1)
# for printing only
# plt.figure(figsize=(20,5))
# plt.figure(dpi=600)
plt.axis('off')
plt.scatter(embeddings_1d, np.zeros(len(embeddings_1d)))
for name, x_pos in zip(airport_names, embeddings_1d):
    print(name, x_pos)
    plt.annotate(name, (x_pos, 0), rotation=80)
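With a single embedding dimension each airport is reduced to one number, so the representation is just an ordering along a line. Purely for illustration (not in the original notebook), the airports can be listed in that order:
In [ ]:
# optional: airports sorted along the single embedding dimension
for i in np.argsort(embeddings_1d):
    print(number_to_airport[i], round(float(embeddings_1d[i]), 3))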
In [0]: