In [2]:
%matplotlib qt4
from __future__ import division
from collections import defaultdict
from models import tools, optimize, models, filters
from models.tests import PerformanceTest
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Global plot styling: seaborn "ticks" theme with a framed legend, plus a
# custom three-colour line palette.
sns.set_style("ticks", {"legend.frameon": True})

PLOT_COLORS = ['#02A5F4', 'orange', 'green']
if 'axes.prop_cycle' in mpl.rcParams:
    # matplotlib >= 1.5: 'axes.color_cycle' is deprecated there and removed
    # in later releases, so the property cycler is the supported setting.
    mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=PLOT_COLORS)
else:
    # Older matplotlib only knows the legacy key.
    mpl.rcParams['axes.color_cycle'] = PLOT_COLORS
In [5]:
# Smaller working sample, used for most of the per-group subsets below.
# NOTE(review): limit/offset presumably bound the number of rows and the
# starting row -- confirm against tools.load_data.
data1 = tools.load_data(
    offset=1000000,
    limit=500000,
)
In [14]:
# Larger sample: same offset as data1 but four times the limit. It is used
# below for the lake, island and Czech-region subsets.
# NOTE(review): presumably a superset of data1 -- confirm against
# tools.load_data.
data2 = tools.load_data(
    offset=1000000,
    limit=2000000,
)
In [6]:
# Maps a human-readable group label (e.g. 'Rivers') to the subset of loaded
# data belonging to that group. Re-running this cell empties the mapping.
data_by_place = dict()
In [10]:
# Country-level groups, all taken from the smaller sample (data1).
# Each filter is called with the frame and its result is used to index the
# same frame.
region_filters = (
    ('European Countries', filters.european_countries),
    ('African Countries', filters.african_countries),
    ('American Countries', filters.american_countries),
    ('Asian Countries', filters.asian_countries),
    ('USA States', filters.usa_states),
)
for region_label, region_filter in region_filters:
    data_by_place[region_label] = data1[region_filter(data1)]
In [23]:
# Groups selected by place type. Rivers and mountains come from the smaller
# sample (data1); lakes, islands and Czech regions come from the larger one
# (data2).
# NOTE(review): presumably data2 is used where data1 holds too few rows of
# that type -- confirm.
place_type_sources = (
    ('Rivers', data1, 'river'),
    ('Lakes', data2, 'lake'),
    ('Mountains', data1, 'mountains'),
    ('Islands', data2, 'island'),
    ('Czech Regions', data2, 'region (cz)'),
)
for type_label, answers, place_type in place_type_sources:
    data_by_place[type_label] = answers[filters.place_type(answers, place_type)]
In [135]:
# Report how many rows each group contains.
for name, df in data_by_place.items():
    # A single pre-formatted string prints the same "<name> <count>" text
    # under both the Python 2 print statement and the Python 3 print function.
    print('{} {}'.format(name, len(df)))
In [136]:
# Report how many distinct place_id values each group contains.
for name, df in data_by_place.items():
    # A single pre-formatted string prints the same "<name> <count>" text
    # under both the Python 2 print statement and the Python 3 print function.
    print('{} {}'.format(name, len(set(df['place_id']))))
In [139]:
# Run the staircase gradient-descent search once per group and keep the
# returned result object under the group's label.
by_place_results = {}
for place_name, place_answers in data_by_place.items():
    search = optimize.GradientDescent(place_answers).search_staircase(
        init_learn_rate=0.015,
        number_of_iter=5,
        echo_iterations=False,
        random_factor=0.1,
        random_chance=500,
    )
    by_place_results[place_name] = search

    # Echo the best parameters found; delta is reported with its sign flipped.
    top = search.best
    summary = 'g={}, d={}'.format(top['gamma'], -top['delta'])
    tools.echo(place_name + ' done! ' + summary, clear=False)
In [147]:
# Bubble chart of the best parameters found for each group: x is the negated
# delta, y is gamma, and the bubble area scales with the number of distinct
# place_id values in the group.
fig = plt.figure(num=None, figsize=(8, 6), dpi=120)

labels = []
X, Y, S = [], [], []
for name, df in data_by_place.items():
    fit = by_place_results[name].best
    labels.append(name)
    S.append(len(set(df['place_id'])))
    Y.append(fit['gamma'])
    # Sign flipped, as in the 'd=' value echoed when the search ran.
    X.append(-fit['delta'])

plt.scatter(X, Y, s=[count * 15 for count in S], c='#34b7f6', marker='o')
plt.xlim([-1.05, 0.1])
plt.xlabel(r'$\delta$', fontsize=15)
plt.ylabel(r'$\gamma$', fontsize=15)

for label, x, y, count in zip(labels, X, Y, S):
    xy = (x, y)
    # Group name in a semi-transparent box, offset up and to the left of the bubble.
    plt.annotate(
        label, xy=xy, xytext=(-18, 18),
        textcoords='offset points', ha='right',
        bbox=dict(boxstyle='round,pad=0.4', fc='white', alpha=0.5, linewidth=1))
    # Distinct-place count centred inside the bubble.
    plt.annotate(str(count), xy=xy, ha='center', va='center')

# Adjust the layout before showing: in the original order the figure was
# displayed first, so the adjustment could miss the rendered window.
plt.tight_layout()
plt.show()
In [ ]: