In [1]:
%load_ext autoreload
In [2]:
autoreload 2
In [3]:
%matplotlib inline
In [4]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import pyreto
In [5]:
fire_size = pd.read_csv("http://tuvalu.santafe.edu/~aaronc/powerlaws/data/fires.txt", names=['acres'])
In [6]:
fire_size.describe()
Out[6]:
In [7]:
# check that I get same estimate for alpha given reported xmin...
desired_alpha, desired_xmin = 2.2, 6324
result1 = pyreto.distributions.Pareto.fit(fire_size.acres, xmin=desired_xmin)
In [9]:
# check that I get the same estimates for both alpha and xmin using brute force minimization
result2 = pyreto.distributions.Pareto.fit(fire_size.acres, xmin=None, quantile=0.999, method='brute')
In [10]:
np.testing.assert_almost_equal(result2.params['alpha'], desired_alpha, decimal=1)
In [11]:
np.testing.assert_almost_equal(result2.xmin, desired_xmin, decimal=1)
In [12]:
# check that I get the same estimates for both alpha and xmin using bounded minimization
result3 = pyreto.distributions.Pareto.fit(fire_size.acres, xmin=None, quantile=0.999, method='bounded')
In [13]:
np.testing.assert_almost_equal(result3.params['alpha'], desired_alpha, decimal=1)
In [14]:
np.testing.assert_almost_equal(result3.xmin, desired_xmin, decimal=1)
In [36]:
pvalue, Ds = pyreto.distributions.Pareto.test_goodness_of_fit(42, result3, fire_size.acres, method='bounded')
In [38]:
# pareto distribution should be rejected...
assert pvalue <= 0.10
In [20]:
weblinks_histogram = pd.read_csv('http://tuvalu.santafe.edu/~aaronc/powerlaws/data/weblinks.hist', sep='\t')
In [21]:
weblinks_histogram.describe()
Out[21]:
In [22]:
# convert histogram data into degree series..
raw_counts = np.repeat(weblinks_histogram.degree.values, weblinks_histogram.frequency.values)
weblinks = pd.Series(raw_counts, name='count')
In [23]:
weblinks.describe()
Out[23]:
In [10]:
# check that I get same estimate for alpha given reported xmin...
desired_alpha, desired_xmin = 2.336, 3684
result1 = pyreto.distributions.Pareto.fit(weblinks, xmin=desired_xmin)
In [ ]:
np.testing.assert_almost_equal(result1.params['alpha'], desired_alpha, decimal=3)
In [14]:
# check that I get the same estimates for both alpha and xmin using bounded minimization
result2 = pyreto.distributions.Pareto.fit(weblinks, xmin=None, quantile=0.9999, method='bounded')
In [ ]:
np.testing.assert_almost_equal(result2.params['alpha'], desired_alpha, decimal=3)
In [17]:
test_scaling_threshold_estimation(desired_xmin, result2, decimal=1)
In [6]:
cities = pd.read_csv('http://tuvalu.santafe.edu/~aaronc/powerlaws/data/cities.txt', names=['population'])
cities.population /= 1e3 # CSN units are in thousands of persons
In [7]:
cities.describe()
Out[7]:
In [8]:
# check that I get same estimate for alpha given reported xmin...
desired_alpha, desired_xmin = 2.37, 52.46
result1 = pyreto.distributions.Pareto.fit(cities.population, xmin=desired_xmin)
test_scaling_exponent_estimation(desired_alpha, result1, decimal=2)
In [9]:
# check that I get the same estimates for both alpha and xmin using brute force minimization
result2 = pyreto.distributions.Pareto.fit(cities.population, xmin=None, quantile=0.99, method='brute')
test_scaling_exponent_estimation(desired_alpha, result2, decimal=2)
In [10]:
test_scaling_threshold_estimation(desired_xmin, result2, decimal=2)
In [11]:
# check that I get the same estimates for both alpha and xmin using bounded minimization
result3 = pyreto.distributions.Pareto.fit(cities.population, xmin=None, quantile=0.99, method='bounded')
In [12]:
test_scaling_exponent_estimation(desired_alpha, result3, decimal=2)
In [13]:
test_scaling_threshold_estimation(desired_xmin, result3, decimal=2)
In [19]:
# using brute force minmization to find xmin makes this test take a while!
pvalue, Ds = pyreto.distributions.Pareto.test_goodness_of_fit(42, result2, cities.population, quantile=0.99,
method='brute')
In [15]:
# pareto distribution should not be rejected...
assert pvalue > 0.10
In [16]:
pvalue
Out[16]:
In [17]:
pyreto.distributions.Pareto.test_goodness_of_fit??
In [ ]: