In [69]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [106]:
from utils import load_population_data
# Load the population table from its relative path. The preview in the next
# cell shows the result as int64 values indexed by ZIP code.
population_csv = "../resources/population_data.csv"
zipcode_pop = load_population_data(population_csv)

In [107]:
# Preview the first ten entries of the population data.
zipcode_pop.head(10)


Out[107]:
11542    27633
11547      793
11545    12065
11548     2780
11549     2922
19390    13425
82443     4697
82442      732
82441      250
82440      145
dtype: int64

In [108]:
# Sum of every entry in zipcode_pop; used in the next cell as the denominator
# when normalising the per-ZIP counts.
total_pop = zipcode_pop.sum(axis=0)
# The parenthesised form prints the same text under Python 2 and is also valid
# Python 3, unlike the bare `print total_pop` statement.
print(total_pop)


308722808

In [109]:
import numpy as np
# Express each entry as a fraction of the total population. The divisor is
# cast to float64 so the division is floating-point even though both the
# counts and the total are integers.
pop_density = zipcode_pop.div(np.float64(total_pop))

In [110]:
# Preview the first ten normalised values.
pop_density.head(10)


Out[110]:
11542    0.000090
11547    0.000003
11545    0.000039
11548    0.000009
11549    0.000009
19390    0.000043
82443    0.000015
82442    0.000002
82441    0.000001
82440    0.000000
dtype: float64

In [111]:
# Inspect the labels of the first ten entries; the recorded output shows they
# are stored as strings (dtype object), not integers.
pop_density.head(10).index


Out[111]:
Index([u'11542', u'11547', u'11545', u'11548', u'11549', u'19390', u'82443', u'82442', u'82441', u'82440'], dtype='object')

In [135]:
from store import StoreGenerator

In [136]:
# Build the store generator from the per-ZIP population data.
# NOTE(review): this passes the raw counts (zipcode_pop), not the normalised
# pop_density computed earlier — confirm which one StoreGenerator expects.
sg = StoreGenerator(zipcode_pop)

In [137]:
# Create stores for an explicit list of ZIP codes. The recorded output has one
# row per requested ZIP code with id, pop_density and zipcode columns.
requested_zipcodes = ["01029", "03830", "05301", "10004"]
sg.create_stores(requested_zipcodes)


Out[137]:
id pop_density zipcode
01029 0 0.035368 01029
03830 1 0.072171 03830
05301 2 0.753990 05301
10004 3 0.138471 10004

4 rows × 3 columns


In [138]:
# Ask the generator for ten stores.
# NOTE(review): the ZIP codes in the recorded output look randomly chosen; if
# generate_n is stochastic, seed it so this notebook is reproducible — confirm
# in store.py.
n_stores = 10
stores = sg.generate_n(n_stores)

In [139]:
# Plain-text dump of the generated stores. The parenthesised form prints the
# same text under Python 2 and is also valid Python 3, unlike the bare
# `print stores` statement.
print(stores)


       id  pop_density zipcode
27958   0     0.060439   27958
30439   1     0.064806   30439
45424   2     0.297487   45424
77017   3     0.194001   77017
53185   4     0.110529   53185
46205   5     0.151073   46205
23517   6     0.026716   23517
40242   7     0.065122   40242
51462   8     0.006036   51462
95328   9     0.023791   95328

[10 rows x 3 columns]

In [140]:
from store import StoreSampler

In [141]:
# Wrap the ten generated stores in a sampler so that individual stores can be
# drawn from them.
ss = StoreSampler(stores)

In [142]:
# Draw one store. The recorded output is a single row of `stores`, with its id,
# pop_density and zipcode fields.
# NOTE(review): presumably the draw is weighted by pop_density — confirm
# against StoreSampler in store.py.
ss.sample()


Out[142]:
id                     2
pop_density    0.2974875
zipcode            45424
Name: 45424, dtype: object

In [144]:
from customer import CustomerGenerator

In [155]:
# Build the customer generator from the generated stores. In the recorded
# customer table below, every customer's zipcode is one of these stores' ZIP
# codes.
cg = CustomerGenerator(stores)

In [156]:
# Ask the customer generator for ten customers.
n_customers = 10
customers = cg.generate_n(n_customers)

In [157]:
# Display the generated customers; the recorded table has id, name, pet_type,
# purchase_pdf and zipcode columns.
customers


Out[157]:
id name pet_type purchase_pdf zipcode
0 0 Lisa Van Horn reptile 0.187397 45424
1 1 Lisa Doe cat 0.134411 46205
2 2 Susan Smith rabbit 0.066788 53185
3 3 Susan Smith rodent 0.081256 53185
4 4 Jay Houdini rodent 0.010211 45424
5 5 Marigold Houdini rodent 0.094618 45424
6 6 Chris Smith rabbit 0.009195 45424
7 7 Marigold Houdini fish 0.107318 45424
8 8 Lisa Danzig reptile 0.001110 45424
9 9 Lisa Danzig cat 0.307695 45424

10 rows × 5 columns


In [ ]: