In [1]:
import pandas as pd
import numpy as np

from pyproj import Geod

from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors.nearest_centroid import NearestCentroid

from sklearn import preprocessing

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
kitchen = pd.read_csv("kitchen.csv", error_bad_lines=False, engine="python", encoding = "ISO-8859-1")
customer = pd.read_csv("customer.csv", error_bad_lines=False, engine="python", encoding = "ISO-8859-1")

customer1 = pd.read_csv("customer.csv", error_bad_lines=False, engine="python", encoding = "ISO-8859-1")
kitchen1 = pd.read_csv("kitchen.csv", error_bad_lines=False, engine="python", encoding = "ISO-8859-1")

In [3]:
kitchen.drop(['minCapacity'], axis=1, inplace=True)
kitchen.drop(['maxCapacity'], axis=1, inplace=True)
kitchen.drop(['tolerance'], axis=1, inplace=True)

customer.drop(['customersName'], axis=1, inplace=True)
customer.drop(['qtyOrdered'], axis=1, inplace=True)


lef = preprocessing.LabelEncoder()

#Create a new column with transformed values.
kitchen['kitchenName'] = lef.fit_transform(kitchen['kitchenName'])
print(kitchen)


   kitchenName        long       lat
0            0  106.814653 -6.150735
1            1  106.834772 -6.279489
2            2  106.793820 -6.192896
3            3  106.826822 -6.224094
4            4  106.795993 -6.157473
5            5  106.847226 -6.184124
6            6  106.914214 -6.256911

In [4]:
y = kitchen['kitchenName']
X = pd.DataFrame(kitchen.drop('kitchenName', axis=1))

clf = NearestCentroid()
clf.fit(X, y)
pred = clf.predict(customer)

#customer1['cluster'] = pd.Series(pred, index=customer1.index)
customer['cluster'] = pd.Series(pred, index=customer.index)

In [5]:
customer.shape


Out[5]:
(158, 3)

In [14]:
wgs84_geod = Geod(ellps='WGS84') #Distance will be measured on this ellipsoid - more accurate than a spherical method katanya

#Get distance between pairs of lat-lon points
def Distance(lat1,lon1,lat2,lon2):
  az12,az21,dist = wgs84_geod.inv(lon1,lat1,lon2,lat2)
  return dist



#Add/update a column to the data frame with the distances (in metres)
customer1['dist0'] = Distance(customer1['lat'].tolist(),customer1['long'].tolist(),[kitchen['lat'].iloc[0]]*158,[kitchen['long'].iloc[0]]*158)
customer1['dist1'] = Distance(customer1['lat'].tolist(),customer1['long'].tolist(),[kitchen['lat'].iloc[1]]*158,[kitchen['long'].iloc[0]]*158)
customer1['dist2'] = Distance(customer1['lat'].tolist(),customer1['long'].tolist(),[kitchen['lat'].iloc[2]]*158,[kitchen['long'].iloc[0]]*158)
customer1['dist3'] = Distance(customer1['lat'].tolist(),customer1['long'].tolist(),[kitchen['lat'].iloc[3]]*158,[kitchen['long'].iloc[0]]*158)
customer1['dist4'] = Distance(customer1['lat'].tolist(),customer1['long'].tolist(),[kitchen['lat'].iloc[4]]*158,[kitchen['long'].iloc[0]]*158)
customer1['dist5'] = Distance(customer1['lat'].tolist(),customer1['long'].tolist(),[kitchen['lat'].iloc[5]]*158,[kitchen['long'].iloc[0]]*158)
customer1['dist6'] = Distance(customer1['lat'].tolist(),customer1['long'].tolist(),[kitchen['lat'].iloc[6]]*158,[kitchen['long'].iloc[0]]*158)

In [15]:
print(customer1)


    customersName        long       lat  qtyOrdered         dist0  \
0        53WJWGRK  106.836794 -6.217340           5   7762.611478   
1        34JD8T7P  106.824099 -6.183299           9   3749.915562   
2        1FD97O63  106.820074 -6.203792           5   5898.053090   
3        39R7P6D7  106.912387 -6.205225           1  12382.173772   
4        4E74CSUP  106.708647 -6.169678           3  11918.385945   
5        5N0HQP9M  106.828488 -6.161143           2   1915.697006   
6        2KDXBO1K  106.732360 -6.184005           3   9823.107224   
7        2HV6K6KS  106.894592 -6.146279           4   8861.618248   
8        4GFQ5HJ7  106.803606 -6.245037           4  10500.040027   
9        2NN33T9V  106.880876 -6.161799           5   7431.154356   
10       9AYVVHRS  106.748505 -6.196547           6   8903.114612   
11       60B6SX6Q  106.787067 -6.232970           9   9592.970153   
12       2QXJZVWG  106.832551 -6.230380           4   9027.794125   
13       5TMUO6HJ  106.819481 -6.175477           6   2787.859119   
14       4L379A08  106.825108 -6.229270           4   8761.756328   
15       5SU6PL1Y  106.799072 -6.253435           3  11487.507404   
16       355J2F81  106.819210 -6.226392           1   8381.928409   
17       3V758V8K  106.831803 -6.139387           3   2275.576850   
18       40RP85WP  106.799023 -6.227039           7   8613.797234   
19       37WYDQAL  106.822782 -6.221090           1   7832.229474   
20       3RQTCAZ9  106.757140 -6.125242           1   6962.125848   
21       2MMGF15J  106.774863 -6.149745           6   4405.348774   
22       2HYWBKS1  106.832287 -6.239348           1   9991.927923   
23       3ZKM2569  106.816635 -6.224329           3   8141.576114   
24       WVQJYLQW  106.799447 -6.279837           1  14375.915729   
25       5PB8GTQP  106.827524 -6.190937           1   4668.539340   
26       9BKOTYZM  106.737873 -6.166694           2   8679.414941   
27       3T2W84PS  106.798767 -6.189966           1   4681.167817   
28       57FWCQEC  106.839178 -6.165656           1   3176.696883   
29       341PYND9  106.818017 -6.216884           1   7324.745008   
..            ...         ...       ...         ...           ...   
128      36G80EJT  106.812820 -6.258389           2  11906.976202   
129      6Q82SGTO  106.836316 -6.302188           1  16919.504683   
130      4G9CNKB6  106.876089 -6.186962           1   7892.188112   
131      2PP9C4RW  106.822109 -6.129017           1   2539.571273   
132      UPZ3XRMC  106.802813 -6.228712           1   8722.267358   
133      2PJ3XNUI  106.870749 -6.236101           1  11298.942609   
134      50W8N0H8  106.793283 -6.294194           4  16040.089957   
135      3ZONK3PA  106.803642 -6.291286           2  15590.890889   
136      5SQAGZ43  106.832374 -6.219838           1   7889.602273   
137      4L3HMEFP  106.821372 -6.174446           4   2725.606399   
138      5V325JCT  106.831998 -6.219503           1   7843.421635   
139      3RKFWUSO  106.835404 -6.285691          12  15100.116066   
140      99JPN82I  106.847998 -6.140276           1   3867.736570   
141      3EJ15Y5O  106.822093 -6.200262           1   5538.652982   
142      4GAA84UO  106.917938 -6.223125           1  13955.599924   
143      56BBU975  106.798800 -6.218460           6   7692.306302   
144      Q280F4KF  106.803606 -6.245037           1  10500.040027   
145      8B9S30VE  106.914584 -6.188593           1  11826.176802   
146      5V0TXYRB  106.781005 -6.248081           1  11391.164434   
147      4CWFSOVV  106.854954 -6.141550           6   4574.873669   
148      38NDP0W9  106.829328 -6.216536           1   7455.863033   
149      3RMHZUBC  106.799038 -6.177534           1   3430.747779   
150      2KBT36Z6  106.809504 -6.230935           4   8887.422732   
151      1VOEJ8JH  106.809504 -6.230935           3   8887.422732   
152      1EREMSX4  106.818595 -6.210859           9   6663.340432   
153      216FL94T  106.827000 -6.138080           1   1956.063044   
154      1DBK824Y  106.877250 -6.138597           2   7057.399205   
155      86RWME7T  106.805662 -6.224949           1   8267.245158   
156      5TSV5VWN  106.741274 -6.219497           2  11125.596210   
157      56D5EYXT  106.828194 -6.197497           1   5384.104950   

            dist1         dist2         dist3         dist4         dist5  \
0     7296.597732   3648.488505   2561.713598   7059.473726   4415.631819   
1    10688.562832   1489.765836   4630.959750   3041.406496   1049.424042   
2     8392.560471   1346.009511   2323.966816   5157.317989   2256.308230   
3    13580.286342  10902.092718  11015.631461  12037.085633  11065.454934   
4    16884.751280  12010.061721  13185.371245  11810.140859  11840.728558   
5    13176.798437   3830.909652   7128.045659   1584.220480   2967.065464   
6    13943.995217   9160.614260  10129.166203   9568.928869   9107.771354   
7    17183.636355  10239.945101  12342.096930   8934.030098   9787.535633   
8     4001.242952   5894.237984   2618.836350   9760.335748   6846.280486   
9    14936.552149   8096.114185  10058.644919   7345.276447   7734.104305   
10   11735.255423   7331.928560   7929.154301   8501.159151   7448.666607   
11    5981.900131   5381.397497   3206.721073   8889.714326   6204.820208   
12    5780.651537   4594.176999   2099.172347   8302.402543   5485.529247   
13   11514.780299   1999.130211   5402.960726   2061.471018   1095.387066   
14    5672.779886   4185.565823   1290.896081   8023.728336   5124.962405   
15    3357.661298   6913.272437   3674.382870  10751.372589   7856.523330   
16    5893.425113   3738.356369    564.747505   7638.252628   4701.490068   
17   15609.221134   6214.419797   9557.870008   2757.457911   5298.906851   
18    6052.610327   4153.113054   1760.080360   7885.237219   5051.335580   
19    6520.516506   3245.027496    959.037259   7092.520447   4185.829619   
20   18206.557354   9823.258836  12649.983738   7295.708847   9106.102193   
21   15008.497926   6493.502182   9327.112892   4486.112891   5817.880343   
22    4849.086118   5495.141448   2579.528453   9262.264583   6411.345119   
23    6103.869964   3482.973715    220.931097   7396.716407   4451.647823   
24    1683.032009   9760.666592   6389.973753  13636.158590  10717.628972   
25    9895.719229   1440.908925   3933.739232   3965.442493   1611.538771   
26   15092.745496   8978.189083  10606.641585   8558.989337   8713.658988   
27   10054.959378   1787.722869   4163.558795   4000.361176   1873.063524   
28   12877.670651   4054.933481   7009.368567   2861.351284   3396.863180   
29    6933.274455   2678.715893    880.015341   6580.659792   3641.976870   
..            ...           ...           ...           ...           ...   
128   2342.096899   7245.525473   3798.026709  11161.933976   8215.371444   
129   3470.943302  12321.690442   8962.691589  16182.207578  13274.678037   
130  12285.124864   6831.059548   7943.055988   7541.265492   6806.759015   
131  16660.712248   7112.325313  10546.697549   3253.304268   6149.729252   
132   5766.090560   4171.827403   1406.242818   7986.349760   5102.024129   
133   7845.839296   7833.864681   6348.404342  10684.178724   8460.620612   
134   2869.879142  11449.120798   8104.809203  15303.468117  12400.003908   
135   1785.081215  10948.650263   7529.791780  14848.126146  11913.295322   
136   6881.970787   3566.957321   2016.927040   7170.228163   4409.708140   
137  11640.148927   2171.698225   5540.603909   2018.989094   1303.242360   
138   6905.816292   3513.147444   1985.630727   7123.260538   4358.054388   
139   2396.469046  10515.706047   7188.467097  14364.071239  11464.432997   
140  15831.248690   6890.798544   9976.893170   4151.885236   6093.711765   
141   8800.057454   1158.247197   2761.187600   4803.054617   1965.523699   
142  13018.880310  11909.495371  11430.878460  13541.851198  12217.442613   
143   6973.265761   3327.137039   1861.734165   6968.859950   4182.884401   
144   4001.242952   5894.237984   2618.836350   9760.335748   6846.280486   
145  14944.607704  11070.105114  11735.712717  11583.295935  11071.000619   
146   5091.851857   7149.072039   4571.826225  10689.715594   7993.262082   
147  15892.940352   7220.741085  10159.824354   4795.646007   6485.580477   
148   7148.679202   3077.693388   1826.610509   6730.557905   3935.230731   
149  11406.498960   2423.326032   5431.194690   2812.191515   1875.467919   
150   5399.551759   4244.989327    947.065749   8143.938203   5208.019874   
151   5399.551759   4244.989327    947.065749   8143.938203   5208.019874   
152   7602.031209   2033.823560   1527.238522   5919.982789   2988.675622   
153  15697.570197   6214.131148   9609.740077   2543.033024   5272.023896   
154  17051.514987   9168.371029  11721.487383   7236.150997   8564.437976   
155   6112.897658   3681.582844    999.413261   7528.037582   4623.105577   
156  10485.805069   8637.277941   8136.561052  10630.173474   9014.036375   
157   9190.216762   1582.624267   3301.077797   4672.999278   2105.507175   

            dist6  
0     5015.430034  
1     8207.412363  
2     5904.890550  
3    12233.287823  
4    15188.644513  
5    10700.894648  
6    12163.239432  
7    15098.133028  
8     1794.124503  
9    12819.765651  
10    9907.087929  
11    4040.900840  
12    3539.967294  
13    9021.457783  
14    3268.428955  
15    1766.505743  
16    3412.524290  
17   13134.508187  
18    3728.876939  
19    4062.270910  
20   15891.395033  
21   12642.895208  
22    2753.358895  
23    3609.842691  
24    3042.853908  
25    7433.662012  
26   13104.986311  
27    7609.198400  
28   10450.271828  
29    4442.144990  
..            ...  
128    260.506042  
129   5551.256996  
130  10298.792796  
131  14167.561284  
132   3382.567652  
133   6620.660293  
134   4752.990531  
135   3991.890912  
136   4544.791302  
137   9149.884543  
138   4560.575529  
139   3924.567594  
140  13415.924454  
141   6318.576492  
142  12025.206835  
143   4599.908796  
144   1794.124503  
145  13393.508326  
146   3849.370998  
147  13514.726396  
148   4751.200552  
149   8946.563760  
150   2928.610102  
151   2928.610102  
152   5111.398499  
153  13212.082823  
154  14804.972730  
155   3671.989129  
156   9113.741878  
157   6739.188691  

[158 rows x 11 columns]

In [12]:
customer1.head()


Out[12]:
customersName long lat qtyOrdered dist0 dist1 dist2 dist3 dist4 dist5 dist6
0 53WJWGRK 106.836794 -6.217340 5 7762.555530 6876.584986 5470.548428 1332.632329 8013.878557 3850.380619 9620.540005
1 34JD8T7P 106.824099 -6.183299 9 3749.860738 10702.753685 3515.138180 4521.440870 4222.971445 2561.263807 12873.518195
2 1FD97O63 106.820074 -6.203792 5 5898.004581 8527.706662 3145.526911 2366.106512 5774.158638 3709.579169 11960.155267
3 39R7P6D7 106.912387 -6.205225 1 12382.113566 11883.627793 13192.753450 9696.582284 13922.418185 7579.716095 5719.372520
4 4E74CSUP 106.708647 -6.169678 3 11918.421749 18501.223142 9770.120040 14396.861206 9761.273555 15420.587809 24710.930704

In [9]:
def visualize(data):
    x = data['long']
    y = data['lat']
    Cluster = data['cluster'] 

    fig = plt.figure()
    ax = fig.add_subplot(111)
    scatter = ax.scatter(x,y,c=Cluster, cmap=plt.cm.Paired, s=10, label='customer')
    ax.scatter(kitchen['long'],kitchen['lat'], s=10, c='r', marker="x", label='second')
    ax.set_xlabel('longitude')
    ax.set_ylabel('latitude')
    plt.colorbar(scatter)

    fig.show()

In [10]:
visualize(customer)


/home/ken/anaconda3/lib/python3.6/site-packages/matplotlib/figure.py:403: UserWarning: matplotlib is currently using a non-GUI backend, so cannot show the figure
  "matplotlib is currently using a non-GUI backend, "

In [11]:
dapurMiji = (customer1.where(customer1['cluster'] == 0))['qtyOrdered'].sum()
dapurNusantara = (customer1.where(customer1['cluster'] == 1))['qtyOrdered'].sum()
familiaCatering = (customer1.where(customer1['cluster'] == 2))['qtyOrdered'].sum()
pondokRawon = (customer1.where(customer1['cluster'] == 3))['qtyOrdered'].sum()
roseCatering = (customer1.where(customer1['cluster'] == 4))['qtyOrdered'].sum()
tigaKitchenCatering = (customer1.where(customer1['cluster'] == 5))['qtyOrdered'].sum()
ummuUwais = (customer1.where(customer1['cluster'] == 6))['qtyOrdered'].sum()

d = {'Dapur Miji': dapurMiji , 'Dapur Nusantara': dapurNusantara, 'Familia Catering': familiaCatering, 'Pondok Rawon': pondokRawon,'Rose Catering': roseCatering, 'Tiga Kitchen Catering': tigaKitchenCatering, 'Ummu Uwais': ummuUwais}


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/home/ken/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2392             try:
-> 2393                 return self._engine.get_loc(key)
   2394             except KeyError:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5239)()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5085)()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20405)()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20359)()

KeyError: 'cluster'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-11-bbc137321222> in <module>()
----> 1 dapurMiji = (customer1.where(customer1['cluster'] == 0))['qtyOrdered'].sum()
      2 dapurNusantara = (customer1.where(customer1['cluster'] == 1))['qtyOrdered'].sum()
      3 familiaCatering = (customer1.where(customer1['cluster'] == 2))['qtyOrdered'].sum()
      4 pondokRawon = (customer1.where(customer1['cluster'] == 3))['qtyOrdered'].sum()
      5 roseCatering = (customer1.where(customer1['cluster'] == 4))['qtyOrdered'].sum()

/home/ken/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2060             return self._getitem_multilevel(key)
   2061         else:
-> 2062             return self._getitem_column(key)
   2063 
   2064     def _getitem_column(self, key):

/home/ken/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2067         # get column
   2068         if self.columns.is_unique:
-> 2069             return self._get_item_cache(key)
   2070 
   2071         # duplicate columns & possible reduce dimensionality

/home/ken/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   1532         res = cache.get(item)
   1533         if res is None:
-> 1534             values = self._data.get(item)
   1535             res = self._box_item_values(item, values)
   1536             cache[item] = res

/home/ken/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
   3588 
   3589             if not isnull(item):
-> 3590                 loc = self.items.get_loc(item)
   3591             else:
   3592                 indexer = np.arange(len(self.items))[isnull(self.items)]

/home/ken/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2393                 return self._engine.get_loc(key)
   2394             except KeyError:
-> 2395                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2396 
   2397         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5239)()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5085)()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20405)()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20359)()

KeyError: 'cluster'

In [ ]:
print(customer.cluster.value_counts())

In [ ]:
print(d)
print(kitchen1.minCapacity.sum())

In [ ]:
print(kitchen1)

In [ ]: