In [9]:
import pandas as pd
In [10]:
data_total = pd.read_csv('uci_retail.csv', encoding = "ISO-8859-1")
In [11]:
data_total.head(10)
Out[11]:
InvoiceNo
StockCode
Description
Quantity
InvoiceDate
UnitPrice
CustomerID
Country
0
536365
85123A
WHITE HANGING HEART T-LIGHT HOLDER
6
12/1/2010 8:26
2.55
17850.0
United Kingdom
1
536365
71053
WHITE METAL LANTERN
6
12/1/2010 8:26
3.39
17850.0
United Kingdom
2
536365
84406B
CREAM CUPID HEARTS COAT HANGER
8
12/1/2010 8:26
2.75
17850.0
United Kingdom
3
536365
84029G
KNITTED UNION FLAG HOT WATER BOTTLE
6
12/1/2010 8:26
3.39
17850.0
United Kingdom
4
536365
84029E
RED WOOLLY HOTTIE WHITE HEART.
6
12/1/2010 8:26
3.39
17850.0
United Kingdom
5
536365
22752
SET 7 BABUSHKA NESTING BOXES
2
12/1/2010 8:26
7.65
17850.0
United Kingdom
6
536365
21730
GLASS STAR FROSTED T-LIGHT HOLDER
6
12/1/2010 8:26
4.25
17850.0
United Kingdom
7
536366
22633
HAND WARMER UNION JACK
6
12/1/2010 8:28
1.85
17850.0
United Kingdom
8
536366
22632
HAND WARMER RED POLKA DOT
6
12/1/2010 8:28
1.85
17850.0
United Kingdom
9
536367
84879
ASSORTED COLOUR BIRD ORNAMENT
32
12/1/2010 8:34
1.69
13047.0
United Kingdom
In [65]:
data_geo_dist = data_total.groupby('Country')['CustomerID'].count()
data_geo_dist.sort_values(ascending=False)
Out[65]:
Country
United Kingdom 361878
Germany 9495
France 8491
EIRE 7485
Spain 2533
Netherlands 2371
Belgium 2069
Switzerland 1877
Portugal 1480
Australia 1259
Norway 1086
Italy 803
Channel Islands 758
Finland 695
Cyprus 622
Sweden 462
Austria 401
Denmark 389
Japan 358
Poland 341
USA 291
Israel 250
Unspecified 244
Singapore 229
Iceland 182
Canada 151
Greece 146
Malta 127
United Arab Emirates 68
European Community 61
RSA 58
Lebanon 45
Lithuania 35
Brazil 32
Czech Republic 30
Bahrain 17
Saudi Arabia 10
Hong Kong 0
Name: CustomerID, dtype: int64
In [72]:
data_cust_geo = data_total[data_total['Country'].str.contains("United Kingdom")]
data_cust_geo.head(10)
Out[72]:
InvoiceNo
StockCode
Description
Quantity
InvoiceDate
UnitPrice
CustomerID
Country
0
536365
85123A
WHITE HANGING HEART T-LIGHT HOLDER
6
12/1/2010 8:26
2.55
17850.0
United Kingdom
1
536365
71053
WHITE METAL LANTERN
6
12/1/2010 8:26
3.39
17850.0
United Kingdom
2
536365
84406B
CREAM CUPID HEARTS COAT HANGER
8
12/1/2010 8:26
2.75
17850.0
United Kingdom
3
536365
84029G
KNITTED UNION FLAG HOT WATER BOTTLE
6
12/1/2010 8:26
3.39
17850.0
United Kingdom
4
536365
84029E
RED WOOLLY HOTTIE WHITE HEART.
6
12/1/2010 8:26
3.39
17850.0
United Kingdom
5
536365
22752
SET 7 BABUSHKA NESTING BOXES
2
12/1/2010 8:26
7.65
17850.0
United Kingdom
6
536365
21730
GLASS STAR FROSTED T-LIGHT HOLDER
6
12/1/2010 8:26
4.25
17850.0
United Kingdom
7
536366
22633
HAND WARMER UNION JACK
6
12/1/2010 8:28
1.85
17850.0
United Kingdom
8
536366
22632
HAND WARMER RED POLKA DOT
6
12/1/2010 8:28
1.85
17850.0
United Kingdom
9
536367
84879
ASSORTED COLOUR BIRD ORNAMENT
32
12/1/2010 8:34
1.69
13047.0
United Kingdom
In [73]:
data_cust = data_cust_geo.groupby('CustomerID')['StockCode'].apply(','.join)
data_cust.head(10)
Out[73]:
CustomerID
12346.0 23166,23166
12747.0 85124C,85124B,84879,85062,85064,82484,21136,22...
12748.0 22854,22187,20894,85018D,21413,79149B,21682,78...
12749.0 22768,22766,22767,82482,82484,22170,22171,2245...
12820.0 21977,84992,21733,21985,22195,22196,22435,2280...
12821.0 23068,22156,22295,22595,22335,22752
12822.0 22418,22367,23292,23290,22614,22899,23377,2266...
12823.0 48138,48138,48138,48138,48138
12824.0 21506,22717,23541,23543,23234,22734,23274,2327...
12826.0 22086,22355,21481,20977,21479,22837,20978,2255...
Name: StockCode, dtype: object
In [75]:
data_cust_dist = data_cust_geo.groupby('CustomerID')['StockCode'].count()
data_cust_dist.sort_values(ascending=False)
Out[75]:
CustomerID
17841.0 7983
14096.0 5128
12748.0 4642
14606.0 2782
15311.0 2491
13089.0 1857
13263.0 1677
14298.0 1640
15039.0 1508
18118.0 1284
14159.0 1212
14796.0 1165
15005.0 1160
16033.0 1152
14056.0 1128
14769.0 1094
17511.0 1076
13081.0 1061
14527.0 1011
16549.0 981
14456.0 977
15719.0 938
15555.0 925
16931.0 898
17811.0 872
14505.0 803
17338.0 757
18283.0 756
17757.0 742
12921.0 741
...
18141.0 1
18233.0 1
17925.0 1
18113.0 1
16148.0 1
16579.0 1
13120.0 1
14351.0 1
15389.0 1
12943.0 1
15753.0 1
13099.0 1
17846.0 1
17956.0 1
15524.0 1
16323.0 1
16078.0 1
13185.0 1
18068.0 1
13841.0 1
17752.0 1
13154.0 1
14090.0 1
14679.0 1
16881.0 1
17948.0 1
17102.0 1
17307.0 1
15195.0 1
15590.0 1
Name: StockCode, dtype: int64
In [76]:
print(data_cust_dist.count())
print(data_cust_dist.max())
3950
7983
In [ ]:
Content source: y2ee201/Deep-Learning-Nanodegree
Similar notebooks: