In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [ ]:
np.random.randn(5)
In [ ]:
labels = ['a', 'b', 'c', 'd', 'e']
s = pd.Series(np.random.randn(5), index=labels)
'b' in s
In [ ]:
s['b']
In [ ]:
s.to_dict()
In [ ]:
s = pd.Series(s.to_dict(), index=['b', 'e', 'a', 'd', 'f'])
s
In [ ]:
s.dropna()
In [ ]:
s * 2
In [ ]:
s[3:]
In [ ]:
s[:3]
In [ ]:
s.index
In [ ]:
df = pd.DataFrame({'a': np.random.randn(6),
'b': ['foo', 'bar'] * 3,
'c': np.random.randn(6)})
df['d'] = range(6)
df
In [ ]:
df.info()
In [ ]:
df.dtypes
In [ ]:
df['b'] # by column
In [ ]:
df[:3]
In [ ]:
df[:-2]
In [ ]:
df[-2:] # the last 2 rows
In [ ]:
df[['a','b']]
In [ ]:
df.loc[2] # does equal thing as above. In this case, 3rd row.
In [ ]:
df.loc[2, 'b']
In [ ]:
df.loc[2:4, 'b']
In [ ]:
df.loc[2:4, ['b', 'c']]
In [ ]:
df.loc[2:4, 'b':'c'] # slices by column - essentially the same as above.
In [ ]:
df.loc[[0, 2, 4], ['b', 'c', 'd']] # pass a list of rows and columns I want to select out
In [ ]:
df['c'] > 0
In [ ]:
df.loc[df['c'] > 0] # boolean arrays
In [ ]:
df.index
In [ ]:
df.columns
In [ ]:
df.T
In [ ]:
df.T.T
In [ ]:
df['a'].dot(df['a'])
In [ ]:
index = pd.MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
['one', 'two', 'three']],
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]])
hdf = pd.DataFrame(np.random.randn(10, 3), index=index,
columns=['A', 'B', 'C'])
hdf
In [ ]:
hdf.loc['foo']
In [ ]:
hdf.loc['bar','one']['A']
In [2]:
data = pd.read_csv("store_sales.txt")
In [3]:
data
Out[3]:
Order ID
Order Date
Order Priority
Order Quantity
Sales
Ship Mode
Shipping Cost
Province
Customer Segment
Product Category
Product Sub-Category
Product Container
Ship Date
0
928
3/1/2011
Low
26
390.2000
Express Air
7.40
British Columbia
Consumer
Furniture
Office Furnishings
Small Box
3/3/2011
1
32323
7/23/2010
High
38
259.7175
Regular Air
5.03
Ontario
Small Business
Technology
Telephones and Communication
Medium Box
7/25/2010
2
48353
12/15/2012
Not Specified
18
71.2200
Regular Air
0.70
British Columbia
Corporate
Office Supplies
Pens & Art Supplies
Wrap Bag
12/17/2012
3
10144
1/2/2011
Critical
1
192.4900
Delivery Truck
30.00
British Columbia
Corporate
Furniture
Chairs & Chairmats
Jumbo Drum
1/4/2011
4
26756
5/10/2012
Medium
25
767.2600
Regular Air
4.00
British Columbia
Home Office
Technology
Computer Peripherals
Small Box
5/10/2012
5
18144
6/7/2011
Critical
48
207.0800
Regular Air
5.17
Northwest Territories
Corporate
Office Supplies
Paper
Small Box
6/9/2011
6
10369
11/9/2011
Low
23
683.6800
Regular Air
8.99
British Columbia
Home Office
Technology
Computer Peripherals
Small Pack
11/14/2011
7
22912
10/16/2010
Low
33
10168.2300
Express Air
19.99
Yukon
Corporate
Office Supplies
Binders and Binder Accessories
Small Box
10/20/2010
8
51008
8/20/2011
High
20
269.6600
Regular Air
4.59
Quebec
Consumer
Office Supplies
Scissors, Rulers and Trimmers
Wrap Bag
8/22/2011
9
18279
2/26/2009
Medium
20
10281.7900
Regular Air
24.49
Alberta
Consumer
Technology
Copiers and Fax
Large Box
2/27/2009
10
21635
12/2/2009
Low
2
65.3100
Regular Air
12.98
Saskachewan
Consumer
Office Supplies
Binders and Binder Accessories
Small Box
12/2/2009
11
22656
12/22/2010
Not Specified
10
309.3000
Express Air
6.50
Prince Edward Island
Corporate
Technology
Computer Peripherals
Small Box
12/24/2010
12
54882
2/6/2011
Critical
2
86.9200
Regular Air
13.89
Manitoba
Consumer
Office Supplies
Pens & Art Supplies
Wrap Bag
2/8/2011
13
25031
8/23/2009
High
24
4636.6200
Regular Air
69.00
British Columbia
Consumer
Furniture
Tables
Large Box
8/25/2009
14
59684
12/29/2010
Medium
3
14.4600
Regular Air
2.26
Manitoba
Corporate
Office Supplies
Paper
Wrap Bag
1/17/2011
15
775
3/17/2011
Critical
8
31.8700
Regular Air
1.20
Nova Scotia
Small Business
Office Supplies
Pens & Art Supplies
Wrap Bag
3/19/2011
16
29376
8/11/2012
Critical
20
2850.3100
Regular Air
7.07
New Brunswick
Consumer
Office Supplies
Storage & Organization
Small Box
8/13/2012
17
37505
6/3/2011
Low
42
384.4100
Regular Air
5.76
British Columbia
Home Office
Technology
Office Machines
Medium Box
6/8/2011
18
57382
3/23/2009
Medium
45
452.9300
Express Air
5.76
Ontario
Consumer
Office Supplies
Envelopes
Small Box
3/25/2009
19
7136
3/22/2009
High
9
47.2800
Regular Air
0.50
British Columbia
Home Office
Office Supplies
Labels
Small Box
3/24/2009
20
57542
8/22/2009
High
23
1256.2900
Regular Air
6.79
Ontario
Corporate
Office Supplies
Paper
Small Box
8/24/2009
21
53024
5/12/2012
Not Specified
46
2255.1945
Regular Air
5.00
Ontario
Corporate
Technology
Telephones and Communication
Small Pack
5/13/2012
22
59202
9/8/2012
Medium
7
83.8100
Regular Air
0.50
Prince Edward Island
Consumer
Office Supplies
Labels
Small Box
9/8/2012
23
12067
2/4/2010
Critical
7
407.1160
Regular Air
19.99
Saskachewan
Small Business
Technology
Telephones and Communication
Small Box
2/5/2010
24
45670
8/7/2012
Low
2
20.1600
Regular Air
7.86
Quebec
Corporate
Office Supplies
Paper
Small Box
8/11/2012
25
31939
6/25/2011
High
13
1506.8375
Regular Air
4.20
Yukon
Small Business
Technology
Telephones and Communication
Small Box
6/27/2011
26
42243
3/11/2012
Medium
19
145.2600
Regular Air
5.96
Manitoba
Small Business
Office Supplies
Paper
Small Box
3/13/2012
27
13536
8/18/2009
Medium
23
123.1500
Regular Air
2.04
Yukon
Home Office
Office Supplies
Paper
Wrap Bag
8/20/2009
28
32996
12/17/2010
Critical
2
116.3140
Regular Air
3.90
Saskachewan
Consumer
Technology
Telephones and Communication
Small Box
12/19/2010
29
16133
9/25/2009
Critical
1
27.9600
Regular Air
13.56
Nova Scotia
Small Business
Furniture
Office Furnishings
Large Box
9/27/2009
...
...
...
...
...
...
...
...
...
...
...
...
...
...
8369
8292
2/1/2010
Not Specified
18
299.8700
Regular Air
1.39
Quebec
Corporate
Office Supplies
Envelopes
Small Box
2/2/2010
8370
13923
7/8/2012
High
49
1220.2300
Regular Air
11.54
Yukon
Home Office
Office Supplies
Paper
Small Box
7/9/2012
8371
39399
12/17/2012
Medium
28
303.9700
Regular Air
5.14
Manitoba
Home Office
Office Supplies
Binders and Binder Accessories
Small Box
12/19/2012
8372
16547
2/18/2009
Not Specified
30
10554.6300
Delivery Truck
58.92
Northwest Territories
Corporate
Furniture
Chairs & Chairmats
Jumbo Drum
2/20/2009
8373
46726
7/5/2012
Not Specified
46
325.9700
Regular Air
8.37
Ontario
Small Business
Office Supplies
Scissors, Rulers and Trimmers
Small Pack
7/5/2012
8374
32966
1/26/2012
Low
43
74.1700
Regular Air
1.29
Manitoba
Home Office
Office Supplies
Pens & Art Supplies
Wrap Bag
1/30/2012
8375
42022
9/24/2009
Medium
11
43.2500
Regular Air
2.00
Ontario
Home Office
Office Supplies
Rubber Bands
Wrap Bag
9/25/2009
8376
26306
10/30/2012
Low
15
2567.6400
Regular Air
69.00
Quebec
Corporate
Furniture
Tables
Large Box
11/4/2012
8377
34530
8/30/2010
Low
38
612.4900
Regular Air
1.99
Ontario
Consumer
Technology
Computer Peripherals
Small Pack
9/4/2010
8378
22020
5/17/2010
Critical
37
498.2800
Regular Air
4.51
Newfoundland
Corporate
Office Supplies
Storage & Organization
Small Box
5/18/2010
8379
3040
4/11/2010
Medium
13
98.3900
Express Air
6.60
Manitoba
Consumer
Office Supplies
Paper
Small Box
4/12/2010
8380
25092
12/1/2010
High
6
141.2700
Express Air
2.99
Nova Scotia
Consumer
Office Supplies
Binders and Binder Accessories
Small Box
12/3/2010
8381
52071
11/27/2011
Not Specified
1
12.7400
Regular Air
4.95
Ontario
Consumer
Furniture
Office Furnishings
Small Box
11/29/2011
8382
12930
11/24/2011
High
49
1147.6400
Regular Air
4.50
Yukon
Corporate
Office Supplies
Appliances
Small Box
11/25/2011
8383
28582
1/22/2012
Not Specified
45
377.3100
Regular Air
2.87
British Columbia
Home Office
Office Supplies
Paper
Wrap Bag
1/23/2012
8384
52389
10/21/2011
Critical
31
655.3300
Regular Air
8.99
New Brunswick
Corporate
Office Supplies
Pens & Art Supplies
Small Pack
10/21/2011
8385
44519
5/19/2010
High
34
365.7700
Regular Air
1.39
Northwest Territories
Corporate
Office Supplies
Envelopes
Small Box
5/20/2010
8386
11425
6/22/2011
Critical
29
482.9100
Regular Air
6.25
Ontario
Consumer
Office Supplies
Storage & Organization
Small Box
6/23/2011
8387
23906
9/20/2010
Critical
37
321.0200
Express Air
3.50
Alberta
Corporate
Office Supplies
Appliances
Small Box
9/21/2010
8388
4359
3/2/2010
Critical
21
215.7600
Regular Air
7.46
Saskachewan
Home Office
Office Supplies
Storage & Organization
Small Box
3/4/2010
8389
51971
10/3/2011
Not Specified
39
1326.0400
Regular Air
35.00
Northwest Territories
Home Office
Office Supplies
Storage & Organization
Large Box
10/5/2011
8390
35686
10/2/2010
High
48
313.5200
Regular Air
9.17
Alberta
Home Office
Office Supplies
Paper
Small Box
10/4/2010
8391
5472
4/10/2010
Low
12
1098.1490
Regular Air
5.92
Yukon
Corporate
Technology
Telephones and Communication
Small Box
4/12/2010
8392
27841
9/16/2012
High
33
67.8500
Regular Air
0.70
Quebec
Corporate
Office Supplies
Pens & Art Supplies
Wrap Bag
9/18/2012
8393
59204
2/29/2012
Medium
25
151.7500
Regular Air
5.59
Manitoba
Home Office
Office Supplies
Binders and Binder Accessories
Small Box
3/1/2012
8394
21573
4/25/2012
Critical
31
96.2300
Regular Air
0.99
Ontario
Corporate
Office Supplies
Labels
Small Box
4/27/2012
8395
43111
9/12/2011
High
27
4722.8300
Regular Air
19.99
British Columbia
Consumer
Office Supplies
Binders and Binder Accessories
Small Box
9/13/2011
8396
6850
2/11/2010
High
8
81.5600
Regular Air
1.99
Alberta
Corporate
Technology
Computer Peripherals
Small Pack
2/11/2010
8397
57440
7/23/2010
Low
12
1171.2320
Express Air
2.50
Yukon
Home Office
Technology
Telephones and Communication
Small Box
7/30/2010
8398
2438
7/14/2010
High
20
278.6800
Regular Air
6.46
British Columbia
Corporate
Office Supplies
Binders and Binder Accessories
Small Box
7/17/2010
8399 rows × 13 columns
In [4]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8399 entries, 0 to 8398
Data columns (total 13 columns):
Order ID 8399 non-null int64
Order Date 8399 non-null object
Order Priority 8399 non-null object
Order Quantity 8399 non-null int64
Sales 8399 non-null float64
Ship Mode 8399 non-null object
Shipping Cost 8399 non-null float64
Province 8399 non-null object
Customer Segment 8399 non-null object
Product Category 8399 non-null object
Product Sub-Category 8399 non-null object
Product Container 8399 non-null object
Ship Date 8399 non-null object
dtypes: float64(2), int64(2), object(9)
memory usage: 853.1+ KB
In [5]:
data.describe()
Out[5]:
Order ID
Order Quantity
Sales
Shipping Cost
count
8399.000000
8399.000000
8399.000000
8399.000000
mean
29965.179783
25.571735
1775.878179
12.838557
std
17260.883447
14.481071
3585.050525
17.264052
min
3.000000
1.000000
2.240000
0.490000
25%
15011.500000
13.000000
143.195000
3.300000
50%
29857.000000
26.000000
449.420000
6.070000
75%
44596.000000
38.000000
1709.320000
13.990000
max
59973.000000
50.000000
89061.050000
164.730000
In [6]:
data['Sales'].sum()
Out[6]:
14915600.823999992
In [7]:
data[(data['Province'] == 'Yukon')]
Out[7]:
Order ID
Order Date
Order Priority
Order Quantity
Sales
Ship Mode
Shipping Cost
Province
Customer Segment
Product Category
Product Sub-Category
Product Container
Ship Date
7
22912
10/16/2010
Low
33
10168.2300
Express Air
19.99
Yukon
Corporate
Office Supplies
Binders and Binder Accessories
Small Box
10/20/2010
25
31939
6/25/2011
High
13
1506.8375
Regular Air
4.20
Yukon
Small Business
Technology
Telephones and Communication
Small Box
6/27/2011
27
13536
8/18/2009
Medium
23
123.1500
Regular Air
2.04
Yukon
Home Office
Office Supplies
Paper
Wrap Bag
8/20/2009
53
25797
9/1/2012
Not Specified
28
925.4300
Regular Air
1.99
Yukon
Corporate
Technology
Computer Peripherals
Small Pack
9/3/2012
79
55299
3/30/2012
Low
20
22.2800
Regular Air
0.70
Yukon
Corporate
Office Supplies
Rubber Bands
Wrap Bag
3/30/2012
84
42115
2/18/2012
Critical
21
673.9200
Regular Air
1.99
Yukon
Home Office
Technology
Computer Peripherals
Small Pack
2/18/2012
87
12007
9/27/2009
Not Specified
42
152.2600
Regular Air
0.50
Yukon
Corporate
Office Supplies
Labels
Small Box
9/29/2009
124
14851
5/11/2012
Not Specified
26
1443.2660
Regular Air
8.99
Yukon
Corporate
Technology
Telephones and Communication
Small Box
5/13/2012
128
30848
5/1/2011
Medium
13
4324.2900
Delivery Truck
40.19
Yukon
Corporate
Furniture
Tables
Jumbo Box
5/3/2011
145
50786
10/12/2009
Low
4
487.7200
Delivery Truck
14.00
Yukon
Consumer
Technology
Office Machines
Jumbo Drum
10/19/2009
148
8132
5/14/2009
Low
45
294.8600
Regular Air
6.92
Yukon
Home Office
Office Supplies
Paper
Small Box
5/21/2009
154
23778
7/13/2009
Low
13
105.9400
Regular Air
49.00
Yukon
Corporate
Office Supplies
Appliances
Large Box
7/17/2009
184
21988
3/28/2010
Medium
4
1337.1900
Delivery Truck
91.05
Yukon
Corporate
Office Supplies
Appliances
Jumbo Drum
3/30/2010
241
37543
12/31/2011
Medium
6
241.8500
Regular Air
13.89
Yukon
Home Office
Office Supplies
Pens & Art Supplies
Wrap Bag
1/1/2012
242
11714
8/4/2009
Critical
1
3501.7900
Delivery Truck
8.73
Yukon
Corporate
Technology
Office Machines
Jumbo Box
8/5/2009
243
9093
12/9/2011
Medium
36
268.5500
Regular Air
2.83
Yukon
Consumer
Technology
Computer Peripherals
Small Pack
12/9/2011
246
29505
12/27/2012
Medium
22
45.2100
Express Air
0.70
Yukon
Home Office
Office Supplies
Pens & Art Supplies
Wrap Bag
12/28/2012
296
8325
1/14/2009
Critical
27
112.5700
Regular Air
0.94
Yukon
Home Office
Office Supplies
Pens & Art Supplies
Wrap Bag
1/17/2009
318
3175
7/14/2009
Low
23
3553.6200
Regular Air
19.99
Yukon
Small Business
Office Supplies
Storage & Organization
Small Box
7/16/2009
329
12099
9/11/2009
Critical
48
308.8100
Regular Air
1.02
Yukon
Corporate
Office Supplies
Paper
Wrap Bag
9/13/2009
375
31233
9/24/2011
Low
35
590.3200
Regular Air
8.99
Yukon
Home Office
Office Supplies
Pens & Art Supplies
Small Pack
9/28/2011
389
7812
4/20/2011
Low
12
3635.6300
Delivery Truck
29.20
Yukon
Corporate
Furniture
Tables
Jumbo Box
4/22/2011
393
26021
9/14/2012
Critical
17
328.6270
Regular Air
4.81
Yukon
Consumer
Technology
Telephones and Communication
Medium Box
9/16/2012
408
4099
8/20/2012
Critical
42
1691.5100
Regular Air
6.22
Yukon
Consumer
Office Supplies
Storage & Organization
Small Box
8/21/2012
419
20868
10/9/2010
Low
9
1609.1900
Regular Air
19.99
Yukon
Home Office
Office Supplies
Storage & Organization
Small Box
10/16/2010
436
25889
6/8/2010
Low
23
321.6300
Regular Air
5.00
Yukon
Consumer
Furniture
Office Furnishings
Small Pack
6/10/2010
446
30887
7/7/2010
Critical
44
4357.1170
Regular Air
5.92
Yukon
Home Office
Technology
Telephones and Communication
Small Box
7/7/2010
465
28832
9/24/2010
Medium
46
1500.8400
Express Air
14.72
Yukon
Corporate
Office Supplies
Envelopes
Small Box
9/25/2010
469
57889
4/7/2010
Low
39
723.3075
Regular Air
0.99
Yukon
Consumer
Technology
Telephones and Communication
Wrap Bag
4/11/2010
494
1888
10/30/2010
Not Specified
35
158.2500
Regular Air
5.42
Yukon
Corporate
Office Supplies
Binders and Binder Accessories
Small Box
10/31/2010
...
...
...
...
...
...
...
...
...
...
...
...
...
...
7982
3078
2/11/2010
High
12
33.8600
Regular Air
0.70
Yukon
Corporate
Office Supplies
Pens & Art Supplies
Wrap Bag
2/12/2010
8000
9824
1/22/2011
Critical
39
1939.6600
Express Air
4.50
Yukon
Consumer
Office Supplies
Appliances
Small Box
1/24/2011
8030
8868
1/28/2009
Low
3
190.4500
Regular Air
3.99
Yukon
Corporate
Office Supplies
Appliances
Small Box
2/6/2009
8045
27969
6/8/2010
Low
43
5677.8700
Delivery Truck
54.74
Yukon
Home Office
Furniture
Bookcases
Jumbo Box
6/15/2010
8048
29284
4/10/2009
Medium
20
6449.0560
Delivery Truck
42.52
Yukon
Home Office
Furniture
Tables
Jumbo Box
4/12/2009
8057
10146
5/1/2009
Low
19
596.2100
Regular Air
5.02
Yukon
Corporate
Furniture
Office Furnishings
Small Box
5/6/2009
8067
6693
5/24/2012
Critical
16
40.8500
Regular Air
0.80
Yukon
Corporate
Office Supplies
Rubber Bands
Wrap Bag
5/26/2012
8087
23297
7/2/2012
Critical
34
5208.7800
Regular Air
7.07
Yukon
Corporate
Office Supplies
Storage & Organization
Small Box
7/4/2012
8088
19840
5/24/2010
Critical
25
8270.5700
Delivery Truck
48.26
Yukon
Corporate
Technology
Office Machines
Jumbo Box
5/27/2010
8095
13923
7/8/2012
High
28
4332.3000
Delivery Truck
15.59
Yukon
Home Office
Technology
Office Machines
Jumbo Drum
7/10/2012
8105
23555
12/11/2012
High
29
104.7000
Regular Air
0.50
Yukon
Corporate
Office Supplies
Labels
Small Box
12/12/2012
8112
27363
10/18/2011
Medium
6
646.1400
Regular Air
35.00
Yukon
Small Business
Office Supplies
Storage & Organization
Large Box
10/20/2011
8136
10080
2/12/2010
Low
20
637.0400
Regular Air
4.00
Yukon
Corporate
Technology
Computer Peripherals
Small Box
2/19/2010
8140
18849
8/5/2012
High
39
539.6000
Regular Air
8.78
Yukon
Corporate
Furniture
Office Furnishings
Small Box
8/7/2012
8163
19556
12/30/2010
Not Specified
45
5785.5900
Delivery Truck
30.00
Yukon
Consumer
Furniture
Chairs & Chairmats
Jumbo Drum
1/16/2011
8167
39490
8/12/2012
Low
17
2844.6400
Delivery Truck
60.00
Yukon
Small Business
Furniture
Tables
Jumbo Drum
8/16/2012
8172
16709
10/6/2012
High
3
24.3600
Regular Air
5.74
Yukon
Small Business
Office Supplies
Paper
Small Box
10/8/2012
8173
23555
12/11/2012
High
5
284.4525
Regular Air
8.99
Yukon
Corporate
Technology
Telephones and Communication
Small Box
12/12/2012
8179
56711
2/6/2011
Low
25
1279.4500
Regular Air
7.23
Yukon
Consumer
Office Supplies
Paper
Small Box
2/11/2011
8180
20450
12/7/2012
Critical
26
6878.5500
Delivery Truck
66.67
Yukon
Home Office
Furniture
Tables
Jumbo Box
12/10/2012
8217
871
11/14/2011
Low
17
104.9400
Regular Air
5.22
Yukon
Home Office
Furniture
Office Furnishings
Small Box
11/14/2011
8222
14981
10/26/2010
Medium
8
154.2100
Delivery Truck
27.75
Yukon
Home Office
Furniture
Tables
Jumbo Box
10/28/2010
8225
28007
4/8/2010
Not Specified
20
238.7700
Regular Air
7.95
Yukon
Small Business
Office Supplies
Pens & Art Supplies
Small Pack
4/9/2010
8324
2341
5/29/2010
Low
29
157.4900
Regular Air
4.96
Yukon
Small Business
Office Supplies
Paper
Small Box
6/5/2010
8351
10948
8/13/2012
Medium
50
193.1100
Regular Air
7.01
Yukon
Corporate
Office Supplies
Binders and Binder Accessories
Small Box
8/13/2012
8364
5958
9/15/2011
Medium
3
2036.9700
Regular Air
24.49
Yukon
Small Business
Technology
Copiers and Fax
Large Box
9/17/2011
8370
13923
7/8/2012
High
49
1220.2300
Regular Air
11.54
Yukon
Home Office
Office Supplies
Paper
Small Box
7/9/2012
8382
12930
11/24/2011
High
49
1147.6400
Regular Air
4.50
Yukon
Corporate
Office Supplies
Appliances
Small Box
11/25/2011
8391
5472
4/10/2010
Low
12
1098.1490
Regular Air
5.92
Yukon
Corporate
Technology
Telephones and Communication
Small Box
4/12/2010
8397
57440
7/23/2010
Low
12
1171.2320
Express Air
2.50
Yukon
Home Office
Technology
Telephones and Communication
Small Box
7/30/2010
542 rows × 13 columns
In [8]:
data[['Province', 'Product Category']]
Out[8]:
Province
Product Category
0
British Columbia
Furniture
1
Ontario
Technology
2
British Columbia
Office Supplies
3
British Columbia
Furniture
4
British Columbia
Technology
5
Northwest Territories
Office Supplies
6
British Columbia
Technology
7
Yukon
Office Supplies
8
Quebec
Office Supplies
9
Alberta
Technology
10
Saskachewan
Office Supplies
11
Prince Edward Island
Technology
12
Manitoba
Office Supplies
13
British Columbia
Furniture
14
Manitoba
Office Supplies
15
Nova Scotia
Office Supplies
16
New Brunswick
Office Supplies
17
British Columbia
Technology
18
Ontario
Office Supplies
19
British Columbia
Office Supplies
20
Ontario
Office Supplies
21
Ontario
Technology
22
Prince Edward Island
Office Supplies
23
Saskachewan
Technology
24
Quebec
Office Supplies
25
Yukon
Technology
26
Manitoba
Office Supplies
27
Yukon
Office Supplies
28
Saskachewan
Technology
29
Nova Scotia
Furniture
...
...
...
8369
Quebec
Office Supplies
8370
Yukon
Office Supplies
8371
Manitoba
Office Supplies
8372
Northwest Territories
Furniture
8373
Ontario
Office Supplies
8374
Manitoba
Office Supplies
8375
Ontario
Office Supplies
8376
Quebec
Furniture
8377
Ontario
Technology
8378
Newfoundland
Office Supplies
8379
Manitoba
Office Supplies
8380
Nova Scotia
Office Supplies
8381
Ontario
Furniture
8382
Yukon
Office Supplies
8383
British Columbia
Office Supplies
8384
New Brunswick
Office Supplies
8385
Northwest Territories
Office Supplies
8386
Ontario
Office Supplies
8387
Alberta
Office Supplies
8388
Saskachewan
Office Supplies
8389
Northwest Territories
Office Supplies
8390
Alberta
Office Supplies
8391
Yukon
Technology
8392
Quebec
Office Supplies
8393
Manitoba
Office Supplies
8394
Ontario
Office Supplies
8395
British Columbia
Office Supplies
8396
Alberta
Technology
8397
Yukon
Technology
8398
British Columbia
Office Supplies
8399 rows × 2 columns
In [9]:
data[['Province', 'Product Category']].drop_duplicates()
Out[9]:
Province
Product Category
0
British Columbia
Furniture
1
Ontario
Technology
2
British Columbia
Office Supplies
4
British Columbia
Technology
5
Northwest Territories
Office Supplies
7
Yukon
Office Supplies
8
Quebec
Office Supplies
9
Alberta
Technology
10
Saskachewan
Office Supplies
11
Prince Edward Island
Technology
12
Manitoba
Office Supplies
15
Nova Scotia
Office Supplies
16
New Brunswick
Office Supplies
18
Ontario
Office Supplies
22
Prince Edward Island
Office Supplies
23
Saskachewan
Technology
25
Yukon
Technology
29
Nova Scotia
Furniture
30
Northwest Territories
Technology
36
Quebec
Technology
38
Alberta
Furniture
43
Northwest Territories
Furniture
45
Nunavut
Office Supplies
46
Ontario
Furniture
47
Manitoba
Technology
48
Alberta
Office Supplies
51
Quebec
Furniture
58
Saskachewan
Furniture
88
Nunavut
Technology
98
Nunavut
Furniture
100
New Brunswick
Furniture
102
Manitoba
Furniture
109
Nova Scotia
Technology
127
New Brunswick
Technology
128
Yukon
Furniture
138
Prince Edward Island
Furniture
265
Newfoundland
Furniture
544
Newfoundland
Technology
910
Newfoundland
Office Supplies
In [10]:
data.groupby(['Province', 'Product Category'])
Out[10]:
<pandas.core.groupby.DataFrameGroupBy object at 0x119f97e10>
In [11]:
data.groupby(['Province', 'Product Category']).groups
Out[11]:
{('Alberta',
'Furniture'): Int64Index([ 38, 103, 118, 121, 149, 183, 214, 215, 301, 339,
...
7826, 7832, 7867, 7874, 7896, 8005, 8058, 8237, 8255, 8315],
dtype='int64', length=171),
('Alberta',
'Office Supplies'): Int64Index([ 48, 56, 65, 122, 133, 155, 160, 283, 291, 310,
...
8205, 8208, 8216, 8240, 8250, 8329, 8350, 8365, 8387, 8390],
dtype='int64', length=464),
('Alberta',
'Technology'): Int64Index([ 9, 49, 63, 66, 77, 130, 141, 166, 231, 257,
...
8145, 8147, 8171, 8202, 8204, 8232, 8242, 8270, 8319, 8396],
dtype='int64', length=230),
('British Columbia',
'Furniture'): Int64Index([ 0, 3, 13, 60, 114, 181, 234, 325, 357, 391,
...
8184, 8196, 8248, 8272, 8273, 8290, 8305, 8309, 8359, 8361],
dtype='int64', length=234),
('British Columbia',
'Office Supplies'): Int64Index([ 2, 19, 104, 105, 115, 137, 142, 146, 147, 150,
...
8220, 8261, 8268, 8287, 8303, 8307, 8332, 8383, 8395, 8398],
dtype='int64', length=615),
('British Columbia',
'Technology'): Int64Index([ 4, 6, 17, 44, 99, 120, 123, 151, 164, 300,
...
8082, 8085, 8097, 8103, 8123, 8135, 8251, 8328, 8331, 8334],
dtype='int64', length=277),
('Manitoba',
'Furniture'): Int64Index([ 102, 171, 217, 270, 280, 362, 424, 479, 496, 501,
...
8040, 8044, 8054, 8065, 8221, 8231, 8236, 8239, 8316, 8341],
dtype='int64', length=164),
('Manitoba',
'Office Supplies'): Int64Index([ 12, 14, 26, 32, 71, 76, 89, 93, 101, 126,
...
8288, 8310, 8345, 8346, 8353, 8366, 8371, 8374, 8379, 8393],
dtype='int64', length=430),
('Manitoba',
'Technology'): Int64Index([ 47, 134, 229, 275, 295, 382, 388, 437, 472, 484,
...
7811, 7814, 7851, 7871, 7890, 7923, 7969, 8137, 8155, 8170],
dtype='int64', length=199),
('New Brunswick',
'Furniture'): Int64Index([ 100, 297, 341, 415, 471, 601, 635, 769, 991, 1023, 1104,
1135, 1178, 1626, 1738, 1808, 1834, 1840, 1876, 2005, 2727, 2984,
3680, 3721, 3857, 3865, 3902, 4251, 4297, 4307, 4392, 4469, 4490,
4543, 4671, 4838, 4857, 4868, 4904, 4923, 5132, 5196, 5349, 5422,
5481, 5508, 5537, 5855, 5956, 6169, 6204, 6234, 6434, 6679, 6833,
6853, 6932, 7213, 7301, 7562, 7879, 8020, 8092],
dtype='int64'),
('New Brunswick',
'Office Supplies'): Int64Index([ 16, 91, 94, 116, 230, 244, 298, 359, 367, 464,
...
7981, 8004, 8025, 8027, 8129, 8165, 8210, 8312, 8320, 8384],
dtype='int64', length=182),
('New Brunswick',
'Technology'): Int64Index([ 127, 173, 319, 350, 755, 766, 858, 936, 1007, 1120, 1405,
1613, 1642, 1707, 1751, 1962, 2022, 2094, 2324, 2620, 2757, 2791,
2820, 2845, 2916, 2970, 3035, 3336, 3483, 3537, 3645, 3751, 3806,
3849, 3852, 3882, 3885, 3887, 3930, 3958, 3979, 4199, 4204, 4489,
4561, 4980, 5004, 5110, 5542, 5723, 5812, 5944, 6212, 6225, 6251,
6257, 6485, 6496, 6598, 6649, 6800, 6855, 6884, 6886, 6935, 7037,
7153, 7164, 7182, 7244, 7373, 7409, 7589, 7728, 7941, 7974, 8068,
8269],
dtype='int64'),
('Newfoundland',
'Furniture'): Int64Index([ 265, 1592, 2377, 2405, 3102, 3890, 4446, 5042, 5229, 5382, 6191,
7161, 7484, 8164, 8321],
dtype='int64'),
('Newfoundland',
'Office Supplies'): Int64Index([ 910, 981, 1209, 1215, 1280, 1605, 1793, 2036, 2222, 2445, 2450,
2475, 2483, 2818, 3017, 3052, 3071, 3176, 3198, 3406, 3965, 4048,
4211, 4262, 4458, 4613, 4731, 4765, 4869, 4927, 5224, 5419, 5447,
5967, 6372, 6373, 6489, 6801, 6997, 7040, 7080, 7397, 7574, 7707,
7773, 8343, 8378],
dtype='int64'),
('Newfoundland',
'Technology'): Int64Index([ 544, 611, 717, 1119, 1262, 1348, 2802, 2837, 2854, 2932, 3584,
3797, 4038, 4459, 4488, 5296, 5797, 7198, 7666, 7717],
dtype='int64'),
('Northwest Territories',
'Furniture'): Int64Index([ 43, 222, 292, 356, 412, 457, 478, 1138, 1260, 1601, 1677,
1817, 1827, 1917, 2013, 2191, 2196, 2216, 2304, 2344, 2429, 2439,
2809, 2859, 2890, 2907, 3042, 3066, 3259, 3304, 3392, 3500, 3550,
3627, 3694, 3764, 3772, 3881, 3889, 3959, 4003, 4151, 4158, 4284,
4343, 4414, 4444, 4466, 4609, 4664, 4713, 4780, 5074, 5171, 5185,
5315, 5329, 5336, 5630, 5761, 5833, 5977, 6007, 6100, 6166, 6284,
6324, 6385, 6440, 6483, 6524, 6734, 6754, 6817, 7085, 7209, 7224,
7494, 7569, 7670, 7699, 7706, 8152, 8212, 8372],
dtype='int64'),
('Northwest Territories',
'Office Supplies'): Int64Index([ 5, 69, 73, 82, 135, 136, 254, 405, 445, 502,
...
8168, 8192, 8277, 8285, 8289, 8302, 8326, 8337, 8385, 8389],
dtype='int64', length=209),
('Northwest Territories',
'Technology'): Int64Index([ 30, 33, 129, 195, 218, 282, 286, 313, 386, 413, 439,
451, 551, 664, 667, 681, 702, 765, 969, 1117, 1181, 1293,
1300, 1385, 1495, 1502, 1741, 1958, 1983, 2011, 2183, 2237, 2329,
2388, 2407, 2422, 2447, 2494, 2513, 2770, 2921, 3116, 3296, 3365,
3543, 3741, 3869, 4092, 4127, 4186, 4338, 4634, 4642, 4666, 4705,
4727, 4816, 4837, 5019, 5070, 5148, 5214, 5250, 5279, 5330, 5587,
5615, 5671, 5684, 5702, 5754, 5764, 5850, 5868, 5927, 6282, 6444,
6472, 6538, 6546, 6618, 6652, 6765, 6777, 6893, 6913, 7044, 7135,
7200, 7212, 7437, 7502, 7536, 7570, 7626, 7712, 7757, 8081, 8149,
8286],
dtype='int64'),
('Nova Scotia',
'Furniture'): Int64Index([ 29, 42, 90, 117, 143, 188, 205, 345, 366, 490, 553,
561, 649, 653, 685, 740, 745, 759, 892, 919, 941, 1109,
1141, 1277, 1297, 1508, 1621, 1638, 1937, 1951, 1986, 2002, 2037,
2189, 2331, 2348, 2362, 2382, 2538, 2546, 2628, 2634, 2871, 2915,
3055, 3106, 3182, 3202, 3384, 3408, 3499, 3507, 3727, 3757, 4046,
4137, 4265, 4362, 4381, 4457, 4477, 4586, 4625, 4655, 4818, 4860,
4992, 5036, 5138, 5353, 5458, 5491, 5493, 5532, 5660, 5678, 5741,
5798, 5824, 5960, 5985, 6101, 6250, 6278, 6329, 6484, 6622, 6650,
6814, 6868, 7183, 7364, 7385, 7454, 7720, 7730, 7827, 7895, 8169,
8335],
dtype='int64'),
('Nova Scotia',
'Office Supplies'): Int64Index([ 15, 179, 206, 238, 247, 284, 306, 320, 330, 418,
...
8224, 8226, 8253, 8298, 8311, 8318, 8330, 8344, 8367, 8380],
dtype='int64', length=244),
('Nova Scotia',
'Technology'): Int64Index([ 109, 174, 377, 564, 565, 584, 657, 772, 775, 799,
...
7461, 7487, 7654, 7673, 7792, 7906, 7933, 7968, 8003, 8034],
dtype='int64', length=120),
('Nunavut',
'Furniture'): Int64Index([98, 1971, 2935, 3845, 4252, 4373, 5183, 5595, 5729, 6281, 6302,
6378, 7265],
dtype='int64'),
('Nunavut',
'Office Supplies'): Int64Index([ 45, 281, 628, 661, 802, 830, 842, 995, 1580, 1594, 1890,
2247, 2385, 2390, 2437, 2462, 2589, 2804, 3574, 3709, 3788, 3835,
3954, 4052, 4154, 4419, 4512, 4716, 4949, 5212, 5342, 5389, 5750,
5795, 5918, 5930, 6189, 6469, 6477, 6879, 7286, 7561, 7713, 7854,
7932, 7994, 8086, 8278, 8325, 8327],
dtype='int64'),
('Nunavut',
'Technology'): Int64Index([ 88, 314, 461, 1162, 1939, 2158, 2636, 2978, 3638, 3868, 4451,
7417, 7459, 7801, 8156, 8299],
dtype='int64'),
('Ontario',
'Furniture'): Int64Index([ 46, 62, 74, 78, 106, 108, 110, 119, 140, 163,
...
8227, 8228, 8276, 8291, 8314, 8333, 8336, 8354, 8358, 8381],
dtype='int64', length=391),
('Ontario',
'Office Supplies'): Int64Index([ 18, 20, 31, 35, 41, 59, 81, 83, 111, 112,
...
8304, 8313, 8338, 8347, 8352, 8357, 8373, 8375, 8386, 8394],
dtype='int64', length=998),
('Ontario',
'Technology'): Int64Index([ 1, 21, 34, 40, 72, 80, 95, 97, 107, 185,
...
8122, 8125, 8157, 8187, 8191, 8218, 8238, 8263, 8362, 8377],
dtype='int64', length=437),
('Prince Edward Island',
'Furniture'): Int64Index([ 138, 274, 315, 353, 577, 615, 655, 943, 1021, 1103, 1129,
1375, 2213, 2493, 2654, 2777, 2786, 2878, 2990, 3123, 3441, 4061,
4181, 4325, 4506, 4764, 4811, 5018, 5103, 5262, 5517, 5580, 5627,
5809, 5942, 5997, 6198, 6380, 6711, 6758, 7199, 7296, 7455, 7468,
7638, 7736, 7833, 7989, 7993, 8200, 8363],
dtype='int64'),
('Prince Edward Island',
'Office Supplies'): Int64Index([ 22, 96, 267, 365, 425, 538, 571, 630, 712, 722,
...
7650, 7697, 7766, 7963, 8091, 8118, 8219, 8282, 8293, 8356],
dtype='int64', length=110),
('Prince Edward Island',
'Technology'): Int64Index([ 11, 394, 512, 560, 774, 881, 937, 938, 1715, 2163, 2197,
2533, 2721, 2872, 2956, 3137, 3269, 3275, 3330, 3363, 3485, 3947,
4097, 4202, 4229, 4323, 4387, 4405, 4995, 5039, 5048, 5129, 5145,
5369, 5425, 5507, 5526, 5679, 5758, 5923, 6043, 6086, 6911, 7071,
7113, 7669, 8109, 8113, 8214, 8281],
dtype='int64'),
('Quebec',
'Furniture'): Int64Index([ 51, 132, 178, 202, 354, 398, 399, 421, 430, 590,
...
7975, 7983, 8028, 8080, 8119, 8185, 8198, 8284, 8295, 8376],
dtype='int64', length=170),
('Quebec',
'Office Supplies'): Int64Index([ 8, 24, 54, 64, 68, 86, 172, 186, 237, 245,
...
8235, 8254, 8258, 8260, 8265, 8271, 8294, 8322, 8369, 8392],
dtype='int64', length=440),
('Quebec',
'Technology'): Int64Index([ 36, 37, 55, 57, 158, 235, 277, 336, 480, 495,
...
7999, 8114, 8120, 8134, 8181, 8246, 8267, 8317, 8339, 8349],
dtype='int64', length=171),
('Saskachewan',
'Furniture'): Int64Index([ 58, 203, 219, 253, 276, 285, 309, 340, 360, 368,
...
8059, 8138, 8151, 8161, 8177, 8182, 8190, 8340, 8348, 8355],
dtype='int64', length=157),
('Saskachewan',
'Office Supplies'): Int64Index([ 10, 50, 52, 70, 75, 85, 92, 113, 170, 189,
...
8211, 8243, 8244, 8247, 8256, 8292, 8308, 8342, 8360, 8388],
dtype='int64', length=527),
('Saskachewan',
'Technology'): Int64Index([ 23, 28, 39, 61, 67, 131, 182, 207, 252, 271,
...
8143, 8178, 8195, 8199, 8229, 8230, 8241, 8306, 8323, 8368],
dtype='int64', length=229),
('Yukon',
'Furniture'): Int64Index([ 128, 389, 436, 519, 580, 632, 756, 834, 1035, 1057,
...
7971, 8045, 8048, 8057, 8140, 8163, 8167, 8180, 8217, 8222],
dtype='int64', length=110),
('Yukon',
'Office Supplies'): Int64Index([ 7, 27, 79, 87, 148, 154, 184, 241, 246, 296,
...
8087, 8105, 8112, 8172, 8179, 8225, 8324, 8351, 8370, 8382],
dtype='int64', length=294),
('Yukon',
'Technology'): Int64Index([ 25, 53, 84, 124, 145, 242, 243, 393, 446, 469,
...
7803, 7883, 7942, 8088, 8095, 8136, 8173, 8364, 8391, 8397],
dtype='int64', length=138)}
In [12]:
data.groupby(['Province', 'Product Category']).groups.keys()
Out[12]:
dict_keys([('Alberta', 'Furniture'), ('Alberta', 'Office Supplies'), ('Alberta', 'Technology'), ('British Columbia', 'Furniture'), ('British Columbia', 'Office Supplies'), ('British Columbia', 'Technology'), ('Manitoba', 'Furniture'), ('Manitoba', 'Office Supplies'), ('Manitoba', 'Technology'), ('New Brunswick', 'Furniture'), ('New Brunswick', 'Office Supplies'), ('New Brunswick', 'Technology'), ('Newfoundland', 'Furniture'), ('Newfoundland', 'Office Supplies'), ('Newfoundland', 'Technology'), ('Northwest Territories', 'Furniture'), ('Northwest Territories', 'Office Supplies'), ('Northwest Territories', 'Technology'), ('Nova Scotia', 'Furniture'), ('Nova Scotia', 'Office Supplies'), ('Nova Scotia', 'Technology'), ('Nunavut', 'Furniture'), ('Nunavut', 'Office Supplies'), ('Nunavut', 'Technology'), ('Ontario', 'Furniture'), ('Ontario', 'Office Supplies'), ('Ontario', 'Technology'), ('Prince Edward Island', 'Furniture'), ('Prince Edward Island', 'Office Supplies'), ('Prince Edward Island', 'Technology'), ('Quebec', 'Furniture'), ('Quebec', 'Office Supplies'), ('Quebec', 'Technology'), ('Saskachewan', 'Furniture'), ('Saskachewan', 'Office Supplies'), ('Saskachewan', 'Technology'), ('Yukon', 'Furniture'), ('Yukon', 'Office Supplies'), ('Yukon', 'Technology')])
In [ ]:
len(data.groupby(['Province', 'Product Category']).groups[('Alberta', 'Furniture')])
In [13]:
data.groupby(['Province', 'Product Category']).agg({'Sales':'sum'})
Out[13]:
Sales
Province
Product Category
Alberta
Furniture
5.327443e+05
Office Supplies
3.194548e+05
Technology
8.525925e+05
British Columbia
Furniture
6.402451e+05
Office Supplies
4.780560e+05
Technology
7.744567e+05
Manitoba
Furniture
5.116348e+05
Office Supplies
3.344119e+05
Technology
5.268021e+05
New Brunswick
Furniture
1.431296e+05
Office Supplies
1.796133e+05
Technology
3.614687e+05
Newfoundland
Furniture
5.382918e+04
Office Supplies
2.385716e+04
Technology
2.523773e+04
Northwest Territories
Furniture
2.828379e+05
Office Supplies
2.168489e+05
Technology
3.011605e+05
Nova Scotia
Furniture
3.406439e+05
Office Supplies
1.792060e+05
Technology
2.978795e+05
Nunavut
Furniture
4.354561e+04
Office Supplies
3.861547e+04
Technology
3.421540e+04
Ontario
Furniture
1.109617e+06
Office Supplies
9.274313e+05
Technology
1.026164e+06
Prince Edward Island
Furniture
1.711241e+05
Office Supplies
9.578797e+04
Technology
1.424712e+05
Quebec
Furniture
6.057841e+05
Office Supplies
3.518227e+05
Technology
5.525883e+05
Saskachewan
Furniture
4.075563e+05
Office Supplies
3.856785e+05
Technology
6.712210e+05
Yukon
Furniture
3.358981e+05
Office Supplies
2.219782e+05
Technology
4.179911e+05
In [14]:
data.groupby(['Province', 'Product Category']).agg({'Sales':['sum', 'mean']})
Out[14]:
Sales
sum
mean
Province
Product Category
Alberta
Furniture
5.327443e+05
3115.463567
Office Supplies
3.194548e+05
688.480108
Technology
8.525925e+05
3706.923700
British Columbia
Furniture
6.402451e+05
2736.090274
Office Supplies
4.780560e+05
777.326813
Technology
7.744567e+05
2795.872457
Manitoba
Furniture
5.116348e+05
3119.724500
Office Supplies
3.344119e+05
777.702070
Technology
5.268021e+05
2647.246603
New Brunswick
Furniture
1.431296e+05
2271.897778
Office Supplies
1.796133e+05
986.886264
Technology
3.614687e+05
4634.213635
Newfoundland
Furniture
5.382918e+04
3588.612133
Office Supplies
2.385716e+04
507.599149
Technology
2.523773e+04
1261.886275
Northwest Territories
Furniture
2.828379e+05
3327.504800
Office Supplies
2.168489e+05
1037.554689
Technology
3.011605e+05
3011.604915
Nova Scotia
Furniture
3.406439e+05
3406.439200
Office Supplies
1.792060e+05
734.450779
Technology
2.978795e+05
2482.328771
Nunavut
Furniture
4.354561e+04
3349.662615
Office Supplies
3.861547e+04
772.309400
Technology
3.421540e+04
2138.462469
Ontario
Furniture
1.109617e+06
2837.896419
Office Supplies
9.274313e+05
929.289830
Technology
1.026164e+06
2348.200754
Prince Edward Island
Furniture
1.711241e+05
3355.374902
Office Supplies
9.578797e+04
870.799727
Technology
1.424712e+05
2849.423200
Quebec
Furniture
6.057841e+05
3563.436141
Office Supplies
3.518227e+05
799.597000
Technology
5.525883e+05
3231.510269
Saskachewan
Furniture
4.075563e+05
2595.900051
Office Supplies
3.856785e+05
731.837837
Technology
6.712210e+05
2931.095945
Yukon
Furniture
3.358981e+05
3053.618855
Office Supplies
2.219782e+05
755.027755
Technology
4.179911e+05
3028.921283
In [15]:
data.groupby(['Province', 'Product Category']).agg({'Sales':['sum', 'mean', lambda x: x.max()]})
Out[15]:
Sales
sum
mean
<lambda>
Province
Product Category
Alberta
Furniture
5.327443e+05
3115.463567
28389.1400
Office Supplies
3.194548e+05
688.480108
21752.0100
Technology
8.525925e+05
3706.923700
27720.9800
British Columbia
Furniture
6.402451e+05
2736.090274
20701.9280
Office Supplies
4.780560e+05
777.326813
23516.3100
Technology
7.744567e+05
2795.872457
29884.6000
Manitoba
Furniture
5.116348e+05
3119.724500
29345.2700
Office Supplies
3.344119e+05
777.702070
16269.8200
Technology
5.268021e+05
2647.246603
28761.5200
New Brunswick
Furniture
1.431296e+05
2271.897778
14567.1500
Office Supplies
1.796133e+05
986.886264
20175.4800
Technology
3.614687e+05
4634.213635
89061.0500
Newfoundland
Furniture
5.382918e+04
3588.612133
12098.8700
Office Supplies
2.385716e+04
507.599149
4115.7400
Technology
2.523773e+04
1261.886275
4373.8535
Northwest Territories
Furniture
2.828379e+05
3327.504800
26133.3900
Office Supplies
2.168489e+05
1037.554689
23792.9300
Technology
3.011605e+05
3011.604915
22079.4700
Nova Scotia
Furniture
3.406439e+05
3406.439200
28180.0800
Office Supplies
1.792060e+05
734.450779
12569.3100
Technology
2.978795e+05
2482.328771
25312.0000
Nunavut
Furniture
4.354561e+04
3349.662615
14223.8200
Office Supplies
3.861547e+04
772.309400
10123.0200
Technology
3.421540e+04
2138.462469
12028.2300
Ontario
Furniture
1.109617e+06
2837.896419
21425.9100
Office Supplies
9.274313e+05
929.289830
23106.4600
Technology
1.026164e+06
2348.200754
24051.4900
Prince Edward Island
Furniture
1.711241e+05
3355.374902
20333.8160
Office Supplies
9.578797e+04
870.799727
9293.8200
Technology
1.424712e+05
2849.423200
23255.6100
Quebec
Furniture
6.057841e+05
3563.436141
24701.1200
Office Supplies
3.518227e+05
799.597000
25409.6300
Technology
5.525883e+05
3231.510269
45923.7600
Saskachewan
Furniture
4.075563e+05
2595.900051
14493.8400
Office Supplies
3.856785e+05
731.837837
17853.6400
Technology
6.712210e+05
2931.095945
41343.2100
Yukon
Furniture
3.358981e+05
3053.618855
17248.0900
Office Supplies
2.219782e+05
755.027755
10223.1900
Technology
4.179911e+05
3028.921283
23949.5100
In [16]:
def get_range(x):
return x.max() - x.min()
data.groupby(['Province', 'Product Category']).agg({'Sales':['sum', 'mean', get_range]})
Out[16]:
Sales
sum
mean
get_range
Province
Product Category
Alberta
Furniture
5.327443e+05
3115.463567
28377.5000
Office Supplies
3.194548e+05
688.480108
21748.8100
Technology
8.525925e+05
3706.923700
27711.2800
British Columbia
Furniture
6.402451e+05
2736.090274
20690.2280
Office Supplies
4.780560e+05
777.326813
23514.0700
Technology
7.744567e+05
2795.872457
29872.4000
Manitoba
Furniture
5.116348e+05
3119.724500
29338.1200
Office Supplies
3.344119e+05
777.702070
16258.8800
Technology
5.268021e+05
2647.246603
28750.9000
New Brunswick
Furniture
1.431296e+05
2271.897778
14523.5900
Office Supplies
1.796133e+05
986.886264
20170.5100
Technology
3.614687e+05
4634.213635
89043.2800
Newfoundland
Furniture
5.382918e+04
3588.612133
12036.8400
Office Supplies
2.385716e+04
507.599149
4106.8700
Technology
2.523773e+04
1261.886275
4316.8235
Northwest Territories
Furniture
2.828379e+05
3327.504800
26102.7100
Office Supplies
2.168489e+05
1037.554689
23787.9400
Technology
3.011605e+05
3011.604915
22062.2800
Nova Scotia
Furniture
3.406439e+05
3406.439200
28168.2700
Office Supplies
1.792060e+05
734.450779
12560.8200
Technology
2.978795e+05
2482.328771
25290.1600
Nunavut
Furniture
4.354561e+04
3349.662615
14104.8500
Office Supplies
3.861547e+04
772.309400
10108.2600
Technology
3.421540e+04
2138.462469
11966.3585
Ontario
Furniture
1.109617e+06
2837.896419
21414.9000
Office Supplies
9.274313e+05
929.289830
23102.8300
Technology
1.026164e+06
2348.200754
24025.5300
Prince Edward Island
Furniture
1.711241e+05
3355.374902
20319.5860
Office Supplies
9.578797e+04
870.799727
9278.5600
Technology
1.424712e+05
2849.423200
23083.6465
Quebec
Furniture
6.057841e+05
3563.436141
24692.6400
Office Supplies
3.518227e+05
799.597000
25406.2100
Technology
5.525883e+05
3231.510269
45892.0600
Saskachewan
Furniture
4.075563e+05
2595.900051
14476.5400
Office Supplies
3.856785e+05
731.837837
17848.0100
Technology
6.712210e+05
2931.095945
41314.4800
Yukon
Furniture
3.358981e+05
3053.618855
17237.0100
Office Supplies
2.219782e+05
755.027755
10219.9600
Technology
4.179911e+05
3028.921283
23921.0500
In [17]:
pi_data = data.pivot_table(
index=['Province','Customer Segment'],
columns=['Product Category'],
values=['Sales'],
aggfunc=[np.sum, np.mean])
pi_data
Out[17]:
sum
mean
Sales
Sales
Product Category
Furniture
Office Supplies
Technology
Furniture
Office Supplies
Technology
Province
Customer Segment
Alberta
Consumer
162768.026
69671.93
272461.3965
3875.429190
633.381182
4394.538653
Corporate
183668.470
104750.15
207666.6335
2962.394677
717.466781
3053.921081
Home Office
75358.298
66599.01
93000.3770
2511.943267
652.931471
2657.153629
Small Business
110949.476
78433.68
279464.0440
2998.634486
739.940377
4299.446831
British Columbia
Consumer
118935.350
81564.77
132430.8070
2765.938372
748.300642
3395.661718
Corporate
235096.268
208556.60
284924.0030
2641.531101
931.056250
2793.372578
Home Office
161262.954
110930.86
231927.5145
2643.654984
711.095256
2863.302648
Small Business
124950.552
77003.76
125174.3460
3047.574439
611.140952
2275.897200
Manitoba
Consumer
103836.990
93882.35
92606.0265
2884.360833
838.235268
1747.283519
Corporate
239492.230
133412.34
243046.5450
3628.670152
789.422130
3240.620600
Home Office
82354.110
60685.13
106386.9145
2656.584194
689.603750
2594.802793
Small Business
85951.488
46432.07
84762.5880
2772.628645
761.181475
2825.419600
New Brunswick
Consumer
45006.860
48055.99
215192.2230
2812.928750
980.734490
8276.623962
Corporate
40694.152
61295.55
47923.2125
2260.786222
1201.873529
2819.012500
Home Office
41224.814
35243.21
67546.2400
2576.550875
749.855532
3216.487619
Small Business
16203.734
35018.55
30806.9880
1246.441077
1000.530000
2200.499143
Newfoundland
Consumer
15851.210
3547.09
6410.8030
3170.242000
354.709000
1282.160600
Corporate
12772.690
10721.62
13149.9180
2554.538000
564.295789
2191.653000
Home Office
8567.480
5705.18
2117.3030
4283.740000
713.147500
529.325750
Small Business
16637.802
3883.27
3559.7015
5545.934000
388.327000
711.940300
Northwest Territories
Consumer
20401.550
6912.44
40418.8605
2914.507143
432.027500
4041.886050
Corporate
142402.776
77869.48
157221.6860
3311.692465
802.778144
3082.778157
Home Office
79519.120
53267.38
61220.5350
3786.624762
1183.719556
2782.751591
Small Business
40514.462
78799.63
42299.4100
2893.890143
1545.090784
2488.200588
Nova Scotia
Consumer
104839.942
53507.36
56424.9100
2995.426914
906.904407
1612.140286
Corporate
88844.334
52712.20
84815.9230
2776.385437
642.831707
2494.585971
Home Office
98198.814
18936.87
86299.8745
7553.754923
485.560769
4109.517833
Small Business
48760.830
54049.56
70338.7450
2438.041500
844.524375
2344.624833
Nunavut
Consumer
14370.510
12888.53
NaN
7185.255000
1288.853000
NaN
Corporate
18311.180
17250.60
25416.2350
2615.882857
958.366667
2541.623500
Home Office
10863.924
3278.33
8799.1645
2715.981000
252.179231
1466.527417
Small Business
NaN
5198.01
NaN
NaN
577.556667
NaN
Ontario
Consumer
222697.398
111436.65
147726.2305
2818.954405
683.660429
1758.645601
Corporate
398422.294
299829.40
413835.4975
3018.350712
876.694152
2570.406817
Home Office
321527.902
336534.95
283872.7950
2949.797266
1136.942399
2468.459087
Small Business
166969.906
179630.25
180729.2065
2351.688817
911.828680
2347.132552
Prince Edward Island
Consumer
22082.710
21369.57
12503.7240
2760.338750
1017.598571
1786.246286
Corporate
69124.584
39208.08
106731.0170
2229.825290
687.861053
3557.700567
Home Office
19460.970
17596.92
7049.6020
4865.242500
1466.410000
1174.933667
Small Business
60455.856
17613.40
16186.8170
7556.982000
880.670000
2312.402429
Quebec
Consumer
158780.536
96249.18
80223.8415
4178.435158
1132.343294
2111.153724
Corporate
179729.424
106941.52
176530.7545
4179.754047
798.071045
3209.650082
Home Office
173650.906
76894.61
161796.1205
3858.909022
674.514123
4257.792645
Small Business
93623.278
71737.37
134037.5395
2127.801773
670.442710
3350.938487
Saskachewan
Consumer
73222.784
49375.62
91631.6300
2711.954963
726.112059
2545.323056
Corporate
148016.770
134479.97
393288.9375
2466.946167
614.063790
3893.949876
Home Office
108190.980
128119.95
102589.8600
2774.127692
800.749687
1831.961786
Small Business
78125.774
73703.00
83710.5440
2520.186258
921.287500
2325.292889
Yukon
Consumer
66013.348
42920.75
95391.1855
3300.667400
809.825472
3077.135016
Corporate
106265.402
94288.12
140198.3110
2361.453378
772.853443
2645.251151
Home Office
105165.646
46262.01
106757.2465
3392.440194
711.723231
3050.207043
Small Business
58453.678
38507.28
75644.3940
4175.262714
713.097778
3981.283895
In [ ]:
pi_data.to_csv("/Users/ramanathanhari/pi_data.csv")
In [18]:
sd = pd.read_csv("stock_data.csv")
sd
Out[18]:
date
AA
AAPL
GE
IBM
JNJ
MSFT
PEP
SPX
XOM
0
10/29/07
37.41
185.09
34.46
106.78
57.13
31.78
65.67
1540.98
85.51
1
10/30/07
36.43
187.00
34.39
106.15
56.99
32.70
65.80
1531.02
83.25
2
10/31/07
36.79
189.95
34.97
108.01
57.30
33.84
65.69
1549.38
84.03
3
11/01/07
35.22
187.44
34.27
105.72
56.85
34.07
64.51
1508.44
80.84
4
11/02/07
35.83
187.87
34.27
106.59
56.95
34.07
65.03
1509.65
80.32
5
11/05/07
35.22
186.18
34.15
105.48
56.70
33.77
64.84
1502.17
80.07
6
11/06/07
35.84
191.79
34.14
105.27
56.80
33.47
65.49
1520.27
82.56
7
11/07/07
34.76
186.30
33.20
103.69
56.19
32.65
64.46
1475.62
79.96
8
11/08/07
35.00
175.47
33.15
99.05
56.79
31.94
65.50
1474.77
82.00
9
11/09/07
34.47
165.37
32.61
93.58
57.29
31.01
65.48
1453.70
79.64
10
11/12/07
33.33
153.76
32.50
94.70
58.30
30.69
64.67
1439.18
77.52
11
11/13/07
34.04
169.96
33.31
98.27
59.18
31.79
65.35
1481.05
79.67
12
11/14/07
34.25
166.11
33.14
96.56
58.81
31.30
65.99
1470.58
79.15
13
11/15/07
33.76
164.30
32.55
96.71
58.80
31.14
65.57
1451.15
77.48
14
11/16/07
33.74
166.39
32.84
97.82
59.56
31.44
66.08
1458.74
78.04
15
11/19/07
32.59
163.95
32.42
95.42
59.42
31.32
66.90
1433.27
77.13
16
11/20/07
32.88
168.85
32.32
96.54
59.47
31.90
66.69
1439.70
80.53
17
11/21/07
32.68
168.46
31.58
95.42
59.03
31.57
66.87
1416.77
79.82
18
11/23/07
32.66
171.54
32.01
97.13
59.16
31.46
67.29
1440.70
80.96
19
11/26/07
32.41
172.54
31.21
95.19
59.60
30.41
66.56
1407.22
78.57
20
11/27/07
32.56
174.81
31.82
96.92
59.92
30.49
67.65
1428.23
79.21
21
11/28/07
33.65
180.22
32.68
100.23
60.46
31.08
68.76
1469.02
80.62
22
11/29/07
33.97
184.29
32.40
100.35
60.51
30.98
68.20
1469.72
81.24
23
11/30/07
33.80
182.22
32.53
98.18
59.93
30.99
68.78
1481.14
81.76
24
12/03/07
32.98
178.86
31.38
98.79
59.90
30.36
68.24
1472.42
81.48
25
12/04/07
32.76
179.81
30.80
99.54
60.10
30.23
68.30
1462.79
80.81
26
12/05/07
33.09
185.50
31.19
100.97
60.34
31.50
68.30
1485.01
82.46
27
12/06/07
33.32
189.95
31.66
102.40
60.42
31.87
68.98
1507.34
83.85
28
12/07/07
34.30
194.30
31.63
101.62
59.87
31.85
68.95
1504.66
83.91
29
12/10/07
35.17
194.21
31.78
102.11
59.97
32.06
69.20
1515.96
84.39
...
...
...
...
...
...
...
...
...
...
...
970
09/02/11
12.04
374.05
15.61
166.98
64.07
25.80
63.30
1173.97
72.14
971
09/06/11
11.77
379.74
15.11
165.11
64.64
25.51
62.45
1165.24
71.15
972
09/07/11
12.25
383.93
15.65
167.31
65.43
26.00
61.62
1198.62
73.65
973
09/08/11
12.03
384.14
15.44
165.25
64.95
26.22
61.34
1185.90
72.82
974
09/09/11
11.58
377.48
14.95
161.37
63.64
25.74
59.99
1154.23
71.01
975
09/12/11
11.55
379.94
14.87
162.42
63.59
25.89
60.14
1162.27
71.84
976
09/13/11
11.63
384.62
15.26
163.43
63.61
26.04
60.54
1172.87
71.65
977
09/14/11
11.73
389.30
15.64
167.24
63.73
26.50
61.58
1188.68
72.64
978
09/15/11
11.98
392.96
16.08
170.09
64.40
26.99
63.22
1209.11
74.01
979
09/16/11
11.97
400.50
16.33
172.99
64.59
27.12
62.05
1216.01
74.55
980
09/19/11
11.58
411.63
16.18
173.13
64.14
27.21
60.56
1204.09
73.70
981
09/20/11
11.25
413.45
16.04
174.72
64.22
26.98
60.39
1202.09
74.01
982
09/21/11
10.84
412.14
15.38
173.02
63.13
25.99
60.79
1166.76
71.97
983
09/22/11
10.11
401.82
15.04
168.62
61.92
25.06
60.92
1129.56
69.24
984
09/23/11
10.07
404.30
15.21
169.34
61.59
25.06
60.34
1136.43
69.31
985
09/26/11
10.45
403.17
15.57
174.51
62.69
25.44
61.89
1162.95
71.72
986
09/27/11
10.48
399.26
15.76
177.71
63.82
25.67
62.43
1175.38
72.91
987
09/28/11
9.97
397.01
15.45
177.55
63.25
25.58
61.97
1151.06
72.07
988
09/29/11
10.06
390.57
15.86
179.17
63.90
25.45
62.58
1160.40
73.88
989
09/30/11
9.57
381.32
15.22
174.87
63.69
24.89
61.90
1131.42
72.63
990
10/03/11
8.90
374.60
14.69
173.29
62.08
24.53
60.29
1099.23
71.15
991
10/04/11
9.12
372.50
14.86
174.74
62.17
25.34
60.45
1123.95
72.83
992
10/05/11
9.37
378.25
15.27
176.85
62.35
25.89
60.29
1144.03
73.95
993
10/06/11
9.88
377.37
15.53
181.69
62.81
26.34
60.57
1164.97
73.89
994
10/07/11
9.71
369.80
15.50
182.39
63.13
26.25
61.02
1155.46
73.56
995
10/10/11
10.09
388.81
16.14
186.62
64.43
26.94
61.87
1194.89
76.28
996
10/11/11
10.30
400.29
16.14
185.00
63.96
27.00
60.95
1195.54
76.27
997
10/12/11
10.05
402.19
16.40
186.12
64.33
26.96
62.70
1207.25
77.16
998
10/13/11
10.10
408.43
16.22
186.82
64.23
27.18
62.36
1203.66
76.37
999
10/14/11
10.26
422.00
16.60
190.53
64.72
27.27
62.24
1224.58
78.11
1000 rows × 10 columns
In [19]:
sd.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 10 columns):
date 1000 non-null object
AA 1000 non-null float64
AAPL 1000 non-null float64
GE 1000 non-null float64
IBM 1000 non-null float64
JNJ 1000 non-null float64
MSFT 1000 non-null float64
PEP 1000 non-null float64
SPX 1000 non-null float64
XOM 1000 non-null float64
dtypes: float64(9), object(1)
memory usage: 78.2+ KB
In [20]:
sd.describe()
Out[20]:
AA
AAPL
GE
IBM
JNJ
MSFT
PEP
SPX
XOM
count
1000.000000
1000.000000
1000.000000
1000.000000
1000.000000
1000.000000
1000.000000
1000.000000
1000.000000
mean
17.138190
217.386950
18.362250
122.687010
58.228900
24.947840
59.607930
1153.729090
71.513700
std
9.108915
90.627523
6.168905
26.797665
4.515169
3.746426
6.531849
183.562134
8.131698
min
5.100000
78.200000
6.190000
68.140000
42.760000
14.330000
42.460000
676.530000
54.850000
25%
11.027500
142.965000
14.690000
102.200000
55.867500
23.570000
56.535000
1048.495000
65.065000
50%
13.470000
193.970000
16.085000
120.340000
58.960000
25.375000
61.720000
1165.770000
70.905000
75%
17.427500
302.777500
20.282500
137.597500
61.110000
27.250000
63.785000
1306.157500
79.222500
max
41.860000
422.000000
34.970000
190.530000
67.320000
34.070000
71.250000
1549.380000
87.480000
In [21]:
sd.cov()
Out[21]:
AA
AAPL
GE
IBM
JNJ
MSFT
PEP
SPX
XOM
AA
82.972337
-113.011047
52.831947
-17.786667
12.452707
18.539940
29.566472
1301.053810
49.412064
AAPL
-113.011047
8213.347902
-12.674670
2325.571842
280.511813
142.374856
381.760015
7767.704237
126.952600
GE
52.831947
-12.674670
38.055390
-0.717170
10.727853
14.545879
24.925419
959.485857
34.785142
IBM
-17.786667
2325.571842
-0.717170
718.114830
87.545129
39.272088
109.993750
2378.040223
55.483080
JNJ
12.452707
280.511813
10.727853
87.545129
20.386753
11.157274
24.301897
587.304861
13.419088
MSFT
18.539940
142.374856
14.545879
39.272088
11.157274
14.035711
18.973631
559.305215
10.678607
PEP
29.566472
381.760015
24.925419
109.993750
24.301897
18.973631
42.665045
1059.998821
23.187945
SPX
1301.053810
7767.704237
959.485857
2378.040223
587.304861
559.305215
1059.998821
33695.057033
1035.010688
XOM
49.412064
126.952600
34.785142
55.483080
13.419088
10.678607
23.187945
1035.010688
66.124510
In [22]:
sd.corr()
Out[22]:
AA
AAPL
GE
IBM
JNJ
MSFT
PEP
SPX
XOM
AA
1.000000
-0.136897
0.940204
-0.072867
0.302777
0.543281
0.496932
0.778118
0.667091
AAPL
-0.136897
1.000000
-0.022671
0.957575
0.685515
0.419330
0.644903
0.466928
0.172266
GE
0.940204
-0.022671
1.000000
-0.004338
0.385151
0.629383
0.618583
0.847320
0.693433
IBM
-0.072867
0.957575
-0.004338
1.000000
0.723537
0.391174
0.628398
0.483436
0.254614
JNJ
0.302777
0.685515
0.385151
0.723537
1.000000
0.659579
0.824005
0.708609
0.365483
MSFT
0.543281
0.419330
0.629383
0.391174
0.659579
1.000000
0.775349
0.813296
0.350523
PEP
0.496932
0.644903
0.618583
0.628398
0.824005
0.775349
1.000000
0.884069
0.436561
SPX
0.778118
0.466928
0.847320
0.483436
0.708609
0.813296
0.884069
1.000000
0.693395
XOM
0.667091
0.172266
0.693433
0.254614
0.365483
0.350523
0.436561
0.693395
1.000000
In [23]:
plt.matshow(sd.corr())
Out[23]:
<matplotlib.image.AxesImage at 0x119ff99b0>
In [24]:
import seaborn as sns
f, ax = plt.subplots(figsize=(10, 8))
corr = sd.corr()
sns.heatmap(corr, mask=np.zeros_like(corr, dtype=np.bool), cmap=sns.diverging_palette(220, 10, as_cmap=True),
square=True, ax=ax)
Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x11f628b00>
In [ ]:
sd['AAPL'].rolling(5).mean()
In [25]:
sd['AAPL'].rolling(5).apply(get_range)
Out[25]:
0 NaN
1 NaN
2 NaN
3 NaN
4 4.86
5 3.77
6 5.61
7 5.61
8 16.32
9 26.42
10 38.03
11 32.54
12 21.71
13 16.20
14 16.20
15 6.01
16 4.90
17 4.90
18 7.59
19 8.59
20 6.35
21 11.76
22 12.75
23 11.75
24 9.48
25 5.43
26 6.64
27 11.09
28 15.44
29 14.49
...
970 15.94
971 15.94
972 10.78
973 10.09
974 10.09
975 6.66
976 7.14
977 11.82
978 15.48
979 20.56
980 27.01
981 24.15
982 20.49
983 12.95
984 11.63
985 11.63
986 12.88
987 7.29
988 13.73
989 21.85
990 24.66
991 24.51
992 18.07
993 8.82
994 8.45
995 19.01
996 30.49
997 32.39
998 38.63
999 33.19
Name: AAPL, Length: 1000, dtype: float64
In [26]:
sd_apple = sd[['AAPL']].copy()
sd_apple
Out[26]:
AAPL
0
185.09
1
187.00
2
189.95
3
187.44
4
187.87
5
186.18
6
191.79
7
186.30
8
175.47
9
165.37
10
153.76
11
169.96
12
166.11
13
164.30
14
166.39
15
163.95
16
168.85
17
168.46
18
171.54
19
172.54
20
174.81
21
180.22
22
184.29
23
182.22
24
178.86
25
179.81
26
185.50
27
189.95
28
194.30
29
194.21
...
...
970
374.05
971
379.74
972
383.93
973
384.14
974
377.48
975
379.94
976
384.62
977
389.30
978
392.96
979
400.50
980
411.63
981
413.45
982
412.14
983
401.82
984
404.30
985
403.17
986
399.26
987
397.01
988
390.57
989
381.32
990
374.60
991
372.50
992
378.25
993
377.37
994
369.80
995
388.81
996
400.29
997
402.19
998
408.43
999
422.00
1000 rows × 1 columns
In [27]:
sd_apple['returns'] = np.log(sd_apple['AAPL'] / sd_apple['AAPL'].shift(1))
sd_apple
Out[27]:
AAPL
returns
0
185.09
NaN
1
187.00
0.010266
2
189.95
0.015652
3
187.44
-0.013302
4
187.87
0.002291
5
186.18
-0.009036
6
191.79
0.029687
7
186.30
-0.029043
8
175.47
-0.059890
9
165.37
-0.059283
10
153.76
-0.072792
11
169.96
0.100170
12
166.11
-0.022913
13
164.30
-0.010956
14
166.39
0.012640
15
163.95
-0.014773
16
168.85
0.029449
17
168.46
-0.002312
18
171.54
0.018118
19
172.54
0.005813
20
174.81
0.013071
21
180.22
0.030479
22
184.29
0.022332
23
182.22
-0.011296
24
178.86
-0.018611
25
179.81
0.005297
26
185.50
0.031154
27
189.95
0.023706
28
194.30
0.022642
29
194.21
-0.000463
...
...
...
970
374.05
-0.018489
971
379.74
0.015097
972
383.93
0.010973
973
384.14
0.000547
974
377.48
-0.017489
975
379.94
0.006496
976
384.62
0.012242
977
389.30
0.012094
978
392.96
0.009358
979
400.50
0.019006
980
411.63
0.027411
981
413.45
0.004412
982
412.14
-0.003173
983
401.82
-0.025359
984
404.30
0.006153
985
403.17
-0.002799
986
399.26
-0.009745
987
397.01
-0.005651
988
390.57
-0.016354
989
381.32
-0.023968
990
374.60
-0.017780
991
372.50
-0.005622
992
378.25
0.015318
993
377.37
-0.002329
994
369.80
-0.020264
995
388.81
0.050128
996
400.29
0.029098
997
402.19
0.004735
998
408.43
0.015396
999
422.00
0.032685
1000 rows × 2 columns
In [28]:
sd_apple['SMA_1'] = sd_apple['AAPL'].rolling(60).mean()
sd_apple
Out[28]:
AAPL
returns
SMA_1
0
185.09
NaN
NaN
1
187.00
0.010266
NaN
2
189.95
0.015652
NaN
3
187.44
-0.013302
NaN
4
187.87
0.002291
NaN
5
186.18
-0.009036
NaN
6
191.79
0.029687
NaN
7
186.30
-0.029043
NaN
8
175.47
-0.059890
NaN
9
165.37
-0.059283
NaN
10
153.76
-0.072792
NaN
11
169.96
0.100170
NaN
12
166.11
-0.022913
NaN
13
164.30
-0.010956
NaN
14
166.39
0.012640
NaN
15
163.95
-0.014773
NaN
16
168.85
0.029449
NaN
17
168.46
-0.002312
NaN
18
171.54
0.018118
NaN
19
172.54
0.005813
NaN
20
174.81
0.013071
NaN
21
180.22
0.030479
NaN
22
184.29
0.022332
NaN
23
182.22
-0.011296
NaN
24
178.86
-0.018611
NaN
25
179.81
0.005297
NaN
26
185.50
0.031154
NaN
27
189.95
0.023706
NaN
28
194.30
0.022642
NaN
29
194.21
-0.000463
NaN
...
...
...
...
970
374.05
-0.018489
363.005167
971
379.74
0.015097
363.902500
972
383.93
0.010973
364.858000
973
384.14
0.000547
365.719667
974
377.48
-0.017489
366.565167
975
379.94
0.006496
367.478167
976
384.62
0.012242
368.550833
977
389.30
0.012094
369.783833
978
392.96
0.009358
370.911500
979
400.50
0.019006
372.209667
980
411.63
0.027411
373.549667
981
413.45
0.004412
375.001333
982
412.14
-0.003173
376.336333
983
401.82
-0.025359
377.445667
984
404.30
0.006153
378.616667
985
403.17
-0.002799
379.741667
986
399.26
-0.009745
380.675000
987
397.01
-0.005651
381.468000
988
390.57
-0.016354
382.114833
989
381.32
-0.023968
382.516833
990
374.60
-0.017780
382.765000
991
372.50
-0.005622
383.073333
992
378.25
0.015318
383.481667
993
377.37
-0.002329
383.804167
994
369.80
-0.020264
384.004667
995
388.81
0.050128
384.402833
996
400.29
0.029098
384.844333
997
402.19
0.004735
385.266667
998
408.43
0.015396
385.625500
999
422.00
0.032685
386.204000
1000 rows × 3 columns
In [29]:
sd_apple['SMA_2'] = sd_apple['AAPL'].rolling(10).mean()
sd_apple
Out[29]:
AAPL
returns
SMA_1
SMA_2
0
185.09
NaN
NaN
NaN
1
187.00
0.010266
NaN
NaN
2
189.95
0.015652
NaN
NaN
3
187.44
-0.013302
NaN
NaN
4
187.87
0.002291
NaN
NaN
5
186.18
-0.009036
NaN
NaN
6
191.79
0.029687
NaN
NaN
7
186.30
-0.029043
NaN
NaN
8
175.47
-0.059890
NaN
NaN
9
165.37
-0.059283
NaN
184.246
10
153.76
-0.072792
NaN
181.113
11
169.96
0.100170
NaN
179.409
12
166.11
-0.022913
NaN
177.025
13
164.30
-0.010956
NaN
174.711
14
166.39
0.012640
NaN
172.563
15
163.95
-0.014773
NaN
170.340
16
168.85
0.029449
NaN
168.046
17
168.46
-0.002312
NaN
166.262
18
171.54
0.018118
NaN
165.869
19
172.54
0.005813
NaN
166.586
20
174.81
0.013071
NaN
168.691
21
180.22
0.030479
NaN
169.717
22
184.29
0.022332
NaN
171.535
23
182.22
-0.011296
NaN
173.327
24
178.86
-0.018611
NaN
174.574
25
179.81
0.005297
NaN
176.160
26
185.50
0.031154
NaN
177.825
27
189.95
0.023706
NaN
179.974
28
194.30
0.022642
NaN
182.250
29
194.21
-0.000463
NaN
184.417
...
...
...
...
...
970
374.05
-0.018489
363.005167
378.339
971
379.74
0.015097
363.902500
380.669
972
383.93
0.010973
364.858000
381.702
973
384.14
0.000547
365.719667
382.498
974
377.48
-0.017489
366.565167
382.874
975
379.94
0.006496
367.478167
382.510
976
384.62
0.012242
368.550833
381.975
977
389.30
0.012094
369.783833
381.906
978
392.96
0.009358
370.911500
382.719
979
400.50
0.019006
372.209667
384.666
980
411.63
0.027411
373.549667
388.424
981
413.45
0.004412
375.001333
391.795
982
412.14
-0.003173
376.336333
394.616
983
401.82
-0.025359
377.445667
396.384
984
404.30
0.006153
378.616667
399.066
985
403.17
-0.002799
379.741667
401.389
986
399.26
-0.009745
380.675000
402.853
987
397.01
-0.005651
381.468000
403.624
988
390.57
-0.016354
382.114833
403.385
989
381.32
-0.023968
382.516833
401.467
990
374.60
-0.017780
382.765000
397.764
991
372.50
-0.005622
383.073333
393.669
992
378.25
0.015318
383.481667
390.280
993
377.37
-0.002329
383.804167
387.835
994
369.80
-0.020264
384.004667
384.385
995
388.81
0.050128
384.402833
382.949
996
400.29
0.029098
384.844333
383.052
997
402.19
0.004735
385.266667
383.570
998
408.43
0.015396
385.625500
385.356
999
422.00
0.032685
386.204000
389.424
1000 rows × 4 columns
In [30]:
sd_apple[['AAPL','SMA_1', 'SMA_2']].plot(figsize=(10,6))
Out[30]:
<matplotlib.axes._subplots.AxesSubplot at 0x11f752f28>
In [ ]:
Content source: HydPy/HydPy-meetups
Similar notebooks: