Import Statement


In [1]:
import pandas as pd

Basics


In [2]:
%%time
df = pd.read_csv('sample_data.csv')


CPU times: user 22.1 s, sys: 9.06 s, total: 31.2 s
Wall time: 35 s

Bonus: Parquet


In [3]:
%%time
df = pd.read_parquet('sample_data_parquet')


CPU times: user 7.31 s, sys: 9.37 s, total: 16.7 s
Wall time: 12.9 s

In [4]:
# Preview the first rows of the loaded frame (head() defaults to five rows)
# to sanity-check the columns and values without rendering the whole table.
df.head()


Out[4]:
alphabet useless_letter Z Y X W V U T S ... J I H G F E D C B A
index
0 ABCDEFGHIJKLMNOPQRSTUVWXYZ U 1 66 69 4 70 64 83 55 ... 36 28 33 57 23 86 48 30 91 84
1 ABCDEFGHIJKLMNOPQRSTUVWXYZ L 25 26 36 34 0 75 60 73 ... 42 91 8 24 64 13 43 47 94 11
2 ABCDEFGHIJKLMNOPQRSTUVWXYZ N 40 67 36 54 46 5 57 50 ... 82 76 24 60 3 55 64 28 26 89
3 ABCDEFGHIJKLMNOPQRSTUVWXYZ C 91 98 8 36 17 3 29 90 ... 88 24 7 51 52 87 1 6 19 48
4 ABCDEFGHIJKLMNOPQRSTUVWXYZ J 87 68 73 78 39 67 57 24 ... 9 14 0 2 51 18 95 71 28 13

5 rows × 28 columns


In [5]:
# Confirm that the loader returned a pandas DataFrame.
type(df)


Out[5]:
pandas.core.frame.DataFrame

In [6]:
# Evaluate the frame as the cell's last expression so the notebook renders it.
# The rendered table is truncated to the first and last rows plus the overall
# shape line, so this is mainly a way to see the total row/column count.
df


Out[6]:
alphabet useless_letter Z Y X W V U T S ... J I H G F E D C B A
index
0 ABCDEFGHIJKLMNOPQRSTUVWXYZ U 1 66 69 4 70 64 83 55 ... 36 28 33 57 23 86 48 30 91 84
1 ABCDEFGHIJKLMNOPQRSTUVWXYZ L 25 26 36 34 0 75 60 73 ... 42 91 8 24 64 13 43 47 94 11
2 ABCDEFGHIJKLMNOPQRSTUVWXYZ N 40 67 36 54 46 5 57 50 ... 82 76 24 60 3 55 64 28 26 89
3 ABCDEFGHIJKLMNOPQRSTUVWXYZ C 91 98 8 36 17 3 29 90 ... 88 24 7 51 52 87 1 6 19 48
4 ABCDEFGHIJKLMNOPQRSTUVWXYZ J 87 68 73 78 39 67 57 24 ... 9 14 0 2 51 18 95 71 28 13
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8999995 ABCDEFGHIJKLMNOPQRSTUVWXYZ F 14 6 87 54 44 93 39 5 ... 25 31 72 35 31 20 38 19 92 55
8999996 ABCDEFGHIJKLMNOPQRSTUVWXYZ F 3 86 73 6 47 72 31 75 ... 55 60 72 60 85 58 34 79 40 22
8999997 ABCDEFGHIJKLMNOPQRSTUVWXYZ U 6 88 22 17 0 18 47 16 ... 41 68 56 17 15 7 29 14 86 9
8999998 ABCDEFGHIJKLMNOPQRSTUVWXYZ V 14 97 6 5 4 55 56 29 ... 71 86 38 92 4 20 0 58 96 2
8999999 ABCDEFGHIJKLMNOPQRSTUVWXYZ N 5 76 72 99 50 42 68 43 ... 30 63 68 65 84 46 46 41 52 15

9000000 rows × 28 columns

Computation


In [7]:
%%time
df['mean'] = df.mean(axis=1)


CPU times: user 36.7 s, sys: 45.7 s, total: 1min 22s
Wall time: 1min 56s

In [ ]: