Chapter_07_Part_02

import pandas as pd
import numpy as np
from pandas import DataFrame, Series

Data transformation

Removing duplicates

# Seven rows in which several (k1, k2) pairs occur more than once.
data = DataFrame(dict(
    k1=['one'] * 3 + ['two'] * 4,
    k2=[1, 1, 2, 3, 3, 4, 4],
))
data

k1
k2

0
one
1

1
one
1

2
one
2

3
two
3

4
two
3

5
two
4

6
two
4

# Flag every row that repeats an earlier row across all columns.
data.duplicated()

0    False
1     True
2    False
3    False
4     True
5    False
6     True
dtype: bool

# Return a new frame without the repeated rows; the first occurrence of each
# row is kept and `data` itself is not modified.
data.drop_duplicates()

k1
k2

0
one
1

2
one
2

3
two
3

5
two
4

# Add a running counter column so rows with equal keys stay distinguishable.
data['v1'] = np.arange(0, 7)
data

k1
k2
v1

0
one
1
0

1
one
1
1

2
one
2
2

3
two
3
3

4
two
3
4

5
two
4
5

6
two
4
6

# Deduplicate on 'k1' only: one row survives per distinct 'k1' value.
data.drop_duplicates(subset=['k1'])

k1
k2
v1

0
one
1
0

3
two
3
3

# Deduplicate on the (k1, k2) pair, keeping the last occurrence of each pair.
data.drop_duplicates(subset=['k1', 'k2'], keep='last')

k1
k2
v1

1
one
1
1

2
one
2
2

4
two
3
4

6
two
4
6

Transforming data using a function or mapping

# Meat names with inconsistent capitalisation, plus a weight for each entry.
data = DataFrame(dict(
    food=[
        'bacon', 'pulled pork', 'bacon', 'Pastrami', 'corned beef',
        'Bacon', 'pastrami', 'honey ham', 'nova lox',
    ],
    ounces=[4, 3, 12, 6, 7.5, 8, 3, 5, 6],
))
data

food
ounces

0
bacon
4.0

1
pulled pork
3.0

2
bacon
12.0

3
Pastrami
6.0

4
corned beef
7.5

5
Bacon
8.0

6
pastrami
3.0

7
honey ham
5.0

8
nova lox
6.0

# Lookup table from lower-case meat name to the animal it comes from.
meat_to_animal = dict([
    ('bacon', 'pig'),
    ('pulled pork', 'pig'),
    ('pastrami', 'cow'),
    ('corned beef', 'cow'),
    ('honey ham', 'pig'),
    ('nova lox', 'salmon'),
])

# Normalise the case first so 'Bacon' and 'Pastrami' match the lookup keys,
# then translate each meat into its animal.
lowercased_food = data['food'].map(str.lower)
data['animal'] = lowercased_food.map(meat_to_animal)
data

food
ounces
animal

0
bacon
4.0
pig

1
pulled pork
3.0
pig

2
bacon
12.0
pig

3
Pastrami
6.0
cow

4
corned beef
7.5
cow

5
Bacon
8.0
pig

6
pastrami
3.0
cow

7
honey ham
5.0
pig

8
nova lox
6.0
salmon

# Same translation in one pass: lower-case each name and look it up directly.
data['food'].map(lambda food: meat_to_animal[food.lower()])

0       pig
1       pig
2       pig
3       cow
4       cow
5       pig
6       cow
7       pig
8    salmon
Name: food, dtype: object

Replacing values

# -999 and -1000 act as sentinel values standing in for missing data.
data = Series([1.0, -999.0, 2.0, -999.0, -1000.0, 3.0])
data

0       1.0
1    -999.0
2       2.0
3    -999.0
4   -1000.0
5       3.0
dtype: float64

# Turn a single sentinel value into NaN.
data.replace(to_replace=-999, value=np.nan)

0       1.0
1       NaN
2       2.0
3       NaN
4   -1000.0
5       3.0
dtype: float64

# Turn several sentinel values into NaN at once.
data.replace(to_replace=[-999, -1000], value=np.nan)

0    1.0
1    NaN
2    2.0
3    NaN
4    NaN
5    3.0
dtype: float64

# Pairwise replacement: -999 becomes NaN, -1000 becomes 0.
data.replace(to_replace=[-999, -1000], value=[np.nan, 0])

0    1.0
1    NaN
2    2.0
3    NaN
4    0.0
5    3.0
dtype: float64

# The same pairwise replacement expressed as an old-value -> new-value mapping.
data.replace(to_replace={-999: np.nan, -1000: 0})

0    1.0
1    NaN
2    2.0
3    NaN
4    0.0
5    3.0
dtype: float64

Renaming axis indexes

# Label the rows explicitly: the index operations in this section (str.upper,
# str.title, renaming 'OHIO') need string labels, which the default integer
# index does not provide.
# NOTE(review): the middle label is not visible in the surviving output;
# 'Colorado' is assumed -- confirm against the original notebook.
data = DataFrame(np.arange(12).reshape((3, 4)),
                 index=['Ohio', 'Colorado', 'New York'],
                 columns=['one', 'two', 'three', 'four'])

# Compute upper-cased row labels; the result is not assigned, so the frame's
# own index is left as it was.
data.index.map(str.upper)

# The labels are still in their original case because the mapped index was
# never assigned back.
data

one
two
three
four

Ohio
0
1
2
3

Colorado
4
5
6
7

New York
8
9
10
11

# Assigning to .index is what actually relabels the rows in place.
data.index = [label.upper() for label in data.index]
data

one
two
three
four

OHIO
0
1
2
3

COLORADO
4
5
6
7

NEW YORK
8
9
10
11

# rename builds a transformed copy: title-case row labels, upper-case columns.
data.rename(
    index=str.title,
    columns=str.upper,
)

ONE
TWO
THREE
FOUR

Ohio
0
1
2
3

Colorado
4
5
6
7

New York
8
9
10
11

# The preceding rename returned a new frame, so `data` still carries its
# upper-case row labels and lower-case column names.
data

one
two
three
four

OHIO
0
1
2
3

COLORADO
4
5
6
7

NEW YORK
8
9
10
11

# Mappings rename only the labels they mention; all others stay as they are.
data.rename(
    index=dict(OHIO='INDIANA'),
    columns=dict(three='peekaboo'),
)

one
two
peekaboo
four

INDIANA
0
1
2
3

COLORADO
4
5
6
7

NEW YORK
8
9
10
11

# inplace=True modifies `data` directly; the call's return value is bound to
# the throwaway name `_`.
_ = data.rename(index=dict(OHIO='INDIANA'), inplace=True)
data

one
two
three
four

INDIANA
0
1
2
3

COLORADO
4
5
6
7

NEW YORK
8
9
10
11

Discretization and binning

# Sort a list of ages into the brackets 18-25, 26-35, 36-60 and 61-100.
ages = [
    20, 22, 25, 27, 21, 23,
    37, 31, 61, 45, 41, 32,
]
bins = [18, 25, 35, 60, 100]
cats = pd.cut(x=ages, bins=bins)
cats

[(18, 25], (18, 25], (18, 25], (25, 35], (18, 25], ..., (25, 35], (60, 100], (35, 60], (35, 60], (25, 35]]
Length: 12
Categories (4, object): [(18, 25] < (25, 35] < (35, 60] < (60, 100]]

# Integer position of each age's bin within the ordered categories.
cats.codes

array([0, 0, 0, 1, 0, 0, 2, 1, 3, 2, 2, 1], dtype=int8)

# Number of ages that fell into each bin.
cats.value_counts()

(18, 25]     5
(25, 35]     3
(35, 60]     3
(60, 100]    1
dtype: int64

# right=False closes each interval on the left instead of the right.
pd.cut(x=ages, bins=bins, right=False)

[[18, 25), [18, 25), [25, 35), [25, 35), [18, 25), ..., [25, 35), [60, 100), [35, 60), [35, 60), [25, 35)]
Length: 12
Categories (4, object): [[18, 25) < [25, 35) < [35, 60) < [60, 100)]

# Replace the interval notation with one readable name per bin.
group_names = [
    'Youth',
    'YoungAdult',
    'MiddleAged',
    'Senior',
]
pd.cut(x=ages, bins=bins, labels=group_names)

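[Youth, Youth, Youth, YoungAdult, Youth, ..., YoungAdult, Senior, MiddleAged, MiddleAged, YoungAdult]
Length: 12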
Categories (4, object): [Youth < YoungAdult < MiddleAged < Senior]

# An integer bin count asks for that many equal-width bins spanning the data;
# precision limits the digits used in the bin labels.
data = np.random.rand(20)
pd.cut(data, bins=4, precision=2)

[(0.064, 0.29], (0.74, 0.97], (0.29, 0.52], (0.29, 0.52], (0.74, 0.97], ..., (0.52, 0.74], (0.74, 0.97], (0.74, 0.97], (0.064, 0.29], (0.29, 0.52]]
Length: 20
Categories (4, object): [(0.064, 0.29] < (0.29, 0.52] < (0.52, 0.74] < (0.74, 0.97]]

# Equal-width bins generally hold unequal numbers of points.
pd.cut(data, bins=4, precision=2).value_counts()

(0.064, 0.29]    6
(0.29, 0.52]     5
(0.52, 0.74]     2
(0.74, 0.97]     7
dtype: int64

# qcut bins by sample quantiles, so q=4 cuts the data into quartiles.
data = np.random.randn(1000)
cats = pd.qcut(data, q=4)
cats

[[-3.568, -0.667], [-3.568, -0.667], (0.687, 2.922], (-0.667, -0.0231], (-0.667, -0.0231], ..., (-0.0231, 0.687], [-3.568, -0.667], (-0.667, -0.0231], (-0.667, -0.0231], [-3.568, -0.667]]
Length: 1000
Categories (4, object): [[-3.568, -0.667] < (-0.667, -0.0231] < (-0.0231, 0.687] < (0.687, 2.922]]

# Quantile-based bins hold the same number of observations each.
cats.value_counts()

[-3.568, -0.667]     250
(-0.667, -0.0231]    250
(-0.0231, 0.687]     250
(0.687, 2.922]       250
dtype: int64

# Custom quantile edges: bottom 10%, next 40%, next 40%, top 10%.
pd.qcut(data, q=[0, 0.1, 0.5, 0.9, 1.0])

[(-1.277, -0.0231], [-3.568, -1.277], (1.303, 2.922], (-1.277, -0.0231], (-1.277, -0.0231], ..., (-0.0231, 1.303], (-1.277, -0.0231], (-1.277, -0.0231], (-1.277, -0.0231], [-3.568, -1.277]]
Length: 1000
Categories (4, object): [[-3.568, -1.277] < (-1.277, -0.0231] < (-0.0231, 1.303] < (1.303, 2.922]]

Detecting and filtering outliers

# Fix the seed so the outliers found below are reproducible, then draw a
# 1000 x 4 table of standard-normal values.
np.random.seed(12345)
data = DataFrame(np.random.standard_normal((1000, 4)))
data.describe()

0
1
2
3

count
1000.000000
1000.000000
1000.000000
1000.000000

mean
-0.067684
0.067924
0.025598
-0.002298

std
0.998035
0.992106
1.006835
0.996794

min
-3.428254
-3.548824
-3.184377
-3.745356

25%
-0.774890
-0.591841
-0.641675
-0.644144

50%
-0.116401
0.101143
0.002073
-0.013611

75%
0.616366
0.780282
0.680391
0.654328

max
3.366626
2.653656
3.260383
3.927528

# Entries of column 3 whose magnitude exceeds 3.
col = data[3]
col[col.abs() > 3]

97     3.927528
305   -3.399312
400   -3.745356
Name: 3, dtype: float64

# Rows in which at least one column exceeds 3 in absolute value. `axis` is
# passed by keyword because recent pandas no longer accepts it positionally.
data[(np.abs(data) > 3).any(axis=1)]

0
1
2
3

5
-0.539741
0.476985
3.248944
-1.021228

97
-0.774363
0.552936
0.106061
3.927528

102
-0.655054
-0.565230
3.176873
0.959533

305
-2.315555
0.457246
-0.025907
-3.399312

324
0.050188
1.951312
3.260383
0.963301

400
0.146326
0.508391
-0.196713
-3.745356

499
-0.293333
-0.242459
-3.056990
1.918403

523
-3.428254
-0.296336
-0.439938
-0.867165

586
0.275144
1.179227
-3.184377
1.369891

808
-0.362528
-3.548824
1.553205
-2.186301

900
3.366626
-2.372214
0.851010
1.332846

# Cap every value at +/-3: entries beyond the limit are replaced by 3 times
# their sign, everything else is kept, and the result is wrapped in a frame.
data = DataFrame(np.where(data.abs() > 3, 3 * np.sign(data), data))
data.describe()

0
1
2
3

count
1000.000000
1000.000000
1000.000000
1000.000000

mean
-0.067623
0.068473
0.025153
-0.002081

std
0.995485
0.990253
1.003977
0.989736

min
-3.000000
-3.000000
-3.000000
-3.000000

25%
-0.774890
-0.591841
-0.641675
-0.644144

50%
-0.116401
0.101143
0.002073
-0.013611

75%
0.616366
0.780282
0.680391
0.654328

max
3.000000
2.653656
3.000000
3.000000

Permutation and random sampling

# A 5 x 4 frame of consecutive integers and a random ordering of its rows.
df = DataFrame(np.arange(20).reshape(5, 4))
sampler = np.random.permutation(5)
sampler

array([1, 0, 2, 3, 4])

# Show the frame in its original row order.
df

0
1
2
3

0
0
1
2
3

1
4
5
6
7

2
8
9
10
11

3
12
13
14
15

4
16
17
18
19

# Reorder the rows by position according to the permutation.
df.take(indices=sampler, axis=0)

0
1
2
3

1
4
5
6
7

0
0
1
2
3

2
8
9
10
11

3
12
13
14
15

4
16
17
18
19

# Sample three rows without replacement: shuffle all row positions and keep
# the first three.
df.take(indices=np.random.permutation(len(df))[:3], axis=0)

0
1
2
3

1
4
5
6
7

3
12
13
14
15

4
16
17
18
19

# Sampling with replacement: draw ten random positions into `bag`.
bag = np.array([5, 7, -1, 6, 4])
sampler = np.random.randint(low=0, high=len(bag), size=10)
sampler

array([4, 4, 2, 2, 2, 0, 3, 0, 4, 1])

# Look up the drawn positions to obtain the sampled values.
draws = bag[sampler]
draws

array([ 4,  4, -1, -1, -1,  5,  6,  5,  4,  7])

Computing indicator / dummy variables

# A categorical 'key' column alongside a numeric column.
df = DataFrame(dict(
    key=['b', 'b', 'a', 'c', 'a', 'b'],
    data1=range(6),
))
df

data1
key

0
0
b

1
1
b

2
2
a

3
3
c

4
4
a

5
5
b

# One indicator column per distinct value found in 'key'.
dummies = pd.get_dummies(df['key'])
dummies

a
b
c

0
0
1
0

1
0
1
0

2
1
0
0

3
0
0
1

4
1
0
0

5
0
1
0

# get_dummies joins prefix and value with its own '_', so passing 'key_'
# yields column names with a double underscore (e.g. 'key__a').
pd.get_dummies(df['key'], prefix='key_')

key__a
key__b
key__c

0
0
1
0

1
0
1
0

2
1
0
0

3
0
0
1

4
1
0
0

5
0
1
0

# Put the numeric column and the indicator columns side by side, aligned on
# the row index.
df_with_dummy = df[['data1']].join(dummies, how='left')
df_with_dummy

data1
a
b
c

0
0
0
1
0

1
1
0
1
0

2
2
1
0
0

3
3
0
0
1

4
4
1
0
0

5
5
0
1
0

# Load the movie table: three '::'-separated fields per line, no header row.
# The multi-character separator is why the python parsing engine is requested.
# NOTE(review): the opening of this call was missing from the source; the
# file path below is an assumption -- point it at the local movies.dat.
mnames = ['movie_id', 'title', 'genres']
movies = pd.read_table('movielens/movies.dat', sep='::', header=None,
                       names=mnames, engine='python')
movies[:10]

movie_id
title
genres

0
1
Toy Story (1995)
Animation|Children's|Comedy

1
2
Jumanji (1995)
Adventure|Children's|Fantasy

2
3
Grumpier Old Men (1995)
Comedy|Romance

3
4
Waiting to Exhale (1995)
Comedy|Drama

4
5
Father of the Bride Part II (1995)
Comedy

5
6
Heat (1995)
Action|Crime|Thriller

6
7
Sabrina (1995)
Comedy|Romance

7
8
Tom and Huck (1995)
Adventure|Children's

8
9
Sudden Death (1995)
Action

9
10
GoldenEye (1995)
Action|Adventure|Thriller

# Lazily split each '|'-separated genre string into a set, then merge all the
# sets to obtain the sorted list of distinct genres.
genre_iter = (set(entry.split('|')) for entry in movies['genres'])
print(type(genre_iter))
genres = sorted(set.union(*genre_iter))
genres

<class 'generator'>

Out[73]:

['Action',
'Adventure',
'Animation',
"Children's",
'Comedy',
'Crime',
'Documentary',
'Drama',
'Fantasy',
'Film-Noir',
'Horror',
'Musical',
'Mystery',
'Romance',
'Sci-Fi',
'Thriller',
'War',
'Western']

# Start from an all-zero int32 indicator matrix: one row per movie, one
# column per genre.
dummies = DataFrame(
    np.zeros((len(movies), len(genres)), dtype=np.int32),
    columns=genres,
)
dummies

Action
Adventure
Animation
Children's
Comedy
Crime
Documentary
Drama
Fantasy
Film-Noir
Horror
Musical
Mystery
Romance
Sci-Fi
Thriller
War
Western

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

2
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

4
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

5
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

6
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

7
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

8
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

9
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

10
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

11
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

12
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

13
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

14
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

15
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

16
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

17
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

18
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

19
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

20
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

21
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

22
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

23
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

24
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

25
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

26
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

27
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

28
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

29
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...

3853
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3854
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3855
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3856
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3857
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3858
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3859
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3860
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3861
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3862
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3863
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3864
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3865
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3866
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3867
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3868
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3869
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3870
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3871
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3872
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3873
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3874
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3875
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3876
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3877
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3878
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3879
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3880
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3881
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3882
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3883 rows × 18 columns

# Switch on the indicator columns for every genre listed on each movie.
# `.loc` replaces the removed `.ix` indexer; `dummies` was built with the
# default integer index, so label i addresses the i-th movie.
for i, gen in enumerate(movies.genres):
    dummies.loc[i, gen.split('|')] = 1
dummies

Action
Adventure
Animation
Children's
Comedy
Crime
Documentary
Drama
Fantasy
Film-Noir
Horror
Musical
Mystery
Romance
Sci-Fi
Thriller
War
Western

0
0
0
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0

1
0
1
0
1
0
0
0
0
1
0
0
0
0
0
0
0
0
0

2
0
0
0
0
1
0
0
0
0
0
0
0
0
1
0
0
0
0

3
0
0
0
0
1
0
0
1
0
0
0
0
0
0
0
0
0
0

4
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0

5
1
0
0
0
0
1
0
0
0
0
0
0
0
0
0
1
0
0

6
0
0
0
0
1
0
0
0
0
0
0
0
0
1
0
0
0
0

7
0
1
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0

8
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

9
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0

10
0
0
0
0
1
0
0
1
0
0
0
0
0
1
0
0
0
0

11
0
0
0
0
1
0
0
0
0
0
1
0
0
0
0
0
0
0

12
0
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0

13
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0

14
1
1
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0

15
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
0
0

16
0
0
0
0
0
0
0
1
0
0
0
0
0
1
0
0
0
0

17
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0

18
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0

19
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

20
1
0
0
0
1
0
0
1
0
0
0
0
0
0
0
0
0
0

21
0
0
0
0
0
1
0
1
0
0
0
0
0
0
0
1
0
0

22
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0

23
0
0
0
0
0
0
0
1
0
0
0
0
0
0
1
0
0
0

24
0
0
0
0
0
0
0
1
0
0
0
0
0
1
0
0
0
0

25
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0

26
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0

27
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0

28
0
1
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0

29
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0

...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...

3853
0
0
0
0
0
0
0
0
0
0
1
0
0
0
1
0
0
0

3854
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0

3855
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0

3856
0
1
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0

3857
0
1
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0

3858
0
0
0
0
1
0
0
0
0
0
1
0
0
0
0
0
0
0

3859
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0

3860
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0

3861
0
0
0
0
0
0
0
0
0
0
1
0
0
0
1
0
0
0

3862
0
0
0
0
0
0
0
0
0
0
1
0
0
0
1
0
0
0

3863
0
0
0
0
0
0
0
0
0
0
1
0
0
0
1
0
0
0

3864
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0

3865
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0

3866
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
0
0

3867
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
0
0

3868
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0

3869
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0

3870
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0

3871
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0

3872
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0

3873
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0

3874
0
0
0
0
1
0
0
1
0
0
0
0
0
0
0
0
0
0

3875
0
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0

3876
1
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
0
0

3877
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0

3878
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0

3879
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0

3880
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0

3881
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0

3882
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
0
0

3883 rows × 18 columns

# Attach the prefixed indicator columns to the movie table; this definition
# was missing, which left `movies_windic` undefined.
movies_windic = movies.join(dummies.add_prefix('Genre_'))
# `.loc` replaces the removed `.ix` indexer for this label-based row lookup.
movies_windic.loc[0]

movie_id                                       1
title                           Toy Story (1995)
genres               Animation|Children's|Comedy
Genre_Action                                   0
Genre_Adventure                                0
Genre_Animation                                1
Genre_Children's                               1
Genre_Comedy                                   1
Genre_Crime                                    0
Genre_Documentary                              0
Genre_Drama                                    0
Genre_Fantasy                                  0
Genre_Film-Noir                                0
Genre_Horror                                   0
Genre_Musical                                  0
Genre_Mystery                                  0
Genre_Romance                                  0
Genre_Sci-Fi                                   0
Genre_Thriller                                 0
Genre_War                                      0
Genre_Western                                  0
Name: 0, dtype: object

# Ten draws from the uniform distribution on [0, 1).
values = np.random.rand(10)
values

array([ 0.75603383,  0.90830844,  0.96588737,  0.17373658,  0.87592824,
0.75415641,  0.163486  ,  0.23784062,  0.85564381,  0.58743194])

# Bin the values into fifths of the unit interval, then turn the bin
# membership into indicator columns.
bins = [0, 0.2, 0.4, 0.6, 0.8, 1]
pd.get_dummies(pd.cut(values, bins=bins))

(0, 0.2]
(0.2, 0.4]
(0.4, 0.6]
(0.6, 0.8]
(0.8, 1]

0
0
0
0
1
0

1
0
0
0
0
1

2
0
0
0
0
1

3
1
0
0
0
0

4
0
0
0
0
1

5
0
0
0
1
0

6
1
0
0
0
0

7
0
1
0
0
0

8
0
0
0
0
1

9
0
0
1
0
0

