In [167]:
import pandas as pd

In [197]:
# Read the merged movie table, decoding the file as latin1.
# Shape noted on the original run: (2384, 19).
df = pd.read_csv("FinalMerge.csv", encoding="latin1")

# Keep only the text before the first space of each 'Production' value.
# The original run noted 312 distinct production companies after this step.
production_first_word = df['Production'].str.split(' ').str[0]
df['Production'] = production_first_word

# Expand the ', '-separated 'Genre' string into one 0/1 indicator column per
# genre and append those columns to the right of the existing ones.
# Shape noted on the original run afterwards: (2384, 42), i.e. the indicator
# columns occupied positions 19 through 41.
genre_flags = df['Genre'].str.get_dummies(sep=', ')
df = pd.concat([df, genre_flags], axis=1)

In [198]:
# Genres whose indicator columns are summed into a single 'Other' column.
pooled_genres = ['Music', 'History', 'Sport', 'War', 'Western', 'Musical', 'Documentary', 'News', 'Short']

# Column that is kept -> column that is added into it and then removed.
folded_pairs = {'Thriller': 'Horror', 'Fantasy': 'Sci-Fi'}

# Row-wise sums: a result can exceed 1 when a row carries more than one of
# the genres being combined.
for kept_col, absorbed_col in folded_pairs.items():
    df[kept_col] = df[[kept_col, absorbed_col]].sum(axis=1)
df['Other'] = df[pooled_genres].sum(axis=1)

# Remove the source columns now that their values live in the combined ones.
# Shape noted on the original run afterwards: (2384, 32).
df.drop(columns=pooled_genres + list(folded_pairs.values()), inplace=True)


Out[198]:
(2384, 32)

In [206]:
# Genre indicator columns, selected by name instead of by position.
# These are the thirteen columns the original positional slice [19:32]
# resolved to on the recorded run; naming them means a change in column
# order or count raises a KeyError here instead of silently capping
# unrelated columns.
variables = [
    'Action', 'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime',
    'Drama', 'Family', 'Fantasy', 'Mystery', 'Romance', 'Thriller', 'Other',
]

# Cap every value above 1 at 1 in a single vectorized step, so each column
# is a 0/1 flag rather than a count.
df[variables] = df[variables].clip(upper=1)

# Guard the invariant the rest of the analysis relies on.
assert df[variables].isin([0, 1]).all().all(), "genre indicator columns must contain only 0/1"


Action
0    1744
1     640
Name: Action, dtype: int64
Adventure
0    1875
1     509
Name: Adventure, dtype: int64
Animation
0    2215
1     169
Name: Animation, dtype: int64
Biography
0    2212
1     172
Name: Biography, dtype: int64
Comedy
0    1542
1     842
Name: Comedy, dtype: int64
Crime
0    1971
1     413
Name: Crime, dtype: int64
Drama
1    1234
0    1150
Name: Drama, dtype: int64
Family
0    2226
1     158
Name: Family, dtype: int64
Fantasy
0    2023
1     361
Name: Fantasy, dtype: int64
Mystery
0    2177
1     207
Name: Mystery, dtype: int64
Romance
0    1990
1     394
Name: Romance, dtype: int64
Thriller
0    1841
1     543
Name: Thriller, dtype: int64
Other
0    2104
1     280
Name: Other, dtype: int64

In [204]:



Out[204]:
0    2104
1     280
Name: Other, dtype: int64

In [ ]: