notebook.community

Edit and run



In [167]:

    
import pandas as pd



In [197]:

    
# Load the merged dataset (recorded shape at this point: 2384 rows x 19 columns).
df = pd.read_csv("FinalMerge.csv", encoding="latin1")

# Keep only the first space-delimited token of each production company name
# (earlier exploration recorded 312 distinct values after this step).
first_token = df['Production'].str.split(' ').str[0]
df['Production'] = first_token

# Expand the ', '-separated 'Genre' string into one indicator column per genre
# and append those columns to the right of the existing ones
# (recorded shape afterwards: 2384 x 42, genre columns at positions 19-41).
genre_dummies = df['Genre'].str.get_dummies(sep=', ')
df = pd.concat([df, genre_dummies], axis=1)



In [198]:

    
# Fold related genres together so fewer indicator columns remain.
# The results are row-wise sums, so a title tagged with more than one of the
# merged genres gets a value above 1 here (counts, not strict 0/1 flags).
minor_genres = ['Music', 'History', 'Sport', 'War', 'Western', 'Musical', 'Documentary', 'News', 'Short']

df['Thriller'] = df[['Thriller', 'Horror']].sum(axis=1)  # Horror is absorbed into Thriller
df['Fantasy'] = df[['Fantasy', 'Sci-Fi']].sum(axis=1)    # Sci-Fi is absorbed into Fantasy
df['Other'] = df[minor_genres].sum(axis=1)               # new catch-all column, appended last

# Drop every source column that was folded into another one. The same list
# feeds both the 'Other' sum and the drop, so the two cannot drift apart.
df = df.drop(columns=minor_genres + ['Horror', 'Sci-Fi'])
# Recorded shape after this step: (2384, 32)









    Out[198]:





(2384, 32)



In [206]:

    
# Columns at positions 19-31 are taken as the genre indicator columns.
# This is a positional slice, so it relies on the column order left by the
# cells above.
variables = list(df)[19:32]

# Cap every value greater than 1 at 1 in those columns; values <= 1 are left
# untouched. One vectorized call replaces the per-column loop.
df[variables] = df[variables].clip(upper=1)









    



Action
0    1744
1     640
Name: Action, dtype: int64
Adventure
0    1875
1     509
Name: Adventure, dtype: int64
Animation
0    2215
1     169
Name: Animation, dtype: int64
Biography
0    2212
1     172
Name: Biography, dtype: int64
Comedy
0    1542
1     842
Name: Comedy, dtype: int64
Crime
0    1971
1     413
Name: Crime, dtype: int64
Drama
1    1234
0    1150
Name: Drama, dtype: int64
Family
0    2226
1     158
Name: Family, dtype: int64
Fantasy
0    2023
1     361
Name: Fantasy, dtype: int64
Mystery
0    2177
1     207
Name: Mystery, dtype: int64
Romance
0    1990
1     394
Name: Romance, dtype: int64
Thriller
0    1841
1     543
Name: Thriller, dtype: int64
Other
0    2104
1     280
Name: Other, dtype: int64



In [204]:









    Out[204]:





0    2104
1     280
Name: Other, dtype: int64



In [ ]: