In [1]:
# pandas is imported here because this is the first executed cell and no
# earlier import is visible; without it a fresh kernel fails with NameError.
import pandas as pd

# Load the preprocessed table and discard the columns listed below; the
# columns that remain are combined with the other tables in later cells.
# NOTE(review): this file is read from '../resource/' while every other path
# in this notebook uses './resource/' — confirm that directory is intended.
df = pd.read_csv('../resource/preprocess_df.csv')
df = df.drop(['title', 'director', 'actors', 'film_rate', 'genre', 'nation'], axis=1)

# Two more tables that are concatenated column-wise with `df` further down.
# (Presumably TF-IDF features and expected ratings, going by the file names
# — verify against the notebook that produced them.)
sparse_df = pd.read_csv('./resource/tfidf_df.csv')
expect_df = pd.read_csv('./resource/expect_df.csv')
In [2]:
# Prefix each star-rating column label ('0.5' ... '5') with 'star', then
# replace every NaN left in the frame with 3.
# Author's note (translated): the NaNs are Lee Dong-jin's missing ratings,
# imputed with his average star rating of 3.
df = df.rename(
    columns={
        label: 'star' + label
        for label in ('0.5', '1', '1.5', '2', '2.5', '3', '3.5', '4', '4.5', '5')
    }
).fillna(3)
In [3]:
# Put the columns of `df` and `sparse_df` side by side in a single frame.
final_df1 = pd.concat(
    objs=[df, sparse_df],
    axis=1,
)
# Show the first rows of the combined frame as the cell output.
final_df1.head()
Out[3]:
In [4]:
# Write the first combined frame to CSV, leaving the row index out of the file.
final_df1.to_csv(
    path_or_buf='./resource/final_df1.csv',
    index=False,
)
In [5]:
# Keep every column from the first one through 'year' (label slices include
# the end label).
# `.loc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in
# 1.0; with string column labels the two select exactly the same columns.
df1 = df.loc[:, :'year']

# Combine the leading columns with the two extra tables column-wise, then
# replace any NaN present in the result with 3.
final_df2 = pd.concat([df1, expect_df, sparse_df], axis=1).fillna(3)

# Remove the 'mean' column from the combined frame (rebinding instead of
# mutating in place keeps the cell's data flow explicit).
final_df2 = final_df2.drop('mean', axis=1)

# Show the first rows of the combined frame as the cell output.
final_df2.head()
Out[5]:
In [6]:
# Write the second combined frame to CSV, leaving the row index out of the file.
final_df2.to_csv(
    path_or_buf='./resource/final_df2.csv',
    index=False,
)
In [ ]: