In [1]:
import pandas as pd
In [2]:
# Load the US-cities review table from the interim data directory.
review = pd.read_csv(
    filepath_or_buffer='../../../data/interim/US_cities_only/review_US.csv',
)
In [45]:
# Preview the first rows of `review` (pandas shows 5 by default).
review.head()
Out[45]:
In [44]:
# Print pandas' concise summary of `review`: columns, non-null counts, dtypes.
review.info()
In [38]:
# Show descriptive statistics for the columns `describe` covers by default.
review.describe()
Out[38]:
In [3]:
# Parse the 'date' column into pandas datetimes, replacing the original values.
review['date'] = review['date'].pipe(pd.to_datetime)
In [4]:
# Cleaning the 'useful' column: treat missing vote counts as 0 and store the
# column as integers. `astype` converts the whole column in one vectorised
# step instead of calling Python's `int` once per row via `.map(int)`.
review['useful'] = review['useful'].fillna(0).astype('int64')
In [13]:
# Read the cleaned Vegas three-year reviews, keep those dated 2016-01-01 or
# later, and write that subset to its own CSV.
# Dedicated names are used here so the cleaned `review` frame built in the
# cells above is not overwritten before it is written to review_clean.csv.
vegas_reviews = pd.read_csv('../../../data/interim/clean_US_cities/reviews_vegas_threeyears.csv')
vegas_reviews['date'] = pd.to_datetime(vegas_reviews['date'])
# Compare against the first instant of 2016. `> '2015-12-31'` means "after
# midnight on 2015-12-31", which lets timestamped reviews from that day through.
reviews_2016 = vegas_reviews.loc[vegas_reviews['date'] >= '2016-01-01']
reviews_2016.to_csv('../../../data/interim/clean_US_cities/2016_review.csv', encoding='utf-8', index=False)
In [46]:
# Persist the `review` dataframe as UTF-8 CSV, without the index column.
review.to_csv(
    path_or_buf='../../../data/interim/clean_US_cities/review_clean.csv',
    index=False,
    encoding='utf-8',
)