In [1]:
from pathlib import Path

import pandas as pd

In [2]:
# Load the tip table from the interim US_cities_only CSV.
tip = pd.read_csv(filepath_or_buffer='../../../data/interim/US_cities_only/tip_US.csv')

In [3]:
# Preview the first rows to sanity-check the loaded columns.
tip.head()


Out[3]:
business_id date likes text type user_id
0 dAa0hB2yrnHzVmsCkN4YvQ 2014-06-20 0 Nice place. Great staff. A fixture in the tow... tip oaYhjqBbh18ZhU0bpyzSuw
1 dAa0hB2yrnHzVmsCkN4YvQ 2016-10-12 0 Happy hour 5-7 Monday - Friday tip ulQ8Nyj7jCUR8M83SUMoRQ
2 SqW3igh1_Png336VIb5DUA 2016-07-03 0 Come early on Sunday's to avoid the rush tip ulQ8Nyj7jCUR8M83SUMoRQ
3 KNpcPGqDORDdvtekXd348w 2016-01-07 0 Love their soup! tip ulQ8Nyj7jCUR8M83SUMoRQ
4 KNpcPGqDORDdvtekXd348w 2016-05-22 0 Soups are fantastic! tip ulQ8Nyj7jCUR8M83SUMoRQ

In [4]:
# Cleaning 'date' column

# Parse the raw date strings into datetime64 values. assign() replaces the
# column in its existing position and `tip` is rebound to the resulting frame,
# so re-running this cell yields the same result.
tip = tip.assign(date=pd.to_datetime(tip['date']))

In [5]:
# Confirm each column's dtype (notably 'date' after conversion) and non-null counts.
tip.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 833142 entries, 0 to 833141
Data columns (total 6 columns):
business_id    833142 non-null object
date           833142 non-null datetime64[ns]
likes          833142 non-null int64
text           833142 non-null object
type           833142 non-null object
user_id        833142 non-null object
dtypes: datetime64[ns](1), int64(1), object(4)
memory usage: 38.1+ MB

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for the frame.
tip.describe()


Out[6]:
likes
count 833142.000000
mean 0.016962
std 0.148393
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 15.000000

In [7]:
# Writing clean 'tip' dataframe to csv

tip_clean_path = '../../../data/interim/clean_US_cities/tip_clean.csv'

# to_csv does not create missing directories; make sure the output folder
# exists so the notebook runs end-to-end on a fresh checkout.
Path(tip_clean_path).parent.mkdir(parents=True, exist_ok=True)

# index=False keeps the RangeIndex out of the file.
tip.to_csv(tip_clean_path, encoding='utf-8', index=False)