In [3]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline
# Directory holding the Yelp dataset challenge JSON files. Keep the trailing
# slash: file paths are built by plain string concatenation with this value.
# NOTE(review): this is an absolute, machine-specific path -- adjust it (or make
# it configurable) before running the notebook on another machine.
DataFolder = "/media/yisi/Data/Challenge/yelp_dataset_challenge_round9/"
In [41]:
# Load the Yelp review dump into a DataFrame.
#
# The file is newline-delimited JSON (one JSON object per line), so pandas can
# parse it directly with lines=True. This replaces the manual approach of
# reading every line into a list, stripping it, and gluing everything into one
# huge "[...]" string, which:
#   * fails on Python 3 (a file opened in 'rb' mode yields bytes, which cannot
#     be joined with a str separator),
#   * keeps several full copies of the file in memory at once, and
#   * relies on passing a literal JSON string to read_json, which newer pandas
#     versions deprecate.
review_path = DataFolder + "yelp_academic_dataset_review.json"
data_df = pd.read_json(review_path, lines=True)
In [42]:
# Preview the first 6 rows to sanity-check that the load worked.
data_df.head(6)
Out[42]:
In [43]:
# Inspect the dtype pandas inferred for each column.
data_df.dtypes
Out[43]:
In [44]:
# Look at the 'text' value of the row labelled 0 (label-based lookup via .loc).
# Presumably this is the full body of one review -- confirm against the output.
data_df.loc[0,'text']
Out[44]:
In [23]:
Out[23]:
In [ ]: