0.

Directory
Business_id for other cuisine types

1. Load 'review'



In [1]:

    
import pandas as pd

# Load the review dump into a DataFrame. The file holds one JSON record per
# line, so let pandas parse it line by line instead of reading every line,
# stripping it, and gluing the whole file into one giant "[...]" string.
# That manual approach kept several full copies of the file in memory and
# broke on Python 3, where str.join cannot join the bytes read in 'rb' mode.
rw = pd.read_json('./yelp_academic_dataset_review.json', lines=True)

2. Load 'cuisine' (e.g. 'Chinese Cuisine')



In [74]:

    
# Business IDs for the Japanese subset (per the file name). The preview shows
# the business ID appearing both as the frame's index and as the
# `business_id` column.
jpn_ids_path = "./ALL_Japanese_Business_ID.csv"
all_jpn = pd.read_csv(jpn_ids_path)
all_jpn.head(n=3)









    Out[74]:






  
    
      
      business_id
    
  
  
    
      Eq3qA7F5uZBUbcYXROzntA
      Eq3qA7F5uZBUbcYXROzntA
    
    
      Ld2hhA3q3cdkptwS1fsYEg
      Ld2hhA3q3cdkptwS1fsYEg
    
    
      tGBeFfwXCUZOsb0YWiMWIA
      tGBeFfwXCUZOsb0YWiMWIA

3. Merge 1 & 2 (attach the reviews to the cuisine's business IDs)



In [75]:

    
# Left-join the reviews onto the business-ID list: the left key is all_jpn's
# index, the right key is the reviews' `business_id` column. Because both
# frames also carry a `business_id` column, the result gets suffixed
# `business_id_x` / `business_id_y` columns.
all_rw_jpn = all_jpn.merge(rw, how='left', left_index=True, right_on='business_id')

# Row counts before and after the merge, as a quick sanity check.
# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(len(rw))
print(len(all_jpn))
print(len(all_rw_jpn))

4. Combine all 'reviews' for each 'business_id' into a single text



In [76]:

    
# Work on a trimmed copy of the merged frame: the suffixed duplicate key
# columns are not needed, only `business_id` and the review columns.
test = all_rw_jpn.drop(['business_id_x', 'business_id_y'], axis=1)
# Keep `test` indexed by business ID (unnamed index), as before; the grouping
# below uses the `business_id` column, not this index.
test = test.set_index(keys=test['business_id'].values)

# One string per business: all of its review texts joined with '. '.
# The left merge can leave a business without any review, in which case its
# text is NaN and str.join would raise TypeError -- drop nulls first so such
# a business simply gets an empty string.
output = test.groupby('business_id')['text'].apply(
    lambda texts: '. '.join(texts.dropna())
)

# Flatten into a two-column frame (text, business_id) with a fresh 0..n-1
# index, built in one step rather than via in-place rename/reset_index.
output2 = pd.DataFrame(
    {'text': output.values, 'business_id': output.index.values},
    columns=['text', 'business_id']
)

# Number of distinct businesses that ended up in the result.
print(len(output))
output2.head(10)









    



1625






    Out[76]:






  
    
      
      text
      business_id
    
  
  
    
      0
      Came here for lunch \nOrdered combination bent...
      -4bPFENRdTqjML8aKEL6ow
    
    
      1
      Place was okay, came here because we couldn't ...
      -6mzdR0YjOToJ8E04Y9O0Q
    
    
      2
      Prices have gone up, quality has gone down. Su...
      -AVRReI-nfsa0lKlehEojw
    
    
      3
      After unsuccessfully trying the other location...
      -BbnAc9YEO6pjvJGEtFbVQ
    
    
      4
      UPDATE: Went back!!! Kuddos to the owners! Had...
      -DnaKAs2oK3rXfrjSvn9ew
    
    
      5
      New at red Rock casino. A bit overpriced. Slow...
      -FcZY7a7qgxTUlTvwuyJnQ
    
    
      6
      Maybe I am a sushi snob because we have amazin...
      -FyvAo_bNe6eXWpEHpSUrQ
    
    
      7
      You should be able to give 0 stars when you ba...
      -GOsHrWPC0meDRgkCEgC8w
    
    
      8
      First thing I noticed when I walked in was the...
      -ITj6Pu8Gdw8MmLf0XBEKQ
    
    
      9
      Hands down best sushi I've eaten in Toronto. E...
      -Ipm_8YXj9UoNNHagCvsNg

5. Output



In [77]:

    
# Write the per-business review text to disk as UTF-8 CSV. The row index is
# still written, but index_label=False omits its header field.
out_csv_path = "./all_jpn_rw.csv"
output2.to_csv(
    out_csv_path,
    index_label=False,
    encoding='utf-8'
)

	business_id
Eq3qA7F5uZBUbcYXROzntA	Eq3qA7F5uZBUbcYXROzntA
Ld2hhA3q3cdkptwS1fsYEg	Ld2hhA3q3cdkptwS1fsYEg
tGBeFfwXCUZOsb0YWiMWIA	tGBeFfwXCUZOsb0YWiMWIA

	text	business_id
0	Came here for lunch \nOrdered combination bent...	-4bPFENRdTqjML8aKEL6ow
1	Place was okay, came here because we couldn't ...	-6mzdR0YjOToJ8E04Y9O0Q
2	Prices have gone up, quality has gone down. Su...	-AVRReI-nfsa0lKlehEojw
3	After unsuccessfully trying the other location...	-BbnAc9YEO6pjvJGEtFbVQ
4	UPDATE: Went back!!! Kuddos to the owners! Had...	-DnaKAs2oK3rXfrjSvn9ew
5	New at red Rock casino. A bit overpriced. Slow...	-FcZY7a7qgxTUlTvwuyJnQ
6	Maybe I am a sushi snob because we have amazin...	-FyvAo_bNe6eXWpEHpSUrQ
7	You should be able to give 0 stars when you ba...	-GOsHrWPC0meDRgkCEgC8w
8	First thing I noticed when I walked in was the...	-ITj6Pu8Gdw8MmLf0XBEKQ
9	Hands down best sushi I've eaten in Toronto. E...	-Ipm_8YXj9UoNNHagCvsNg