In [1]:

    
import pandas as pd

CHN



In [2]:

    
# Business-id list; its `business_id` column supplies the row labels for
# both topic tables loaded below.
bid = pd.read_csv("../yelp-challenge/LDA/CHN15/01_ALL_Chinese_Business_ID.csv")

topic_CHN_5 = pd.read_csv(
    "../yelp-challenge/LDA/CHN5/topic2doc.csv",
    index_col=0,
)
topic_CHN_15 = pd.read_csv(
    "../yelp-challenge/LDA/CHN15/04_topic2doc.csv",
    index_col=0,
)

# Replace the index read from each file with the business ids.
# NOTE(review): presumes the topic2doc rows are in the same order as the id
# file -- confirm against whatever produced these CSVs.
topic_CHN_5.index = bid['business_id'].values
topic_CHN_15.index = bid['business_id'].values



In [3]:

    
# For each city keep only the columns from 'clusters_sp' through
# 'clusters_gm' (a label slice, so both end columns are included).
TOR_CHN = pd.read_csv(
    "../yelp-challenge/Anomaly/Result/torronto_chi_results.csv"
).loc[:, 'clusters_sp':'clusters_gm']

PHO_CHN = pd.read_csv(
    "../yelp-challenge/Anomaly/Result/Phoenix_chi_results.csv"
).loc[:, 'clusters_sp':'clusters_gm']

LAS_CHN = pd.read_csv(
    "../yelp-challenge/Anomaly/Result/LasVegas_chi_results.csv"
).loc[:, 'clusters_sp':'clusters_gm']



In [4]:

    
# Attach each row's 'max' topic column to the per-city cluster frames.
# DataFrame.join aligns on the row index.
# NOTE(review): this presumes the cluster frames carry the same index labels
# as the topic tables (business ids) -- confirm.

# 5-topic model
TOR_CHN_LDA5 = TOR_CHN.join(topic_CHN_5.loc[:, ['max']], how='left')
PHO_CHN_LDA5 = PHO_CHN.join(topic_CHN_5.loc[:, ['max']], how='left')
LAS_CHN_LDA5 = LAS_CHN.join(topic_CHN_5.loc[:, ['max']], how='left')

# 15-topic model
TOR_CHN_LDA15 = TOR_CHN.join(topic_CHN_15.loc[:, ['max']], how='left')
PHO_CHN_LDA15 = PHO_CHN.join(topic_CHN_15.loc[:, ['max']], how='left')
LAS_CHN_LDA15 = LAS_CHN.join(topic_CHN_15.loc[:, ['max']], how='left')



In [5]:

    
def ClsuterTopic(LDA, methods=('clusters_sp',)):
    """Print the most frequent topic inside every cluster.

    For each clustering column named in ``methods`` the rows of ``LDA`` are
    split by cluster label, and the most common value of the ``'max'``
    column in each cluster is printed as ``c<label>: <topic>``.

    Parameters
    ----------
    LDA : pandas.DataFrame
        Must contain every column listed in ``methods`` plus a numeric
        ``'max'`` column.
    methods : iterable of str, optional
        Clustering columns to report on. The default ``('clusters_sp',)``
        reproduces the previously hard-coded behaviour; pass for example
        ``('clusters_sp', 'clusters_km', 'clusters_gm')`` to report several.

    Returns
    -------
    None
        Output goes to stdout only.
    """
    for method in methods:
        print(method)
        # Iterate over the labels that actually occur (NaN dropped) instead
        # of range(number_of_unique_values): the latter silently assumes the
        # labels are exactly 0..n-1 and fails on gaps or missing values.
        for label in sorted(LDA[method].dropna().unique()):
            members = LDA[LDA[method] == label]
            topic_counts = members.groupby('max').size()
            if topic_counts.empty:
                # Every 'max' value in this cluster is missing.
                print("c{}: {}".format(label, 'n/a'))
                continue
            # idxmax() returns the index *label* (the topic value). argmax()
            # returns the integer position on pandas >= 1.0, which is wrong
            # whenever some topic is absent from the cluster.
            print("c{}: {}".format(label, topic_counts.idxmax() + 1))  # topics index start from 1
        print('\n')

1. SP + CHN15



In [6]:

    
# Same report for each city's 15-topic frame, in the order TOR, PHO, LAS.
for city_frame in (TOR_CHN_LDA15, PHO_CHN_LDA15, LAS_CHN_LDA15):
    ClsuterTopic(city_frame)









    



clusters_sp
c0: 13
c1: 15


clusters_sp
c0: 13
c1: 12


clusters_sp
c0: 13
c1: 4



In [7]:

    
CHN15_TOPIC = pd.read_csv(
    "../yelp-challenge/LDA/CHN15/04_top30_word2topic.csv"
)
# Display only the word lists of the topics reported for the clusters above.
CHN15_TOPIC[['Topic 13', 'Topic 15', 'Topic 12', 'Topic 4']]









    Out[7]:






  
    
      
      Topic 13
      Topic 15
      Topic 12
      Topic 4
    
  
  
    
      0
      happy
      bowl
      chicken
      tea
    
    
      1
      menu
      chicken
      chinese
      milk
    
    
      2
      hour
      sauce
      rice
      boba
    
    
      3
      server
      rice
      fried
      drink
    
    
      4
      ordered
      fresh
      ordered
      drinks
    
    
      5
      table
      location
      egg
      ice
    
    
      6
      bar
      teriyaki
      lunch
      bubble
    
    
      7
      night
      veggies
      soup
      sweet
    
    
      8
      drinks
      eat
      beef
      green
    
    
      9
      experience
      meat
      shrimp
      taste
    
    
      10
      wait
      staff
      sauce
      mango
    
    
      11
      beer
      sauces
      sour
      pretty
    
    
      12
      atmosphere
      meal
      hot
      ordered
    
    
      13
      waitress
      friendly
      delivery
      sugar
    
    
      14
      drink
      spicy
      eat
      places
    
    
      15
      minutes
      clean
      friendly
      taro
    
    
      16
      location
      burrito
      orange
      teas
    
    
      17
      dinner
      fast
      sweet
      dessert
    
    
      18
      house
      lunch
      fast
      taiwanese
    
    
      19
      waiter
      give
      rolls
      jelly
    
    
      20
      pretty
      panda
      menu
      tapioca
    
    
      21
      staff
      healthy
      chow
      menu
    
    
      22
      asked
      shabu
      bad
      friendly
    
    
      23
      friends
      customer
      delicious
      desserts
    
    
      24
      manager
      line
      fresh
      friends
    
    
      25
      seated
      pretty
      pretty
      location
    
    
      26
      meal
      noodles
      portions
      staff
    
    
      27
      selection
      quick
      give
      fresh
    
    
      28
      busy
      bowls
      years
      flavor
    
    
      29
      bad
      pei
      family
      lot

JPN



In [8]:

    
# Business-id list; its `business_id` column supplies the row labels for
# the topic table loaded below.
bid2 = pd.read_csv("../yelp-challenge/LDA/JPN5/ALL_Japanese_Business_ID.csv")

topic_JPN_5 = pd.read_csv(
    "../yelp-challenge/LDA/JPN5/topic2doc.csv",
    index_col=0,
)
# NOTE(review): presumes the topic2doc rows are in the same order as the id
# file -- confirm against whatever produced these CSVs.
topic_JPN_5.index = bid2['business_id'].values



In [9]:

    
# For each city keep only the columns from 'clusters_sp' through
# 'clusters_gm' (a label slice, so both end columns are included).
TOR_JPN = pd.read_csv(
    "../yelp-challenge/Anomaly/Result/torronto_jap_results.csv"
).loc[:, 'clusters_sp':'clusters_gm']

PHO_JPN = pd.read_csv(
    "../yelp-challenge/Anomaly/Result/Phoenix_jap_results.csv"
).loc[:, 'clusters_sp':'clusters_gm']

LAS_JPN = pd.read_csv(
    "../yelp-challenge/Anomaly/Result/LasVegas_jap_results.csv"
).loc[:, 'clusters_sp':'clusters_gm']



In [10]:

    
# Attach each row's 'max' topic column to the per-city cluster frames.
# DataFrame.join aligns on the row index.
# NOTE(review): this presumes the cluster frames carry the same index labels
# as the topic table (business ids) -- confirm.
TOR_JPN_LDA5 = TOR_JPN.join(topic_JPN_5.loc[:, ['max']], how='left')
PHO_JPN_LDA5 = PHO_JPN.join(topic_JPN_5.loc[:, ['max']], how='left')
LAS_JPN_LDA5 = LAS_JPN.join(topic_JPN_5.loc[:, ['max']], how='left')



In [11]:

    
def ClsuterTopic(LDA, methods=('clusters_gm',)):
    """Print the most frequent topic inside every cluster.

    For each clustering column named in ``methods`` the rows of ``LDA`` are
    split by cluster label, and the most common value of the ``'max'``
    column in each cluster is printed as ``c<label>: <topic>``.

    Parameters
    ----------
    LDA : pandas.DataFrame
        Must contain every column listed in ``methods`` plus a numeric
        ``'max'`` column.
    methods : iterable of str, optional
        Clustering columns to report on. The default ``('clusters_gm',)``
        reproduces the previously hard-coded behaviour; pass for example
        ``('clusters_sp', 'clusters_km', 'clusters_gm')`` to report several.

    Returns
    -------
    None
        Output goes to stdout only.
    """
    for method in methods:
        print(method)
        # Iterate over the labels that actually occur (NaN dropped) instead
        # of range(number_of_unique_values): the latter silently assumes the
        # labels are exactly 0..n-1 and fails on gaps or missing values.
        for label in sorted(LDA[method].dropna().unique()):
            members = LDA[LDA[method] == label]
            topic_counts = members.groupby('max').size()
            if topic_counts.empty:
                # Every 'max' value in this cluster is missing.
                print("c{}: {}".format(label, 'n/a'))
                continue
            # idxmax() returns the index *label* (the topic value). argmax()
            # returns the integer position on pandas >= 1.0, which is wrong
            # whenever some topic is absent from the cluster.
            print("c{}: {}".format(label, topic_counts.idxmax() + 1))  # topics index start from 1
        print('\n')



In [12]:

    
# Same report for each city's 5-topic frame, in the order TOR, PHO, LAS.
for city_frame in (TOR_JPN_LDA5, PHO_JPN_LDA5, LAS_JPN_LDA5):
    ClsuterTopic(city_frame)









    



clusters_gm
c0: 3
c1: 3
c2: 3
c3: 3


clusters_gm
c0: 2
c1: 3
c2: 3
c3: 3


clusters_gm
c0: 2
c1: 2
c2: 2
c3: 3



In [13]:

    
JPN5_TOPIC = pd.read_csv(
    "../yelp-challenge/LDA/JPN5/top30_word2topic.csv"
)
# Display only the word lists of the topics reported for the clusters above.
JPN5_TOPIC[['Topic 2', 'Topic 3']]









    Out[13]:






  
    
      
      Topic 2
      Topic 3
    
  
  
    
      0
      sushi
      ramen
    
    
      1
      roll
      pork
    
    
      2
      rolls
      noodles
    
    
      3
      fresh
      broth
    
    
      4
      fish
      bowl
    
    
      5
      eat
      chicken
    
    
      6
      quality
      japanese
    
    
      7
      ayce
      spicy
    
    
      8
      salmon
      ordered
    
    
      9
      menu
      soup
    
    
      10
      tuna
      rice
    
    
      11
      sashimi
      curry
    
    
      12
      lunch
      small
    
    
      13
      ordered
      delicious
    
    
      14
      rice
      wait
    
    
      15
      spicy
      pretty
    
    
      16
      tempura
      bit
    
    
      17
      friendly
      miso
    
    
      18
      pretty
      taste
    
    
      19
      price
      egg
    
    
      20
      places
      noodle
    
    
      21
      staff
      menu
    
    
      22
      dinner
      sauce
    
    
      23
      delicious
      fried
    
    
      24
      amazing
      friendly
    
    
      25
      wait
      tea
    
    
      26
      favorite
      places
    
    
      27
      2
      meat
    
    
      28
      nigiri
      side
    
    
      29
      salad
      lunch

What if there is no clustering within a city?

CHN5



In [14]:

    
# Most frequent value of the 'max' column over the whole frame.
# idxmax() returns the value itself (the index label of the count table).
# On pandas >= 1.0 argmax() returns the integer position, which is always 0
# for a value_counts() result because it is sorted by count, descending.
# NOTE(review): unlike ClsuterTopic no +1 is applied here -- confirm which
# topic numbering is intended.
TOR_CHN_LDA5['max'].value_counts().idxmax()









    Out[14]:





3



In [15]:

    
# Most frequent value of the 'max' column over the whole frame.
# idxmax() returns the value itself (the index label of the count table).
# On pandas >= 1.0 argmax() returns the integer position, which is always 0
# for a value_counts() result because it is sorted by count, descending.
# NOTE(review): unlike ClsuterTopic no +1 is applied here -- confirm which
# topic numbering is intended.
PHO_CHN_LDA5['max'].value_counts().idxmax()









    Out[15]:





3



In [16]:

    
# Most frequent value of the 'max' column over the whole frame.
# idxmax() returns the value itself (the index label of the count table).
# On pandas >= 1.0 argmax() returns the integer position, which is always 0
# for a value_counts() result because it is sorted by count, descending.
# NOTE(review): unlike ClsuterTopic no +1 is applied here -- confirm which
# topic numbering is intended.
LAS_CHN_LDA5['max'].value_counts().idxmax()









    Out[16]:





3

CHN15



In [17]:

    
# Most frequent value of the 'max' column over the whole frame.
# idxmax() returns the value itself (the index label of the count table).
# On pandas >= 1.0 argmax() returns the integer position, which is always 0
# for a value_counts() result because it is sorted by count, descending.
# NOTE(review): unlike ClsuterTopic no +1 is applied here -- confirm which
# topic numbering is intended.
TOR_CHN_LDA15['max'].value_counts().idxmax()









    Out[17]:





12



In [18]:

    
# Most frequent value of the 'max' column over the whole frame.
# idxmax() returns the value itself (the index label of the count table).
# On pandas >= 1.0 argmax() returns the integer position, which is always 0
# for a value_counts() result because it is sorted by count, descending.
# NOTE(review): unlike ClsuterTopic no +1 is applied here -- confirm which
# topic numbering is intended.
PHO_CHN_LDA15['max'].value_counts().idxmax()









    Out[18]:





12



In [19]:

    
# Most frequent value of the 'max' column over the whole frame.
# idxmax() returns the value itself (the index label of the count table).
# On pandas >= 1.0 argmax() returns the integer position, which is always 0
# for a value_counts() result because it is sorted by count, descending.
# NOTE(review): unlike ClsuterTopic no +1 is applied here -- confirm which
# topic numbering is intended.
LAS_CHN_LDA15['max'].value_counts().idxmax()









    Out[19]:





12

JPN5



In [20]:

    
# Most frequent value of the 'max' column over the whole frame.
# idxmax() returns the value itself (the index label of the count table).
# On pandas >= 1.0 argmax() returns the integer position, which is always 0
# for a value_counts() result because it is sorted by count, descending.
# NOTE(review): unlike ClsuterTopic no +1 is applied here -- confirm which
# topic numbering is intended.
TOR_JPN_LDA5['max'].value_counts().idxmax()









    Out[20]:





2



In [21]:

    
# Most frequent value of the 'max' column over the whole frame.
# idxmax() returns the value itself (the index label of the count table).
# On pandas >= 1.0 argmax() returns the integer position, which is always 0
# for a value_counts() result because it is sorted by count, descending.
# NOTE(review): unlike ClsuterTopic no +1 is applied here -- confirm which
# topic numbering is intended.
PHO_JPN_LDA5['max'].value_counts().idxmax()









    Out[21]:





2



In [22]:

    
# Most frequent value of the 'max' column over the whole frame.
# idxmax() returns the value itself (the index label of the count table).
# On pandas >= 1.0 argmax() returns the integer position, which is always 0
# for a value_counts() result because it is sorted by count, descending.
# NOTE(review): unlike ClsuterTopic no +1 is applied here -- confirm which
# topic numbering is intended.
LAS_JPN_LDA5['max'].value_counts().idxmax()









    Out[22]:





2



In [ ]:

OUTPUT: DataFrame -> LaTeX



In [29]:

    
# First ten rows, every column except the first one.
jpn_out = JPN5_TOPIC.head(10).iloc[:, 1:]
# Number the rows 1..10 instead of 0..9 for the exported table.
jpn_out.index = range(1, 11)



In [30]:

    
# Emit the table as LaTeX source. The parenthesised single-argument form
# runs on both Python 2 and Python 3; the bare print statement is a
# SyntaxError on Python 3.
print(jpn_out.to_latex())









    



\begin{tabular}{llllll}
\toprule
{} &  Topic 1 &  Topic 2 &   Topic 3 &     Topic 4 &   Topic 5 \\
\midrule
1  &  chicken &    sushi &     ramen &        menu &    korean \\
2  &    happy &     roll &      pork &    japanese &       bbq \\
3  &     hour &    rolls &   noodles &      dishes &     sushi \\
4  &  ordered &    fresh &     broth &        dish &   burrito \\
5  &     rice &     fish &      bowl &     amazing &      meat \\
6  &    table &      eat &   chicken &   delicious &   bulgogi \\
7  &    lunch &  quality &  japanese &     ordered &  montreal \\
8  &   shrimp &     ayce &     spicy &  experience &  bibimbap \\
9  &   pretty &   salmon &   ordered &       vegas &     meats \\
10 &    sauce &     menu &      soup &       night &     kalbi \\
\bottomrule
\end{tabular}



In [52]:

    
# First ten rows, every column except the first one.
chn_out = CHN15_TOPIC.head(10).iloc[:, 1:]
# Number the rows 1..10 instead of 0..9 for the exported table.
chn_out.index = range(1, 11)
# Keep only the four topics of interest, in ascending topic order.
chn_out = chn_out[['Topic 4', 'Topic 12', 'Topic 13', 'Topic 15']]



In [53]:

    
# Emit the table as LaTeX source. The parenthesised single-argument form
# runs on both Python 2 and Python 3; the bare print statement is a
# SyntaxError on Python 3.
print(chn_out.to_latex())









    



\begin{tabular}{lllll}
\toprule
{} & Topic 4 & Topic 12 &    Topic 13 &  Topic 15 \\
\midrule
1  &     tea &  chicken &       happy &      bowl \\
2  &    milk &  chinese &        menu &   chicken \\
3  &    boba &     rice &        hour &     sauce \\
4  &   drink &    fried &      server &      rice \\
5  &  drinks &  ordered &     ordered &     fresh \\
6  &     ice &      egg &       table &  location \\
7  &  bubble &    lunch &         bar &  teriyaki \\
8  &   sweet &     soup &       night &   veggies \\
9  &   green &     beef &      drinks &       eat \\
10 &   taste &   shrimp &  experience &      meat \\
\bottomrule
\end{tabular}

	Topic 13	Topic 15	Topic 12	Topic 4
0	happy	bowl	chicken	tea
1	menu	chicken	chinese	milk
2	hour	sauce	rice	boba
3	server	rice	fried	drink
4	ordered	fresh	ordered	drinks
5	table	location	egg	ice
6	bar	teriyaki	lunch	bubble
7	night	veggies	soup	sweet
8	drinks	eat	beef	green
9	experience	meat	shrimp	taste
10	wait	staff	sauce	mango
11	beer	sauces	sour	pretty
12	atmosphere	meal	hot	ordered
13	waitress	friendly	delivery	sugar
14	drink	spicy	eat	places
15	minutes	clean	friendly	taro
16	location	burrito	orange	teas
17	dinner	fast	sweet	dessert
18	house	lunch	fast	taiwanese
19	waiter	give	rolls	jelly
20	pretty	panda	menu	tapioca
21	staff	healthy	chow	menu
22	asked	shabu	bad	friendly
23	friends	customer	delicious	desserts
24	manager	line	fresh	friends
25	seated	pretty	pretty	location
26	meal	noodles	portions	staff
27	selection	quick	give	fresh
28	busy	bowls	years	flavor
29	bad	pei	family	lot

	Topic 2	Topic 3
0	sushi	ramen
1	roll	pork
2	rolls	noodles
3	fresh	broth
4	fish	bowl
5	eat	chicken
6	quality	japanese
7	ayce	spicy
8	salmon	ordered
9	menu	soup
10	tuna	rice
11	sashimi	curry
12	lunch	small
13	ordered	delicious
14	rice	wait
15	spicy	pretty
16	tempura	bit
17	friendly	miso
18	pretty	taste
19	price	egg
20	places	noodle
21	staff	menu
22	dinner	sauce
23	delicious	fried
24	amazing	friendly
25	wait	tea
26	favorite	places
27	2	meat
28	nigiri	side
29	salad	lunch