In [132]:
import random
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pyspark as ps
import surprise as sp
%matplotlib inline

In [2]:
# Build (or reuse) the Spark session for this notebook, setting driver and
# executor memory to 1g each. No master is set explicitly; chain
# .master("local[8]") onto the builder to pin a local one.
spark_builder = ps.sql.SparkSession.builder.appName("test-scikit-surprise")
for conf_key, conf_value in (
    ('spark.driver.memory', '1g'),
    ('spark.executor.memory', '1g'),
):
    spark_builder = spark_builder.config(conf_key, conf_value)
spark = spark_builder.getOrCreate()

In [62]:
# Read the ratings parquet dataset and preview its first 20 rows.
ratings_df = spark.read.load('../data/ratings', format='parquet')
ratings_df.show(n=20)


+------+----+------+
|  user|item|rating|
+------+----+------+
|259825|1627|     4|
|394869|1627|     1|
|106643|3534|     1|
| 85853| 485|     1|
|   200| 485|     3|
|   182| 485|     3|
| 53398|1621|     4|
| 11280| 485|     3|
|  8994| 741|     3|
| 11520| 741|     3|
| 21380|2693|     5|
| 14094|2693|     1|
|359855|1871|     5|
|  5227|1521|     3|
|352906|1521|     5|
|243591|1871|     4|
|  1301|1871|     4|
|169784|1871|     5|
|  5042|1521|     4|
|278755|1746|     5|
+------+----+------+
only showing top 20 rows


In [65]:
# Restaurant metadata
restaurants_df = spark.read.parquet('../data/restaurants')

# Each line of the labels file is one restaurant id; its 0-based line number
# becomes the integer 'item' key paired with that id.
with open('../data/product_labels.txt') as labels_file:
    restaurant_id_map = [
        (raw_line.strip(), position)
        for position, raw_line in enumerate(labels_file)
    ]

restaurant_id_map_df = spark.createDataFrame(restaurant_id_map, ['id', 'item'])

# Attach the integer key to the metadata through the shared 'id' column.
restaurants_with_id_df = restaurants_df.join(restaurant_id_map_df, on='id')

restaurants_with_id_df.show()


+--------------------+--------------------+--------------------+--------------+------------------+--------------------+---------+--------------------+--------------------+------------+-----+------+------------+------------+--------------------+----+
|                  id|          categories|         coordinates| display_phone|          distance|           image_url|is_closed|            location|                name|       phone|price|rating|review_count|transactions|                 url|item|
+--------------------+--------------------+--------------------+--------------+------------------+--------------------+---------+--------------------+--------------------+------------+-----+------+------------+------------+--------------------+----+
|brendas-french-so...|[[breakfast_brunc...|[37.7829016035273...|(415) 345-8100|    510.7695611352|https://s3-media1...|    false|[652 Polk St H,,,...|Brenda's French S...|+14153458100|   $$|   4.0|        7796|          []|https://www.yelp....|   0|
|tartine-bakery-an...|[[bakeries,Bakeri...|[37.7614250022004...|(415) 487-2600|    1039.959209482|https://s3-media1...|    false|[600 Guerrero St,...|Tartine Bakery & ...|+14154872600|   $$|   4.0|        6627|          []|https://www.yelp....|   1|
|burma-superstar-s...| [[burmese,Burmese]]|[37.782787322998,...|(415) 387-2147|2105.8120558279998|https://s3-media1...|    false|[309 Clement St,,...|     Burma Superstar|+14153872147|   $$|   4.0|        5764|          []|https://www.yelp....|   2|
|house-of-prime-ri...|[[tradamerican,Am...|[37.79338,-122.4225]|(415) 885-4605|1249.5617432239999|https://s3-media4...|    false|[1906 Van Ness Av...|  House of Prime Rib|+14158854605|  $$$|   4.0|        5765|          []|https://www.yelp....|   3|
|san-tung-san-fran...|[[chinese,Chinese...|[37.76367,-122.46...|(415) 242-0828|2337.1236205659998|https://s3-media4...|    false|[1031 Irving St,,...|            San Tung|+14152420828|   $$|   4.0|        5525|          []|https://www.yelp....|   4|
|the-slanted-door-...|[[vietnamese,Viet...|[37.7961824442073...|(415) 861-8032|    565.8851752908|https://s3-media4...|    false|[1 Ferry Bldg,Ste...|    The Slanted Door|+14158618032|  $$$|   3.5|        4876|          []|https://www.yelp....|   5|
|pike-place-chowde...|[[seafood,Seafood...|[47.6093656,-122....|(206) 267-2537|      695.49076506|https://s3-media3...|    false|[1530 Post Aly,St...|  Pike Place Chowder|+12062672537|   $$|   4.5|        4741|          []|https://www.yelp....|   6|
|gary-danko-san-fr...|[[newamerican,Ame...|[37.80587,-122.42...|(415) 749-2060|    3175.640937578|https://s3-media3...|    false|[800 N Point St,,...|          Gary Danko|+14157492060| $$$$|   4.5|        4512|          []|https://www.yelp....|   7|
|  nopa-san-francisco|[[newamerican,Ame...|[37.774905,-122.4...|(415) 864-8643|1993.4674100419998|https://s3-media3...|    false|[560 Divisadero S...|                Nopa|+14158648643|  $$$|   4.0|        4479|          []|https://www.yelp....|   8|
|paseo-caribbean-f...|[[caribbean,Carib...|[47.65849,-122.35...|(206) 545-7440|3673.2237598739994|https://s3-media1...|    false|[4225 Fremont Ave...|Paseo Caribbean F...|+12065457440|    $|   4.5|        4454|          []|https://www.yelp....|   9|
|piroshky-piroshky...|[[bakeries,Bakeri...|[47.6099207997322...|(206) 441-6068| 810.4495261815999|https://s3-media4...|    false|[1908 Pike Pl,,,S...|   Piroshky Piroshky|+12064416068|    $|   4.5|        4281|          []|https://www.yelp....|  10|
|hog-island-oyster...|[[seafood,Seafood...|[37.7960118266409...|(415) 391-7117|    2681.632459422|https://s3-media1...|    false|[1 Ferry Bldg,,Sh...|Hog Island Oyster Co|+14153917117|   $$|   4.5|        4261|          []|https://www.yelp....|  11|
|foreign-cinema-sa...|[[breakfast_brunc...|[37.75637,-122.41...|(415) 648-7600|2577.4537657299998|https://s3-media4...|    false|[2534 Mission St,...|      Foreign Cinema|+14156487600|  $$$|   4.0|        4183|          []|https://www.yelp....|  12|
|el-farolito-san-f...|[[mexican,Mexican...|[37.752654,-122.4...|(415) 824-7877|169.24427770519998|https://s3-media1...|    false|[2779 Mission St,...|         El Farolito|+14158247877|    $|   4.0|        4075|          []|https://www.yelp....|  13|
|the-house-san-fra...|[[asianfusion,Asi...|[37.7986203,-122....|(415) 986-8612|2261.6164455119997|https://s3-media3...|    false|[1230 Grant Ave,,...|           The House|+14159868612|  $$$|   4.5|        3995|          []|https://www.yelp....|  14|
|kokkari-estiatori...|[[greek,Greek], [...|[37.7969612181187...|(415) 981-0983|    2242.042043092|https://s3-media2...|    false|[200 Jackson St,,...|  Kokkari Estiatorio|+14159810983|  $$$|   4.5|        3791|          []|https://www.yelp....|  15|
|katana-ya-san-fra...|[[sushi,Sushi Bar...|[37.78726,-122.41...|(415) 771-1281|      532.57727686|https://s3-media4...|    false|[422 Geary St,nul...|           Katana-Ya|+14157711281|   $$|   3.5|        3781|    [pickup]|https://www.yelp....|  16|
|r-and-g-lounge-sa...|[[chinese,Chinese...|[37.7941240989441...|(415) 982-7877|417.74262803919993|https://s3-media3...|    false|[631 Kearny St,,,...|          R&G Lounge|+14159827877|   $$|   3.5|        3746|          []|https://www.yelp....|  17|
| zazie-san-francisco|[[breakfast_brunc...|[37.765244,-122.4...|(415) 564-5332|    1654.856319484|https://s3-media4...|    false|[941 Cole St,,,Sa...|               Zazie|+14155645332|   $$|   4.0|        3654|          []|https://www.yelp....|  18|
|mamas-on-washingt...|[[breakfast_brunc...|[37.801606,-122.4...|(415) 362-6421|1416.5165577517998|https://s3-media2...|    false|[1701 Stockton St...|Mama's On Washing...|+14153626421|   $$|   4.0|        3607|          []|https://www.yelp....|  19|
+--------------------+--------------------+--------------------+--------------+------------------+--------------------+---------+--------------------+--------------------+------------+-----+------+------------+------------+--------------------+----+
only showing top 20 rows


In [66]:
# Collect both Spark DataFrames to the driver as pandas DataFrames
# (ratings first, then the restaurant metadata with its 'item' key).
ratings_pdf, restaurants_with_id_pdf = (
    spark_frame.toPandas()
    for spark_frame in (ratings_df, restaurants_with_id_df)
)

In [13]:
# Seed both random number generators before anything stochastic happens, so
# the fold assignment and the SVD factor initialisation (and therefore the
# metrics printed below) are repeatable across kernel restarts. The Surprise
# FAQ recommends seeding both `random` and `numpy` for reproducible experiments.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Ratings are on a 1-5 scale; only the three listed columns are handed to Surprise.
reader = sp.Reader(line_format='user item rating', rating_scale=(1,5))
data = sp.Dataset.load_from_df(ratings_pdf[['user','item','rating']], reader)
data.split(n_folds=5)

# We'll use the SVD matrix-factorisation algorithm with its default settings.
algo = sp.SVD()

# Evaluate performances of our algorithm on the dataset (5-fold cross-validation).
# NOTE(review): newer Surprise releases replace Dataset.split / evaluate /
# print_perf with surprise.model_selection.cross_validate -- confirm against
# the installed version before upgrading.
# NOTE(review): after evaluate() returns, the parameters on `algo` inspected in
# later cells presumably come from the last fold's fit rather than from a fit
# on the full dataset -- confirm.
perf = sp.evaluate(algo, data, measures=['RMSE', 'MAE', 'FCP'])

sp.print_perf(perf)


Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 1.1339
MAE:  0.8948
FCP:  0.5504
------------
Fold 2
RMSE: 1.1353
MAE:  0.8958
FCP:  0.5594
------------
Fold 3
RMSE: 1.1325
MAE:  0.8934
FCP:  0.5560
------------
Fold 4
RMSE: 1.1354
MAE:  0.8958
FCP:  0.5569
------------
Fold 5
RMSE: 1.1365
MAE:  0.8964
FCP:  0.5567
------------
------------
Mean RMSE: 1.1347
Mean MAE : 0.8952
Mean FCP : 0.5559
------------
------------
        Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    
RMSE    1.1339  1.1353  1.1325  1.1354  1.1365  1.1347  
MAE     0.8948  0.8958  0.8934  0.8958  0.8964  0.8952  
FCP     0.5504  0.5594  0.5560  0.5569  0.5567  0.5559  

In [29]:
# Shape and container type of each learned parameter array on the fitted
# model (pu, qi, bu, bi), followed by the training set's global mean rating.
for param_name in ('pu', 'qi', 'bu', 'bi'):
    param_values = getattr(algo, param_name)
    print(param_values.shape)
    print(type(param_values))
print(algo.trainset.global_mean)


(447903, 100)
<class 'numpy.ndarray'>
(5633, 100)
<class 'numpy.ndarray'>
(447903,)
<class 'numpy.ndarray'>
(5633,)
<class 'numpy.ndarray'>
3.8437702708

In [30]:
# One line per parameter array (pu, qi, bu, bi): mean, std, min, max.
for param_values in (algo.pu, algo.qi, algo.bu, algo.bi):
    print(
        param_values.mean(),
        param_values.std(),
        param_values.min(),
        param_values.max(),
    )


1.11283811115e-05 0.10221515398 -0.632437552024 0.632745359891
-0.000190973182749 0.1787676485 -1.26310230366 1.35552804301
-0.000407575321978 0.154984133721 -1.64604323561 1.39740106161
-0.045250038489 0.471635950249 -2.32364598402 1.08302961424

In [35]:
# Positions of the largest and smallest entries of algo.bi, then the values
# found there.
# NOTE(review): these are positions within algo.bi, presumably Surprise inner
# item ids rather than the 'item' values in the ratings data -- confirm before
# looking them up in the restaurant table.
item_biases = algo.bi
best_idx = item_biases.argmax()
worst_idx = item_biases.argmin()
print(best_idx, worst_idx)
print(item_biases[best_idx], item_biases[worst_idx])


3030 4248
1.08302961424 -2.32364598402

In [134]:
# Per-latent-feature summary of the item factor matrix: one
# (feature index, mean, std, min, max) tuple per column of algo.qi,
# printed from the largest standard deviation to the smallest.
item_factors = algo.qi

# Take the number of latent features from the matrix itself instead of
# hard-coding 100, so the cell keeps working if SVD is built with a different
# n_factors.
n_latent_features = item_factors.shape[1]

latent_feature_stats = []
for i in range(n_latent_features):
    feature_column = item_factors[:, i]
    latent_feature_stats.append((
        i,
        np.mean(feature_column),
        np.std(feature_column),
        np.min(feature_column),
        np.max(feature_column)
    ))

# Sort ascending by std (tuple position 2), then reverse for descending order.
pprint(sorted(latent_feature_stats, key=lambda x: x[2])[::-1])


[(1,
  -0.004089527902321706,
  0.18410698045180171,
  -1.2078317359761741,
  0.79031368791172341),
 (2,
  0.00054042749472026086,
  0.18378546842181384,
  -0.9971179019732187,
  0.84622271915773095),
 (55,
  0.0041622417348315703,
  0.1827197005663064,
  -0.82598047682514308,
  1.0697543228813877),
 (27,
  0.003152773073643748,
  0.18248466490864326,
  -0.994150723871987,
  0.98810815715668476),
 (95,
  0.0039238022244529542,
  0.18229591815075422,
  -1.0460537465130051,
  0.92152905531968787),
 (66,
  0.0022796309960021678,
  0.18214595880418794,
  -0.90033907738503272,
  1.12869772228878),
 (15,
  0.0010131258173113182,
  0.18195760879190365,
  -0.88502061309489277,
  0.83045722009012035),
 (36,
  -0.00062448468826764862,
  0.18179904828732649,
  -0.80297674273534725,
  1.0064540597606297),
 (72,
  0.0005004180643786775,
  0.18166936579272386,
  -1.0568589436888518,
  0.98788307430252187),
 (49,
  -0.0010141840152511662,
  0.18163339124583786,
  -0.98942531665434019,
  1.0978159463529833),
 (45,
  -0.002126822272856078,
  0.18149905654052534,
  -0.88084184099765006,
  0.84160263888573805),
 (61,
  -0.00020011049381728725,
  0.18130229827081218,
  -1.2631023036593567,
  0.85456315849466824),
 (7,
  -0.00034172501854291994,
  0.18128614330687703,
  -1.0458600792677462,
  0.78315953443758635),
 (14,
  -0.0018422616943705394,
  0.18118233124742925,
  -0.8271344571397008,
  0.94757704908552054),
 (76,
  -0.00079718372076415,
  0.18108205094411028,
  -1.1725063215096649,
  0.88251658700029167),
 (84,
  -0.0002035485372576816,
  0.18086702087797321,
  -0.83726520920656955,
  1.0927650212770157),
 (77,
  -0.0035396248338834162,
  0.18083693874455825,
  -1.2030053005370354,
  1.1417643483751789),
 (41,
  -0.0010616440842640129,
  0.18081249215536341,
  -0.88672715072457498,
  0.96711510672053225),
 (57,
  -0.0019054556178706942,
  0.18077763824698231,
  -0.97361602255039659,
  0.89670352914888551),
 (23,
  -0.00014076923703236786,
  0.18076795181298205,
  -0.88468034360400882,
  1.2534402459070519),
 (8,
  0.0030342828991628311,
  0.18056122328548127,
  -0.9166721978058654,
  0.8595625611519675),
 (31,
  0.00065859916410807744,
  0.18031065585353864,
  -0.82798321575358125,
  0.86509351197047613),
 (68,
  0.0017930271994782527,
  0.18017285305148373,
  -0.84215794626443363,
  0.77493950965043334),
 (12,
  -0.0013608017033851321,
  0.18009200263638456,
  -0.88524024608775997,
  1.2809695412212778),
 (81,
  -0.001510020802296989,
  0.18008704683968205,
  -1.2535982848213172,
  0.99510451128413202),
 (25,
  0.00097107032594856109,
  0.1800764185638116,
  -0.86682361493736571,
  0.95316960936544359),
 (51,
  0.0020236023461648735,
  0.1800708699892965,
  -0.93276574230764941,
  0.93651196015747729),
 (88,
  0.0018300707305678584,
  0.18007080248353607,
  -0.87241051162589667,
  0.80096977637282585),
 (83,
  -0.00033824379664292904,
  0.1799871995156867,
  -0.82101308599524747,
  0.88911805222077567),
 (92,
  0.0032123110982985718,
  0.17998707076834478,
  -0.77525180501185453,
  0.94808198257732434),
 (39,
  -0.00048178980266970934,
  0.17988921681284453,
  -1.0836722682390658,
  0.9410486827967941),
 (37,
  0.0026343415496452717,
  0.17979299217025965,
  -1.0078669420228166,
  1.15224259985849),
 (40,
  0.0038594864390286846,
  0.17977482612281304,
  -0.85528347667781268,
  0.75608533930327948),
 (91,
  -0.0046936857181144624,
  0.17976959367443765,
  -0.84594812150405174,
  1.0767431186241352),
 (30,
  0.001929308104730089,
  0.17972384107495348,
  -0.87246327929154599,
  0.83522187682773552),
 (69,
  0.0022979285196325361,
  0.17955411359996443,
  -0.9997233456755471,
  1.1016870072013423),
 (50,
  0.004128396671490185,
  0.17947090342688901,
  -1.0517994823159496,
  0.95774294410565852),
 (73,
  0.00056172363487999545,
  0.17943326698739998,
  -0.83490731780685712,
  0.94344525596402129),
 (48,
  -0.00081426797081979232,
  0.17942858530510694,
  -0.89021850634049704,
  0.86115469052898475),
 (67,
  -0.0012802200788069888,
  0.17941175296351383,
  -0.95467659060427779,
  0.8838065747771322),
 (56,
  0.0010344187123384921,
  0.17935119244782755,
  -0.89010373212508964,
  1.286532835263281),
 (82,
  -0.00054548116957051475,
  0.17933212194249429,
  -0.97411637323709721,
  0.76330532985173494),
 (18,
  0.00052260802616330062,
  0.17927157918302036,
  -1.0975299812059902,
  0.97619749910480014),
 (94,
  -0.00097162527582903544,
  0.17910026387948563,
  -0.80639027095152149,
  0.84994931723510792),
 (46,
  0.00067806113579163216,
  0.17907686532670217,
  -1.1400346291483412,
  0.85541233016261486),
 (65,
  -0.0012411329171800075,
  0.17891275708056967,
  -0.87365142013879105,
  0.82419914068532762),
 (43,
  -0.0024354049385722581,
  0.17878706791981086,
  -1.0929618088238873,
  1.0294361541730155),
 (58,
  0.002790166319565988,
  0.17878044504345089,
  -0.90723552771268323,
  0.8471909028121607),
 (78,
  0.0018822062978612261,
  0.17863051664488577,
  -0.73860121042482518,
  0.96641811191543403),
 (28,
  0.0017629342929604663,
  0.17854024556636516,
  -0.83655263874861896,
  1.0453921568655196),
 (90,
  -0.00067315493471521107,
  0.17853984977259266,
  -0.89596494333491938,
  0.86315651357693124),
 (10,
  0.0038071018231160872,
  0.17852334348862037,
  -0.93434333986305473,
  1.355528043008813),
 (35,
  0.0010449924922573102,
  0.17849671421892777,
  -0.9401376511622952,
  1.1476864409664751),
 (63,
  0.0013976447340525448,
  0.17845049617842423,
  -0.81335395462530269,
  1.0104922993824206),
 (38,
  0.0015026865140093419,
  0.17843642688319808,
  -0.85317602906766388,
  0.95591678156727011),
 (53,
  0.0029308668092992533,
  0.17838466184313106,
  -0.94164862250744297,
  0.97061921254392469),
 (44,
  0.0030902508149981099,
  0.1783574236038441,
  -1.0147035740214552,
  1.0227103903456001),
 (89,
  0.0018890345793832156,
  0.17834956615207573,
  -0.77268737033285462,
  0.83426206499702016),
 (59,
  0.00035328708861057459,
  0.17825600750577728,
  -0.83038519062908611,
  0.76350184075909244),
 (11,
  -0.0029432204776143832,
  0.17817658413451248,
  -0.93236703453027636,
  0.85885580617944079),
 (85,
  -0.0045182754772619604,
  0.17805724624132843,
  -1.0688668780292805,
  0.81770623632287998),
 (70,
  -0.0059565437253669643,
  0.1780569901766493,
  -0.95476836986366498,
  0.86558399414908305),
 (19,
  0.00026560297155331321,
  0.17804140261431028,
  -0.97009427809121429,
  1.0469201375831481),
 (21,
  -0.001857799989242299,
  0.17798675917047987,
  -0.96291414005651454,
  0.70620339688464029),
 (87,
  0.0021274962311128649,
  0.17790603292007243,
  -0.99761728802999527,
  0.82115260395612699),
 (22,
  0.0041253576220585935,
  0.17790437765481404,
  -1.0348667583748496,
  0.99465557814157868),
 (26,
  -0.00028091442975743282,
  0.17790408846617251,
  -1.0433509828617842,
  1.0187474383326354),
 (96,
  0.0020581281151430364,
  0.17788993720337765,
  -0.98504432872840686,
  0.88211338428799646),
 (93,
  0.00019572104156632148,
  0.17787698454093326,
  -0.93222212717667496,
  0.893315661774717),
 (64,
  -0.0019274561484970163,
  0.17773439131846835,
  -1.2021295575157427,
  0.84878524529216071),
 (20,
  -0.0026015330572910569,
  0.17772651555993335,
  -0.97821126231320044,
  0.8396088192668022),
 (47,
  0.0014802902804692528,
  0.17769612418745614,
  -1.02511896922193,
  0.94435144346259148),
 (29,
  -0.0019473854888833009,
  0.17760542124472861,
  -0.89350579168222877,
  0.67089034248820012),
 (42,
  0.001295866088508651,
  0.17759842694485897,
  -1.0362467081995772,
  0.93401816426860795),
 (9,
  0.0022506620711629944,
  0.17734037702388078,
  -0.77585885049743208,
  1.0456368577843254),
 (5,
  -0.00330289739791173,
  0.17727610221944456,
  -0.84858652004679747,
  0.90082985201911214),
 (34,
  -0.0061098071556114249,
  0.17726215690438402,
  -0.82426519434975742,
  1.1218179063642497),
 (60,
  -0.0057701856395295488,
  0.1772349316472164,
  -0.95862827626472269,
  1.0481069649078412),
 (3,
  -0.00065766046001759424,
  0.17717134620852379,
  -0.81291532115249243,
  0.97827761364813381),
 (52,
  0.00029556652717092885,
  0.17713745327516203,
  -0.87080155244036994,
  0.93644183776520051),
 (79,
  0.00031126128102900054,
  0.17706933272467662,
  -0.85555221816849791,
  0.78218417241011184),
 (13,
  -0.0011846964190777738,
  0.17702726945406524,
  -0.81756430984732453,
  1.1082658136809285),
 (24,
  -0.0044968112696149856,
  0.17691691416806965,
  -0.89047389541482669,
  0.86163066454768444),
 (33,
  -0.0038733865345920988,
  0.17664989965187994,
  -1.0538833517199795,
  1.0192252757300266),
 (6,
  0.00049973748859759434,
  0.17658915051382815,
  -0.82199981981025716,
  0.94107932188937604),
 (86,
  -0.0015991010821373285,
  0.17654755683152107,
  -0.98946936682494524,
  0.86254720804392171),
 (71,
  -0.0027142523938311338,
  0.17650044463382072,
  -0.9997446966956548,
  0.80556859054944141),
 (54,
  -0.0018195712814257159,
  0.17633971962865719,
  -0.92895897044757092,
  0.70945946020515338),
 (32,
  -0.0019782011791709057,
  0.17631195995161875,
  -0.76153552363444965,
  0.80534063676943124),
 (62,
  0.00098238563194351846,
  0.17621386287322821,
  -0.75724511501613467,
  0.74855311785036704),
 (0,
  -0.0026187776512386312,
  0.17613442313932759,
  -0.92938007905983,
  1.0086585381864068),
 (97,
  0.00022528478125845107,
  0.17610229226529914,
  -0.96829695433050655,
  0.85177521221388097),
 (74,
  0.0012680953324178427,
  0.17604197597303065,
  -0.8860821525564736,
  0.88882040052776534),
 (80,
  -0.0034492328516364124,
  0.17506790870550129,
  -0.96905183462652278,
  1.0341472419222155),
 (17,
  -0.0022641508719748816,
  0.17423354351513487,
  -0.87156847672479754,
  0.77125598747148649),
 (16,
  -0.0015937060331409688,
  0.17422480141275326,
  -0.7619509629287371,
  0.95458619720722704),
 (99,
  -9.6907291628512787e-05,
  0.17409472696552092,
  -0.83226761780194858,
  0.97859566330014447),
 (75,
  0.0013029594674330903,
  0.17399005517371144,
  -0.7585003457422278,
  0.82374719878420954),
 (4,
  -0.0040069074856868229,
  0.1739875170845806,
  -0.89159064307645752,
  0.77509573961024125),
 (98,
  -0.0031720079179393666,
  0.17397249427221997,
  -0.78915960699964272,
  0.93785467889813923)]

In [67]:
# Preview the first five rows of the collected restaurant metadata.
restaurants_with_id_pdf.head(n=5)


Out[67]:
id categories coordinates display_phone distance image_url is_closed location name phone price rating review_count transactions url item
0 brendas-french-soul-food-san-francisco [(breakfast_brunch, Breakfast & Brunch), (fren... (37.7829016035273, -122.419043442957) (415) 345-8100 510.769561 https://s3-media1.fl.yelpcdn.com/bphoto/YT8ho0... False (652 Polk St H, , , San Francisco, US, [652 Po... Brenda's French Soul Food +14153458100 $$ 4.0 7796 [] https://www.yelp.com/biz/brendas-french-soul-f... 0
1 tartine-bakery-and-cafe-san-francisco [(bakeries, Bakeries), (cafes, Cafes), (desser... (37.7614250022004, -122.424051321456) (415) 487-2600 1039.959209 https://s3-media1.fl.yelpcdn.com/bphoto/vTLu8G... False (600 Guerrero St, , , San Francisco, US, [600 ... Tartine Bakery & Cafe +14154872600 $$ 4.0 6627 [] https://www.yelp.com/biz/tartine-bakery-and-ca... 1
2 burma-superstar-san-francisco-2 [(burmese, Burmese)] (37.782787322998, -122.462539672852) (415) 387-2147 2105.812056 https://s3-media1.fl.yelpcdn.com/bphoto/x6rvBD... False (309 Clement St, , , San Francisco, US, [309 C... Burma Superstar +14153872147 $$ 4.0 5764 [] https://www.yelp.com/biz/burma-superstar-san-f... 2
3 house-of-prime-rib-san-francisco [(tradamerican, American (Traditional)), (stea... (37.79338, -122.4225) (415) 885-4605 1249.561743 https://s3-media4.fl.yelpcdn.com/bphoto/HLrjaM... False (1906 Van Ness Ave, , , San Francisco, US, [19... House of Prime Rib +14158854605 $$$ 4.0 5765 [] https://www.yelp.com/biz/house-of-prime-rib-sa... 3
4 san-tung-san-francisco-2 [(chinese, Chinese), (chicken_wings, Chicken W... (37.76367, -122.46896) (415) 242-0828 2337.123621 https://s3-media4.fl.yelpcdn.com/bphoto/P5_EgB... False (1031 Irving St, , , San Francisco, US, [1031 ... San Tung +14152420828 $$ 4.0 5525 [] https://www.yelp.com/biz/san-tung-san-francisc... 4

In [136]:
# Scatter the first `num_to_plot` items in the plane spanned by two latent
# features of the item factor matrix, labelling each point with its
# restaurant name.
num_to_plot = 200
latent_feature_x = 1
latent_feature_y = 2

# Rows of algo.qi are ordered by Surprise's *inner* item ids, not by the
# 'item' values in the ratings data and not by the row order of
# restaurants_with_id_pdf (which comes out of a Spark join). Translate each
# inner id back to its raw 'item' id so every point gets the name of the
# restaurant it actually represents.
trainset = algo.trainset
n_points = min(num_to_plot, algo.qi.shape[0])  # never ask for more items than exist
raw_item_ids = [trainset.to_raw_iid(inner_id) for inner_id in range(n_points)]

# raw 'item' id -> restaurant name; items without metadata fall back to their id.
name_by_item = dict(zip(restaurants_with_id_pdf['item'], restaurants_with_id_pdf['name']))
labels = [name_by_item.get(raw_id, str(raw_id)) for raw_id in raw_item_ids]

xs = algo.qi[:n_points, latent_feature_x]
ys = algo.qi[:n_points, latent_feature_y]

fig, ax = plt.subplots(1, 1, figsize=(20,20))
ax.scatter(xs, ys, s=1)

for label, x, y in zip(labels, xs, ys):
    ax.text(x, y, label, horizontalalignment='center', verticalalignment='center')

ax.set_xlabel('latent feature {}'.format(latent_feature_x))
ax.set_ylabel('latent feature {}'.format(latent_feature_y))
ax.set_title('Restaurants in SVD latent feature space');



In [ ]: