In [7]:
review = """
pool tables
"""

In [1]:
import sys
sys.path.append('../vectorsearch')
import vectorsearch
reload(vectorsearch)

# SearchBusinesses(review)
top_n = 5 # Number of topics to choose for top of list. 
rev_topic = np.array(vectorsearch.GetDocTopic('beer, brewery'))
# Get the top few topics for this review. 
top_n_topics = rev_topic.argsort()[-top_n:][::-1]
# print rev_topic # Print the topic vector. 


for topic in top_n_topics:
    print rev_topic[topic], vectorsearch.GetTopicWords(topic, )
      
top_bus_id, top_bus_sim = vectorsearch.FindBusinessSimilarityLDA(rev_topic, business_ids=None)
print top_bus_sim

plt = vectorsearch.visualize_topic(rev_topic, num_topics=6, save_path='/home/carlson/web/images/insight/test.png')
plt.show()


/home/carlson/anaconda/lib/python2.7/site-packages/matplotlib/__init__.py:1350: UserWarning:  This call to matplotlib.use() has no effect
because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

  warnings.warn(_use_error_msg)
path /data/insight_yelp/input/
/home/carlson/anaconda/envs/insight/lib/python2.7/site-packages/unidecode/__init__.py:46: RuntimeWarning: Argument <type 'str'> is not an unicode object. Passing an encoded string will likely have unexpected results.
  _warn_if_not_unicode(string)
0.99439606264 [u'beer', u'food', u'selection', u'tap', u'brew', u'brewery', u'try', u'ale', u'love', u'craft']
0.0242535625941 [u'madison', u'food', u'fry', u'cheese', u'beer', u'fish', u'curd', u'brat', u'sandwich', u'gordon']
0.0242535625464 [u'pub', u'food', u'irish', u'chip', u'fish', u'beer', u'pasty', u'bit', u'pie', u'staff']
0.0242535625416 [u'food', u'pizza', u'wing', u'chicken', u'always', u'beer', u'sauce', u'order', u'hour', u'happy']
0.0242535625369 [u'food', u'table', u'came', u'server', u'waitress', u'order', u'ordered', u'beer', u'after', u'game']
[ 0.99477047  0.99451357  0.99450679  0.99407517  0.99377927  0.99055294
  0.98924865  0.98852023  0.98797304  0.98775524]

AttributeErrorTraceback (most recent call last)
<ipython-input-1-06ef334eb7c6> in <module>()
     19 
     20 plt = vectorsearch.visualize_topic(rev_topic, num_topics=6, save_path='/home/carlson/web/images/insight/test.png')
---> 21 plt.show()

AttributeError: 'NoneType' object has no attribute 'show'

In [ ]:


In [25]:
plt.plot([1,2])


Out[25]:
[<matplotlib.lines.Line2D at 0x7fb1a5c52a50>]

In [10]:
import word2vec
word2vec = word2vec.Word2Vec.load('../output/word2vec_bars.model')

In [11]:
word2vec.most_similar({'beer':1})


Out[11]:
[(u'brew', 0.8065862655639648),
 ('microbrews', 0.757482647895813),
 (u'draft', 0.7386277914047241),
 ('micro-brews', 0.7076277732849121),
 (u'import', 0.6859886050224304),
 ('draught', 0.6843554973602295),
 ('pint', 0.6778723001480103),
 ('microbrew', 0.6768009662628174),
 ('micros', 0.6647520065307617),
 ('stella', 0.6498304605484009)]

In [12]:
topic_word_distributions = vectorsearch.bus_lda.lda.components_
feature_names = vectorsearch.bus_lda.tf_vectorizer.get_feature_names()

for i_topic in top_n_topics:
    topic_vec = topic_word_distributions[i_topic]
    
    words = {feature_name:topic_vec[i_feature] for i_feature, feature_name in enumerate(feature_names)} 
    print word2vec.most_similar(words)
    print 
    #print vectorsearch.GetTopicWords(words, )


[('on-tap', 0.6547501683235168), ('micro-brews', 0.6507043242454529), ('macro', 0.6144199967384338), ('hard-to-find', 0.5762535333633423), ('tremens', 0.5379459857940674), ('beercider', 0.5347697734832764), ('magners', 0.5161868929862976), ('peroni', 0.515639066696167), ('takeover', 0.5094289779663086), ('microbrewed', 0.505642831325531)]

[('recording', 0.5654067397117615), ('playhouse', 0.5553803443908691), ('staging', 0.5533091425895691), ('documentary', 0.5354198217391968), (u'sequence', 0.5283054113388062), ('recorded', 0.5275779962539673), ('ticketed', 0.5245670080184937), ('trapeze', 0.5236397981643677), (u'headset', 0.5235076546669006), ('hypnotized', 0.5208207368850708)]

[('soupsalad', 0.5353297591209412), ('foie-gras', 0.5209020376205444), ('pepper-crusted', 0.5076851844787598), ('heartier', 0.4892144203186035), ('healthful', 0.48864519596099854), ('soup-and-salad', 0.4845465123653412), ('farro', 0.4774717092514038), ('stir-fry', 0.47416752576828003), ('kisra', 0.471383661031723), ('pre-selected', 0.47017961740493774)]

[('bar', 0.5092723369598389), ('hang-out', 0.5035831928253174), ('place', 0.49669766426086426), ('play-music', 0.4880654513835907), ('out-there', 0.48037925362586975), ('on-the-street', 0.4766116738319397), ('people', 0.4742978513240814), ('walk-around', 0.47404131293296814), ('have-fun', 0.46461808681488037), (u'flood', 0.459128737449646)]

[('room-key', 0.527092456817627), ('5k', 0.49921536445617676), ('walk-around', 0.49578702449798584), ('stand-in-line', 0.48447325825691223), ('renting', 0.4798318147659302), ('transportation', 0.47822773456573486), ('donation', 0.4776236414909363), ('ticketed', 0.4734191298484802), ('on-the-street', 0.47061994671821594), ('get-in', 0.4673961400985718)]

[('sammies', 0.5060423612594604), ('healthful', 0.49352145195007324), ('1275', 0.448365718126297), ('meatless', 0.4475715458393097), ('gluten-free', 0.4421044588088989), ('foodwise', 0.44149062037467957), ('wursts', 0.43637359142303467), ('nontraditional', 0.4184848964214325), ('french-fries', 0.41193461418151855), ('half-pound', 0.406582772731781)]

[('carry-out', 0.5062217712402344), ('soup-and-salad', 0.4841962158679962), ('take-out', 0.48085418343544006), ('healthful', 0.46020257472991943), ('soupsalad', 0.4559124708175659), ('at-least', 0.4418348968029022), ('pre-made', 0.4115612506866455), ('one', 0.40837234258651733), ('gluten-free', 0.40659651160240173), ('fast-food', 0.4036417603492737)]

[('bar', 0.5234275460243225), ('walk-in', 0.48420315980911255), ('walk-around', 0.46516096591949463), (u'drink', 0.4500669240951538), ('hang-out', 0.4419266879558563), ('sit-down', 0.43767696619033813), ('place', 0.408968448638916), ('leisure', 0.40831321477890015), ('sit-around', 0.40153267979621887), ('well-stocked', 0.3995237946510315)]

[('grazing', 0.41817453503608704), ('gluten-free', 0.41723042726516724), ('sit-down', 0.41137194633483887), ('also', 0.4083687663078308), ('redwhite', 0.3999023139476776), ('walk-in', 0.39643537998199463), (u'cookbook', 0.3939911425113678), ('4-6', 0.39187562465667725), ('self-service', 0.3904525637626648), ('non-alcoholic', 0.3874114751815796)]

[('healthful', 0.505614697933197), ('soup-and-salad', 0.49999088048934937), ('soupsalad', 0.46071022748947144), ('seitan', 0.44865816831588745), ('gorditas', 0.4436034560203552), ('chipssalsa', 0.44096094369888306), ('machaca', 0.4369865953922272), ('houmous', 0.4335595965385437), ('carry-out', 0.4288868308067322), ('pre-made', 0.42306041717529297)]

[('kisra', 0.543602466583252), ('pad-thai', 0.5317939519882202), ('pan-roast', 0.526111900806427), ('green-curry', 0.5241729617118835), ('pisto', 0.5176573991775513), ('healthful', 0.5090059041976929), ('soupsalad', 0.5086833238601685), ('red-curry', 0.5084079504013062), ('porco', 0.4982275366783142), ('heartier', 0.49718815088272095)]

[('sammies', 0.574152946472168), ('french-fries', 0.5680028200149536), ('american-cheese', 0.5429219603538513), ('soupsalad', 0.5320847034454346), ('black-bean', 0.5272148847579956), ('healthful', 0.5236890316009521), ('kaiser', 0.5043490529060364), ('burger-and-fries', 0.49939441680908203), ('come-with', 0.4981990456581116), ('club-sandwich', 0.4968724548816681)]

[(u'u', 0.586947500705719), ('even', 0.5833797454833984), ('even-though', 0.5568888187408447), ('at-least', 0.5448140501976013), ('check-with', 0.5083034038543701), ('come-over', 0.49409568309783936), ('retrieve', 0.4879398047924042), ('credit-cards', 0.4843982458114624), ('sit-down', 0.4837633967399597), ('credit-card', 0.4793306887149811)]

[('nicholsons', 0.5058215856552124), ('belhaven', 0.4887256324291229), ('cloister', 0.46262896060943604), ('tennents', 0.4512452781200409), ('macro', 0.45035749673843384), ('old-man', 0.4436134397983551), ('crag', 0.43696725368499756), ('tollcross', 0.43250709772109985), ('newington', 0.42870181798934937), ('convenience-store', 0.41902822256088257)]

[('dance-floor', 0.5924915075302124), ('main-room', 0.549978494644165), ('people', 0.5385692119598389), ('get-up', 0.533581554889679), ('get-in', 0.530096173286438), ('get-into', 0.5087651610374451), ('stand-in-line', 0.5056946277618408), ('swarmed', 0.5043419599533081), ('in-line', 0.5030391216278076), ('lapdance', 0.4978155195713043)]

[('ainsi', 0.9275541305541992), ('detre', 0.9262288808822632), ('vers', 0.9254355430603027), ('quun', 0.9246653318405151), ('quune', 0.9223595857620239), ('vient', 0.9208523035049438), ('decouvrir', 0.9207836389541626), ('lorsque', 0.9205660820007324), ('compte', 0.915930986404419), ('tant', 0.9152344465255737)]

[('pad-thai', 0.6172152757644653), ('unagi', 0.5877277255058289), ('ika', 0.5747519731521606), ('green-curry', 0.5538158416748047), ('chirashi', 0.5528947114944458), ('pan-roast', 0.5511738061904907), ('healthful', 0.5461515784263611), ('spring-roll', 0.5354868769645691), ('aji', 0.5335656404495239), ('tiraditos', 0.5285044312477112)]

[('healthful', 0.553087592124939), ('sammies', 0.5288299322128296), ('black-bean', 0.5117930769920349), ('meatless', 0.4982501268386841), ('american-cheese', 0.4966337978839874), ('carvery', 0.47891518473625183), ('heartier', 0.4764081835746765), ('kaiser', 0.47612252831459045), ('french-fries', 0.4743710160255432), ('open-faced', 0.46209025382995605)]

[('eigentlich', 0.9487757086753845), ('gute', 0.9432682991027832), ('vielleicht', 0.9428591728210449), ('dazu', 0.9410887360572815), ('viele', 0.9393306970596313), ('seine', 0.9389042854309082), ('keinen', 0.9383755922317505), ('abend', 0.9381824731826782), ('zwar', 0.9379448890686035), ('richtig', 0.937127947807312)]

[('homecooked', 0.687390148639679), ('shanghai', 0.6479090452194214), (u'philippine', 0.6227261424064636), ('michoacan', 0.6066682934761047), ('vieja', 0.5746031999588013), ('tex-mex', 0.5732633471488953), ('satara', 0.5716061592102051), ('ropa', 0.5695248246192932), ('tapsilog', 0.5567358732223511), ('pancit', 0.5556012392044067)]


In [47]:
vectorsearch.bus_lda.lda.components_.shape


Out[47]:
(20, 10000)

In [3]:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
plt.plot([1,2,3])
plt.savefig('myfig')

plt.scatter(0,1)
plt.savefig('/home/carlson/web/images/insight/test4.png')



In [6]:
print vectorsearch.bus_lda_topics[vectorsearch.bus_lda_topics.business_id==bus_id].topic_vector.values


0       [0.00332191089528, 0.00332191090332, 0.0033219...
1       [0.57340506259, 0.00157079118967, 0.0015707911...
2       [0.00825910560656, 0.00825910558805, 0.0082591...
3       [0.0694650223507, 1.03776985827e-06, 0.0013300...
4       [0.356803175668, 3.86750170599e-06, 0.68760925...
5       [0.000151147365989, 0.000151147366705, 0.00015...
6       [5.96995680159e-05, 0.0310161854174, 5.9699567...
7       [0.00118132102826, 0.00118132103467, 0.2483894...
8       [0.157983768197, 0.982354241917, 3.17356062175...
9       [2.47567103322e-05, 0.307133362081, 2.47567103...
10      [9.36875879665e-05, 9.36875885459e-05, 0.02838...
11      [0.469285545951, 1.73038050314e-05, 0.81187775...
12      [0.000177569041798, 0.000177569042403, 0.00017...
13      [0.81156629715, 0.0524456148148, 0.00023687866...
14      [0.51756775547, 0.148178855523, 0.000131590672...
15      [0.0986571312876, 0.808608143238, 9.0675161813...
16      [5.21235338442e-05, 5.21235338894e-05, 5.21235...
17      [0.218038710206, 0.000670848924262, 0.00067084...
18      [0.000756189205593, 0.000756189203275, 0.00075...
19      [0.0900094758428, 5.75586469624e-05, 5.7558647...
20      [1.86983045532e-05, 0.062748845755, 0.05806762...
21      [8.8774756818e-06, 8.87747570546e-06, 0.126864...
22      [0.000846734283734, 0.0008467342836, 0.0008467...
23      [0.0659975392429, 0.14756266577, 0.00045745784...
24      [0.000699447851244, 0.000699447854789, 0.00069...
25      [0.0352744765733, 0.000113142814783, 0.0001131...
26      [9.60493210309e-06, 9.60493212642e-06, 0.01211...
27      [0.118556053275, 1.63518345324e-05, 1.63518345...
28      [9.43890973265e-05, 0.0143679854557, 9.4389096...
29      [3.70545584751e-05, 3.7054558481e-05, 3.705455...
                              ...                        
4553    [1.74624226857e-05, 0.00747669199909, 1.746242...
4554    [0.310544023624, 0.000158870934724, 0.00015887...
4555    [1.91298126317e-05, 1.91298126837e-05, 1.91298...
4556    [0.000581089284758, 0.258832377813, 0.00058108...
4557    [0.000128108445398, 0.000128108446163, 0.00012...
4558    [0.000693695206729, 0.767949966865, 0.00069369...
4559    [0.000103202619933, 0.000103202619814, 0.00010...
4560    [0.000200520060084, 0.000200520060188, 0.00020...
4561    [1.31816298214e-05, 0.0046149343208, 0.0487728...
4562    [1.99726626487e-05, 1.99726627734e-05, 0.02607...
4563    [0.000462340481824, 0.000462340479815, 0.13870...
4564    [0.000297430397875, 0.00029743039852, 0.000297...
4565    [0.109657161037, 0.0203361077169, 0.0339372314...
4566    [7.53682798204e-05, 7.53682799359e-05, 0.05671...
4567    [0.000422366813664, 0.365011851402, 0.00042236...
4568    [0.0165772703065, 3.92579603219e-05, 0.0231075...
4569    [0.0046605502586, 0.0241446982159, 0.011693966...
4570    [2.38145391805e-05, 2.38145392674e-05, 0.09532...
4571    [4.54108059514e-05, 4.54108058548e-05, 4.54108...
4572    [0.318188655612, 8.34417804823e-05, 8.34417795...
4573    [4.93159310758e-05, 4.931593107e-05, 4.9315931...
4574    [0.411344773687, 3.85862178391e-06, 0.00261642...
4575    [0.999865676097, 0.000200757304418, 0.00020075...
4576    [0.00037446336421, 0.841406618345, 0.000374463...
4577    [0.000347214954032, 0.000347214954317, 0.00034...
4578    [0.0437383787657, 0.0641565982053, 0.000451798...
4579    [0.00065995525184, 0.000659955245543, 0.000659...
4580    [0.000190401462566, 0.000190401461349, 0.19364...
4581    [0.143898509648, 0.0876387175025, 9.9700656686...
4582    [0.0157366768515, 7.71304698491e-05, 7.7130469...
Name: topic_vector, dtype: object

In [ ]:


In [ ]: