In [12]:
import numpy as np
from sklearn.manifold import TSNE
import pickle
bus_vecs = pickle.load(open('../output/business_LDA_vectors.pickle','r'))

X = np.array(np.vstack(bus_vecs.topic_vector.values))
print bus_vecs
model = TSNE(n_components=2, random_state=0)
np.set_printoptions(suppress=True)
points = model.fit_transform(X)


                                   business_id  \
0                       lLI8ObL8aCVbkrrtAW0EHw   
1                       IoxqaBNvzKGuVtaYe6g0Cw   
2                       FFdlPSZCGgTdg1CAfrlvlw   
3                       ahx_KLfRclCmIYSi6iodcQ   
4                       7GNCm1Nl2NOBu31bjkOhcQ   
5                       x9XX-dcVZfpU_J5UWuWzeg   
6                       W0eocyGliMbg8NScqERaiA   
7                       z5CwyvKLt88rmWMG-H6MoQ   
8                       bwu0cUZfksuu-uzTPrrl2A   
9                       Xq9tkiHhyN_aBFswFeGLvA   
10    49e00015973f7d0e8eaffa3e042d8124bd605e26   
11                      6K-wOhRGrgqfU7arDtWuUg   
12    2a7464f9faa0b872fcc3132045a4b29e7661f83c   
13                      FCc2t3iBWZ5lQM6CsFSVXQ   
14                      pe6OU0r4NqVl6qfurqXInQ   
15    f7f932d224c2cac91ee627799189f89cb1709ecf   
16                      f6KGn2OyYk6LWEpnUEgerw   
17                      vE0-p6JOWQAzp3TAO2Ef6w   
18                      e6i8v7VkD8yJk02PH4d9VA   
19    4a46e79fe835ab3b42c5021510584b7d4cbcec00   
20                      Y_bMIBS6QcKtGxRpyU8uEg   
21                      hrnvVr3hmb2oD3oUnJrNSw   
22                      WuCcv_Dyd_1B_1-4jwK7sQ   
23                      bVfkZMFXO7sSr_VNWXWs9w   
24                      Rbcb8yJ8_uqnxACZ991cQg   
25    04e1287c242377722cd669c7898134afde632eaf   
26                      cLojUtqkFpVIHFEvA_5Atw   
27    e1a8a9a34902ee14fd8d79ed0d12002a7413123c   
28                      lJbcTmZxxbLum7HRRydsfA   
29                      wrxMjl7YD90m1yeNunwKOw   
...                                        ...   
5590                    CXtXYZRF5SWAIzcjAOM5jA   
5591  2cfe5e6899dd970a9828f7615e6ed04a9eaf6a80   
5592  e8231b90ee9d5b462111ce206b42164f02bf0736   
5593  6d4eab23b8f43970bac2a0ba3299abf06e72edd6   
5594                    VHehbRMOnPFVjnNdKGf-qQ   
5595                    LTjPK8lhnFcn3MJrmVpKqw   
5596  8e1e7c2238f8338991cb3e95c53733504c6d8615   
5597  1daad68d30f5ad2288c7ac865be408e0efa3b759   
5598                    xtW28KmwigJ8fAPgT6OkFw   
5599                    dELs0VO0zM0KphfwwVpO9A   
5600                    V2isTBBfO0NgLzXO9oq1Mg   
5601                    WNy1uzcmm_UHmTyR--o5IA   
5602                    MORHJ6sMdwkT8DXArbpSXA   
5603                    xu0QNTBquTDx5B4H4qyX-Q   
5604                    gV9IOJ2YFSfTvg_IRjyHAg   
5605  ed728071c9fb9eaf35746b85a041b357382ecfb3   
5606                    HmNMIuTR3lH0OTpkog2dTQ   
5607                    tHQCKWGlYUuef9NFsWZz9g   
5608                    Q1A_wVZypQeM1A3f61wq_A   
5609                    XyE2QzPQjhvMXpSyw5Kvog   
5610                    iFnk7QzYMSv9p8Kd1ISp3g   
5611                    RZma06CqXbOIMzTQL-eJFQ   
5612                    t714WoGjdRWPvzMMN-a1XA   
5613                    WzL3daO29hcteDG1WdRotQ   
5614                    BwcQieTOrADkzYPER984Xg   
5615                    BPN8GzoigZNQHEve5iP-gw   
5616                    nckQIGiiUxztAQ2Boo6Tlw   
5617                    K48K9_N2EOlk1nCLhXxlUg   
5618  8c56dacd539a762baeb271c167d2aee5a228e47f   
5619  307e7d9901dbf5355571687184a8daa9c45205c2   

                                           topic_vector  
0     [0.000229062930194, 0.000227179542173, 0.00022...  
1     [0.00099219268288, 0.0291439965095, 0.00097644...  
2     [0.0398022191712, 0.000240219382268, 0.0002403...  
3     [0.000731347205382, 0.327513927948, 0.00072768...  
4     [0.00342293495591, 0.00341345773075, 0.5053684...  
5     [0.00064194013525, 0.000653512183148, 0.485192...  
6     [0.054672548539, 7.75446316518e-05, 7.76619584...  
7     [0.00444497870996, 0.416838402836, 0.004431963...  
8     [0.0133112554322, 1.98392427299e-05, 0.0580869...  
9     [0.0246230217721, 0.012668597928, 0.0007740738...  
10    [9.76950211031e-05, 9.77812852201e-05, 0.07450...  
11    [0.00047923915469, 0.00047591447753, 0.0004782...  
12    [0.0182598858342, 8.65141315185e-05, 8.6426727...  
13    [0.000681537555438, 0.00069318507884, 0.000687...  
14    [0.000598862897965, 0.000593558319285, 0.00059...  
15    [5.13393045088e-05, 5.05989436086e-05, 5.06480...  
16    [0.0265520517996, 0.420712859397, 0.0014377147...  
17    [0.0612020476259, 0.00144883959363, 0.00143570...  
18    [0.000222642746562, 0.0209163725082, 0.0024163...  
19    [6.6284692934e-05, 0.00338143321082, 6.6305339...  
20    [0.0692388931257, 0.00159674719343, 0.00160435...  
21    [0.0206622417242, 0.000231005354802, 0.0002309...  
22    [0.00329432826651, 6.86426290956e-05, 0.718018...  
23    [0.0015207275909, 0.00150003868912, 0.00151460...  
24    [0.660426191639, 0.000498362147594, 0.39637254...  
25    [5.87302194595e-05, 5.84943575413e-05, 5.86700...  
26    [0.00300733905256, 1.67598632934e-05, 1.691668...  
27    [0.000111879363256, 0.000111898998607, 0.00011...  
28    [0.00408966124486, 0.00062106075685, 0.6408653...  
29    [0.000224135385261, 0.000223934888868, 0.04001...  
...                                                 ...  
5590  [0.0127557456572, 0.0933047167245, 0.000498899...  
5591  [0.000350716826818, 0.0147052055073, 0.0437674...  
5592  [7.40436956339e-05, 0.00194517503572, 7.405903...  
5593  [0.0498217506147, 6.68635237643e-05, 6.6851628...  
5594  [0.014006027716, 0.000230834745542, 0.91278553...  
5595  [0.00599986405558, 0.00598506315714, 0.0059943...  
5596  [6.61144736192e-05, 6.58173725337e-05, 0.00922...  
5597  [9.65441088578e-05, 9.61003713859e-05, 9.71942...  
5598  [0.000234979512753, 0.000236929495802, 0.16574...  
5599  [0.000359313665384, 0.0659738098116, 0.0003604...  
5600  [0.0770447056949, 0.000189091145292, 0.0001889...  
5601  [0.00711598139397, 0.00170744268729, 0.8329296...  
5602  [0.00042252454336, 0.000421178136708, 0.046171...  
5603  [0.000891776236029, 0.000888472386586, 0.00089...  
5604  [0.0612798889885, 0.000160859157798, 0.0257273...  
5605  [0.00082450407161, 9.99362178793e-05, 0.006428...  
5606  [0.000694791715034, 0.000698061539585, 0.00070...  
5607  [0.00194592310927, 0.00194127596081, 0.0356511...  
5608  [0.0239435498399, 0.00131189002523, 0.00130609...  
5609  [0.0352267184806, 9.77612622931e-05, 9.7511255...  
5610  [0.000389317953791, 0.000390171620499, 0.00039...  
5611  [0.0160996844059, 0.00329314587979, 0.00331914...  
5612  [0.00189499390426, 0.000214127057265, 0.480391...  
5613  [0.000748862918798, 0.000746399851203, 0.98098...  
5614  [0.0100522671944, 0.0100645372138, 0.010021158...  
5615  [0.0210228679557, 0.000213179020944, 0.0002136...  
5616  [0.00355402723667, 0.00354915480564, 0.0035450...  
5617  [0.00328924658017, 0.0312638704435, 0.00328918...  
5618  [0.000108388723312, 0.000108558797714, 0.00010...  
5619  [4.64968216715e-05, 4.63563840388e-05, 4.62766...  

[5620 rows x 2 columns]

In [16]:
print points.shape
plt.scatter(points[:,0],points[:,1], s=1)


(5620, 2)
Out[16]:
<matplotlib.collections.PathCollection at 0x7fda2cb19cd0>

In [17]:
from bokeh.charts import Bar, output_file, show
from bokeh.charts.attributes import cat, color
from bokeh.charts.operations import blend
from bokeh.charts.utils import df_from_json
from bokeh.sampledata.olympics2014 import data

# utilize utility to make it easy to get json/dict data converted to a dataframe
df = df_from_json(data)

# filter by countries with at least one medal and sort by total medals
df = df[df['total'] > 0]
df = df.sort("total", ascending=False)

bar = Bar(df,
          values=blend('bronze', 'silver', 'gold', name='medals', labels_name='medal'),
          label=cat(columns='abbr', sort=False),
          stack=cat(columns='medal', sort=False),
          color=color(columns='medal', palette=['SaddleBrown', 'Silver', 'Goldenrod'],
                      sort=False),
          legend='top_right',
          title="Medals per Country, Sorted by Total Medals",
          tooltips=[('medal', '@medal'), ('country', '@abbr')])


output_file("stacked_bar.html", title="stacked_bar.py example")


show(bar)


-c:12: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)

In [25]:
from nvd3 import multiBarHorizontalChart
chart = multiBarHorizontalChart(name='multiBarHorizontalChart', height=400, width=400)
xdata = [-14, -7, 7, 14]
ydata = [-6, 5, -1, 9]
y2data = [-23, -6, -32, 9]

extra_serie = {"tooltip": {"y_start": "", "y_end": " balls"}}
chart.add_serie(name="Serie 1", y=ydata, x=xdata, extra=extra_serie)

extra_serie = {"tooltip": {"y_start": "", "y_end": " calls"}}
chart.add_serie(name="Serie 2", y=y2data, x=xdata, extra=extra_serie)
chart.buildcontent()

In [26]:
chart


js not initialized - pausing to allow time for it to load...
Out[26]:

In [ ]: