In [2]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell runs, so edits to jsitbad are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
# jsitbad supplies the loading, feature and plotting helpers used in the cells below
# (presumably a project-local module -- confirm).
import jsitbad
import numpy as np
# Render matplotlib figures inline in the notebook output.
%matplotlib inline


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [3]:
# Load the samples matching the path pattern; each entry is indexed by 'name' here
# and by 'code' / 'color' in later cells.
data = jsitbad.load_js_files('Javascript/hard_js/*')
# Show the name of every loaded sample.
[sample['name'] for sample in data]


Out[3]:
['06b61815fc2af0ca07d76b6228e5db75779cbe43f1f6be97438474dc19984775.bin.out',
 '0a177f9b134ec2f72654df45cac91184a0633f17786ebe85bca78994f4496b36.out',
 '1c0b3327d216196d89bb7ea3cfbabb2458366a731a354c6ed2146109db486114.out',
 '228d303cb00ebe9d5e05fe6f908db907074656ab25934d64ab3d09f2b318bdda.out',
 '2ab4523be1320c32b02a357548dc8e176769310c2ccec82b79e5badddf950544.out',
 '2cee1e15cde38907aa427da3e2161c4894d62e084b5adac92dcd9636bf8580e8.out',
 '308d286e942b956dc2c6bf3c038d5a130f7c84422bca12541ea6a263ef356a3b.out',
 '397b84bffb960bb3c411d84179e42bba9f39cbc4d68cb1dc7c0e232a7f80e3c7.out',
 '3aab5e9cbcc82b133b0e984b040b0ef0e891bf59a541a28db52e84f1f46a6b6f.out',
 '497e7e339f1cdf2944ec4d37de64aba8a6b42500d526638858e08f4121ca71c9.out',
 '4aefff415d714a37d1a28a580b15493707a816006b4031d52a9b4d41061d5395.out',
 '55ffc6702bc45eb91b8d47c7b493d71c2d801517471b8ae74a317b9f1249c325.out',
 '5736428cc753983cc74feabab651261fb5cf62e935e1bb7eb850287bede38cc5.out',
 '5d2813cbfcd5a8c357665140bf3b0f9927d338317be7c8c91bf3d9601f058b8d.out',
 '653474d0a02113770ca19b1e0dc2240e84ff665c4676be45d03bad9973ba3ba5.out',
 '692c41429580d3dc74690f80fc67579c404afcd0ded80776ad36f90b4db9c520.out',
 '69a9b56fc79a21121b5b58d6c5db0a47c13dc6f2305ef2ab7c4de77b67203c88.out',
 '69c93ba9e419ecac8de53c50cfd0ed90fc11fd58f9093a5933b5d108c8412fd5.out',
 '836345ef9d888a25d51fa56a5179956f50f2327481e515b6cd4e221999283be2.out',
 '8475c80ac5ff70bc7cba4643937887d72517d5442edf249d6a7ee30b77b340d4.out',
 '84b64ddf614bafa8834418ff67ae1ad7206b7d029aeba27701a4b45f308f2cbc.out',
 '8506cbff64de2a57c299d5934fb5d2e5769cfba20b5b637853b9996d7299d17c.out',
 '86c33024e043da6487b8d10c5ea36cde01f11ad7f575ab7ae17ec236b429c2a6.out',
 '888498646eca9141e5cfc508c923727f0f007dad6096437ad203478d815245cd.out',
 '8e67715cf49cbe5dba849ff53ce6fe06494b6e9190904650900d1ad40e21b105.out',
 '8f347f9ab1e4c7465aaa90cf196499fdf9d93b92359cac786b1de1e22f408fb8.out',
 '9320fe2e9f343bde9668f7213b9a7789053ae9cfb8fed71b60e6cf7a9d9a1571.out',
 'a21bba07bda13d9d6f30d6df4af475175de29ddfd9b24de0d689c132ec1bf6ae.out',
 'a2da153cf299364df460cd103420eb2416546e515bfbb1de6e1e64f96ccfedaf.out',
 'a95edb2a68cc508da9cdc7b449664fe179c53c2263932362486684b046c0e1e3.out',
 'aa0529ff5a4c1cf90a3c293acf1f9657dcfdee3ac5e7428975d24ce3f38c9de1.out',
 'angular.min.js',
 'b3b4d65a69fc9e9a2f384b16c5de4d677442f94663a0c19812ce1946ee98b93c.out',
 'c4ea89f864cbe379b9d753ccdd671ce022b79987d712213a957c00140dbdd573.out',
 'cb44cc89df1772650660140213a874aa2ff0fd9cb1eada260688ceedac9becf5.out',
 'cd17679552f466267e21c1628369d8afac032df7e6a98b4531d09e30a6ce8c6e.out',
 'Core.Web.js',
 'd3.v3.js',
 'd3.v3.min.js',
 'd87dc1a02b4587be5366434caa3c43615970a24048270c18ff8e28998a0760f6.out',
 'd9233a313422542602b125331ec923237ecb5f67dc1517000879d30f0b5be732.out',
 'de1375a9e6927932752467fe04c4a91592574caf7c334417e86803abf524b3d5.out',
 'e9800f27de42158099774f19019239c0c0b4596c8a099b63cdcc8cd50e0ac718.out',
 'ea041d975d90b765ec5fa354018456d5c49addcaec0d37a29efa7b6fcea6444f.out',
 'eaa9d553b27428afee186f666dcd449b2eae8e86f2e1120c4ed39fd7cdd8d2fb.out',
 'eab4e3502ed44f50d8d144614232616938122c37bd0ff9683490766a794ad635.out',
 'ember.min.js',
 'jquery-2.1.3.js',
 'jspdftest.pdf.out',
 'midori.js',
 'MochiKit.js',
 'MooTools-Core-1.5.1.js',
 'q-4.1.min.js',
 'webix.js']

In [4]:
# Feature matrix: one row per entry of `data`, one column per callable in
# `jsitbad.features`, each callable being applied to the sample's 'code'.
X = np.array([
    [feature(sample['code']) for feature in jsitbad.features]
    for sample in data
])
# Preview the first two rows only.
X[:2]


Out[4]:
array([[  1.67880000e+04,   2.30000000e+01,   7.29913043e+02,
          1.58200000e+03,   0.00000000e+00,   6.00000000e+00,
          4.00000000e+00],
       [  1.55720000e+04,   3.88700000e+03,   4.00617443e+00,
          1.60000000e+01,   0.00000000e+00,   1.00000000e+00,
          3.00000000e+00]])

In [15]:
# Plot the feature matrix with one color per sample (default options).
# No re-import here: jsitbad is imported in the first cell and `%autoreload 2`
# already reloads it before each cell executes.
jsitbad.project_on_plane(X, [sample['color'] for sample in data])


<matplotlib.figure.Figure at 0x10ac1c080>

In [17]:
# Same plot with `unique` set to 't-SNE'
# (presumably restricts the figure to that single embedding -- confirm in jsitbad).
sample_colors = [sample['color'] for sample in data]
jsitbad.project_on_plane(X, sample_colors, unique='t-SNE')


<matplotlib.figure.Figure at 0x10a8840b8>

In [20]:
# Same plot with `unique` set to 'LLE', this time with text labels.
sample_colors = [sample['color'] for sample in data]
# Only samples colored 'g' are annotated with their name; all others get an empty label.
green_only_labels = [
    sample['name'] if sample['color'] == 'g' else ''
    for sample in data
]
jsitbad.project_on_plane(X, sample_colors, unique='LLE', labels=green_only_labels)


<matplotlib.figure.Figure at 0x10af32ba8>

Conclusions provisoires

Les fichiers midori.js et Core.Web.js sont mal positionnés : ils sont pleins de commentaires. Une analyse à base de lexer devrait régler ça, toujours sans parsing.


In [ ]:
# Source text of every sample, used both to train and to transform.
js_sources = [sample['code'] for sample in data]
# train_from_js_tokens returns a callable; applying it to the same sources
# yields the token-based feature matrix.
f = jsitbad.train_from_js_tokens(js_sources)
X_tokens = f(js_sources)

In [ ]:
# Plot the token-based features, colored per sample like the earlier figures.
sample_colors = [sample['color'] for sample in data]
jsitbad.project_on_plane(X_tokens, sample_colors)