In [2]:
import pandas as pd
import numpy as np
from ggplot import *
import time
import datetime as dt
import random
import scipy as sc
import scipy.stats as ss
import statsmodels.api as smapi
import statsmodels.graphics as smgraphics
from statsmodels.formula.api import ols
from scipy.optimize import curve_fit
from scipy.interpolate import interp1d
import json
In [4]:
with open ('data/deps-size-all.ldjson', "r") as datafile:
text = '[%s]' % ',\n'.join(datafile.read().strip().split('\n'))
d_packages = pd.read_json(text)
d_packages.columns
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-f2f55c1e4451> in <module>()
2 with open ('data/deps-size-all.ldjson', "r") as datafile:
3 text = '[%s]' % ',\n'.join(datafile.read().strip().split('\n'))
----> 4 d_packages = pd.read_json(text)
5
6 d_packages.columns
/Users/anand/data/npm/env/lib/python2.7/site-packages/pandas/io/json.pyc in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit)
196 obj = FrameParser(json, orient, dtype, convert_axes, convert_dates,
197 keep_default_dates, numpy, precise_float,
--> 198 date_unit).parse()
199
200 if typ == 'series' or obj is None:
/Users/anand/data/npm/env/lib/python2.7/site-packages/pandas/io/json.pyc in parse(self)
264
265 else:
--> 266 self._parse_no_numpy()
267
268 if self.obj is None:
/Users/anand/data/npm/env/lib/python2.7/site-packages/pandas/io/json.pyc in _parse_no_numpy(self)
481 if orient == "columns":
482 self.obj = DataFrame(
--> 483 loads(json, precise_float=self.precise_float), dtype=None)
484 elif orient == "split":
485 decoded = dict((str(k), v)
ValueError: Expected object or value
In [26]:
d_packages['num_dependencies'] = d_packages['dependencies'].map(len)
d_packages['num_devdependencies'] = d_packages['devDependencies'].map(len)
In [35]:
d_packages.describe()
Out[35]:
latest
size
num_dependencies
num_devdependencies
count
5524
5504.000000
5524.000000
5524.000000
mean
0.160572
331497.266897
4.238595
2.807748
std
0.367169
2109101.184107
6.067839
3.698237
min
False
165.000000
0.000000
0.000000
25%
0
4507.750000
0.000000
0.000000
50%
0
14104.500000
2.000000
1.000000
75%
0
83817.250000
5.000000
4.000000
max
True
27056268.000000
32.000000
28.000000
In [36]:
d_packages[d_packages['size'] > d_packages['size'].quantile(.9)][d_packages['latest']]
Out[36]:
dependencies
description
devDependencies
latest
name
size
version
num_dependencies
num_devdependencies
28
[ejs, grunt, grunt-contrib-clean, grunt-contri...
1:1 ticket and laptop system
[mocha]
True
1-1-help-desk-system
306666
0.0.7
18
1
52
[qs, debug, mime, gm, ws, musicmetadata, mmmag...
10er10 is an HTML5 audio jukebox. It works on ...
[]
True
10er10
22254752
0.23.0
11
0
62
[express, mongoskin, mustache, jade, sugar, va...
CRUD over express and mongodb
[]
True
10tcl
1393901
0.0.10
8
0
142
[express, static-favicon, morgan, cookie-parse...
None
[]
True
2048.io
330707
0.0.1
7
0
147
[]
A node.js module for querying genome data in t...
[]
True
23query
8261189
0.0.1
0
0
334
[]
3kenizer
[mocha, jamjs]
True
3kenizer
1347501
0.1.2
0
2
795
[async, azure, bcrypt-nodejs, cloudinary, conn...
47Pages fork of the Node.js CMS: Web Applicati...
[browserify, chalk, fs-extra, gulp, gulp-cover...
True
47pages-keystone
1729805
0.0.5
23
21
906
[promise, request, ws]
Node.js module for 6px
[]
True
6px
477885
0.1.6
3
0
1058
[acorn-6to5, ast-types, chokidar, commander, e...
Turn ES6 code into readable vanilla ES5 with s...
[browserify, chai, istanbul, jshint, jshint-st...
True
6to5
469274
1.15.0
15
9
1175
[shelljs]
An all-in-one package for 7zip
[]
True
7zjs
532583
1.0.1
1
0
1252
[underscore, backbone]
One Collection different models, mapped easy v...
[grunt]
True
Backbone.Chosen
7479381
0.1.1
2
1
1404
[coffee-script]
Hierarchical time-series axis for charts with ...
[coffeedoctest]
True
ChartTime
332943
0.1.0
1
1
1562
[underscore, underscore-data, simple-mime, coo...
Simple middleware
[]
True
F
6403985
0.0.4
10
0
1580
[]
Generate massive amounts of fake contextual data
[jshint, istanbul, mocha, node-minify, optimis...
True
Faker
277301
0.7.2
0
6
1622
[async, coffee-script, datejs, discount, expre...
Friggeri.net
[]
True
Friggeri.net
5048320
0.2.6
8
0
1642
[azure, step, options]
Github hook for node.js apps hosted in Microso...
[mocha, should]
True
GitAzure
407924
0.0.7
3
2
1655
[tape, kdtree]
Convert GPS latitude/longitude pairs to US zip...
[]
True
Gps2zip
784590
1.0.0
2
0
1667
[mongodb]
GridFS made easy.
[]
True
GridFS
273140
0.3.0
1
0
1735
[nopt, generic-pool, iconv, ipaddr.js, semver,...
An SMTP Server project.
[nodeunit]
True
Haraka
325500
2.5.0
14
1
1852
[]
CSS-like selectors for JSON
[]
True
JSONSelect
245760
0.4.0
0
0
1890
[irrklang]
a real-time multiuser audio sequencer
[]
True
JSONloops
2095966
0.1.0
1
0
1977
[]
A JavaScript implementation of a extendable, f...
[]
True
JSV
1031217
4.0.2
0
0
2160
[tztime, jsduckify, JSON2]
Illuminating the forest AND the trees in your ...
[coffee-script, coffeedoctest, uglify-js, node...
True
Lumenize
1408413
0.7.2
3
12
2178
[express, nohm, node-native-zip]
Ultra lightweight MVC for Node.js
[]
True
Monorail.js
440742
1.0.1
3
0
2242
[es5-shim, shelf.js, commander, smoosh, JSUS, ...
Powerful and versatile 100% javascript object ...
[docker, mocha, should]
True
NDDB
229784
0.9.8
7
3
2291
[request, mime, nomnom, xml2js]
Generates an NFO for a video file
[]
True
NFO-Generator
27056268
1.0.0beta25
4
0
2354
[debug, ffi, ref, ref-struct]
The Node.js ⇆ Objective-C bridge
[libxmljs, dox, memwatch, highlight.js, jade, ...
True
NodObjC
541861
1.0.0
4
6
2362
[express, socket.io, coffee-script]
Project to create an keynote node presenter
[]
True
NodeKeynote
453519
0.0.1
3
0
2564
[underscore, underscore.string, mongodb, redis...
[stylus, less, uglify-js, pathfinder, async, m...
True
Rajas
695798
0.0.1
8
15
2653
[canvas]
Charting and graphing node.js module
[]
True
TeeChart
390703
0.1.4
1
0
2705
[]
None
[]
True
Thimble
1232864
0.0.1
0
0
2828
[tar]
WordNet 3.1 Database files
[]
True
WNdb
9907442
3.1.1
1
0
2837
[express]
A selectively caching proxy for repeatable cli...
[]
True
WebParrot
3434439
0.0.3
1
0
2958
[cli-color, duplex]
A short node workshop
[]
True
a-taste-of-node
819299
2.1.0
2
0
2999
[marked, mdash, highlight.js]
HTML Academy Developer Tools
[]
True
a2
9147496
0.0.1
3
0
3154
[]
Jots and tittles and JSON
[express, mounce-koine-dictionary, lexham-engl...
True
aaronshaf-bible-data
3134412
2.2.1
0
4
3174
[]
AngularJS A/B Test Service and Directives for ...
[gulp, gulp-complexity, gulp-jshint, gulp-ngmi...
True
ab-test
769976
0.0.1
0
13
3619
[xml2js, request, commander]
Unofficial node module for calling ABBYY's OCR...
[mocha]
True
abbyy-ocr
418410
0.0.4
3
1
3645
[lodash, express, socket.io, async, dargs, mkd...
Core module for abc-web
[]
True
abc-web-core
4815543
0.0.5
9
0
3662
[glob]
A Protractor JS pre-processor to load ABE Spec...
[abe-spec, bower, grunt, grunt-cli, grunt-cont...
True
abe-protractor
418235
0.1.3
1
16
4152
[level]
Autocomplete
[chai, coveralls, istanbul, mocha]
True
ac
3770564
0.0.2
1
4
4547
[split, sext, minimist, through, mysql, csv-st...
php light FE/BE framework
[tape]
True
aced
1229268
0.0.12
10
1
4579
[coffee-script, async, mongoskin, j3]
ERROR: No README.md file found!
[packflow]
True
acf
239597
0.2.1
4
1
4916
[]
ECMAScript parser
[regenerate, unicode-7.0.0]
True
acorn
422735
0.10.0
0
2
4930
[]
Acorn fork used by 6to5
[regenerate, unicode-7.0.0]
True
acorn-6to5
729655
0.9.1-14
0
2
4932
[]
ECMAScript parser
[regenerate, unicode-7.0.0]
True
acorn-bind-operator
416746
0.7.1
0
2
4956
[]
Alternative React JSX parser
[regenerate, unicode-7.0.0]
True
acorn-jsx
690564
0.9.1-7
0
2
4962
[]
ECMAScript parser
[]
True
acorn-semicolon
252206
0.1.1-semicolon
0
0
4968
[concat-stream, fpcalc, hyperquest]
Get music metadata from AcoustID Web Service
[tape]
True
acoustid
3093882
1.2.1
3
1
5114
[express, socket.io, underscore, node-uuid]
Use Socket.io to drive CasperJS
[mocha, expect.js, cucumber, http-server]
True
action-at-a-distance
1703754
1.0.20
4
4
5167
[grunt, browser_fingerprint, node-resque, asyn...
actionhero.js is a multi-transport API Server ...
[mocha, should, request, redis-sentinel-client]
True
actionhero
230400
9.4.1
15
4
5226
[]
Define your Class with JavaScript.
[grunt, grunt-contrib-clean, grunt-contrib-con...
True
active-class
268797
0.0.1
0
18
5260
[cli, fs-extra, showdown, underscore, undersco...
A tool for generating reactive documents from ...
[backbone, browserify, codemirror, coffee-scri...
True
active-markdown
473986
0.3.2
5
11
5289
[any-db, mysql, pg, sqlite3]
Rails ActiveRecord inspired for Nodejs.
[any-db, mysql, pg, sqlite3]
True
active_record
275688
0.1.4
4
4
5502
[]
Functions to create, process and test objects
[grunt, grunt-contrib-jshint, grunt-mocha-cli,...
True
adam
562403
0.2.0
0
9
5517
[bootstrap]
Base css and assets for use across projects
[]
True
adamrneary-base-css
9876362
0.0.1
1
0
5520
[async, colors, glob, iconv-lite, lazy, rimraf...
A light weight native JavaScript implementatio...
[nodeunit]
True
adamvr-geoip-lite
19006605
1.2.0
7
1
In [21]:
# distribution of sizes
hist_size = ggplot(aes(x='size'), data=d_packages) + geom_histogram()
hist_size.draw()
Out[21]:
In [18]:
# distribution of dependency count
hist_deps = ggplot(aes(x=d_packages.dependencies.map(len)), data=d_packages) + geom_histogram() + xlab('dependencies')
hist_deps.draw()
Out[18]:
In [37]:
# distribution of devDependency count
hist_devdeps = ggplot(aes(x='size'), data=d_packages) + geom_histogram() + xlab('dev dependencies')
hist_devdeps.draw()
Out[37]:
In [41]:
scat = ggplot(aes('num_dependencies','num_devdependencies'), data=d_packages) + geom_point()
scat.draw()
Out[41]:
In [ ]:
# remove some outliers
subset = d_packages
print( len(subset)/float(len(d_packages)))
In [5]:
subset.columns
Out[5]:
Index([u'id', u'latest_version', u'version_major', u'version_minor', u'version_patch', u'created', u'modified', u'maintainer_count', u'version_count', u'keywords', u'createdAt', u'modifiedAt', u'deltaSinceModified', u'age', u'delta', u'log_delta', u'deltaSinceModifiedDays'], dtype='object')
In [12]:
Content source: anandthakker/npm-data-exploration
Similar notebooks: