In [2]:
# Load the raw price list, one entry per line. The `with` block closes the file
# handle as soon as the lines have been read, instead of leaving it open until
# the garbage collector happens to reclaim it.
with open('stella.txt', 'r') as price_file:
    lines = tuple(price_file)

In [6]:
# Parse every raw line as an integer price and order them from highest to lowest.
prices = sorted(map(int, lines), reverse=True)
# Display everything from index 100 onward, i.e. the lower end of the ordering.
prices[100:]


Out[6]:
[336000,
 336000,
 332300,
 332300,
 321600,
 318400,
 318400,
 317000,
 314500,
 313000,
 305000,
 302500,
 301000,
 292000,
 266500,
 253900,
 242500,
 241000,
 232000,
 230500,
 230500,
 229000,
 226000,
 218500,
 218500,
 216000,
 214750,
 212500,
 211500,
 209000,
 207500,
 204000,
 204000,
 204000,
 200500,
 194500,
 194500,
 192000,
 191000,
 188000,
 187200,
 181750,
 171000,
 171000,
 170500,
 169000,
 168000,
 161000,
 160000,
 158500,
 156000,
 156000,
 156000,
 153600,
 149000,
 138000,
 136300,
 134500,
 133000,
 132000,
 132000,
 126500,
 121000,
 120000,
 118750,
 116500,
 114000,
 112500,
 112500,
 112500,
 112500,
 112500,
 110500,
 108000,
 108000,
 108000,
 107550,
 106250,
 106250,
 105000,
 104250,
 102500,
 102000,
 97000,
 97000,
 96000,
 94000,
 93750,
 93750,
 92750,
 92500,
 92500,
 91000,
 90000,
 90000,
 87500,
 87500,
 87000,
 87000,
 86500,
 86500,
 85000,
 85000,
 82250,
 81250,
 81250,
 80500,
 79500,
 78000,
 75500,
 75000,
 72000,
 71875,
 70600,
 70500,
 69700]

In [4]:
import matplotlib.pyplot as plt
%matplotlib inline

# 1-based rank for every entry of `prices` (rank 1 is the first element).
rank = list(range(1, len(prices) + 1))

plt.scatter(rank, prices)
# Label the figure so it can be read without the surrounding code.
plt.xlabel("Rank")
plt.ylabel("Price")
plt.title("Price by rank")
plt.show()



In [5]:
import numpy

# Natural log of each price, in the same order as `prices`.
log_prices = numpy.log(numpy.array(prices))

plt.scatter(rank, log_prices)
# Label the figure so it can be read without the surrounding code.
plt.xlabel("Rank")
plt.ylabel("log(price)")
plt.title("Log price by rank")
# Reference line joining (largest rank, smallest log price) to
# (smallest rank, largest log price). It stays the final expression so the cell
# still displays the list of Line2D objects that plt.plot returns.
plt.plot([max(rank), min(rank)], [min(log_prices), max(log_prices)], 'k-', lw=2)
# condition of deficiency, heavy hitter has rubbed off on the rest


Out[5]:
[<matplotlib.lines.Line2D at 0x10818ddd0>]

In [20]:
# Label the axes first so that plt.hist stays the final expression and the cell
# still displays the (counts, bin edges, patches) tuple it returns.
plt.xlabel("log(price)")
plt.ylabel("Count")
plt.title("Distribution of log prices")
plt.hist(log_prices)


Out[20]:
(array([ 51.,  30.,  36.,  37.,  26.,  12.,  15.,   6.,   2.,   1.]),
 array([ 11.1519556 ,  11.67997766,  12.20799972,  12.73602178,
         13.26404384,  13.7920659 ,  14.32008796,  14.84811002,
         15.37613208,  15.90415414,  16.4321762 ]),
 <a list of 10 Patch objects>)

In [1]:
# contrary to what art rank says

# now for GitRank

In [12]:
import pandas
# Read the GitRank table with every column kept as text (dtype=str); numeric
# conversion is done later by the cells that need numbers.
repos = pandas.read_csv(filepath_or_buffer="gitrank.csv", dtype=str)
# Show the whole table.
repos


Out[12]:
stars name
0 197812 godotengine/godot
1 78906 SFTtech/openage
2 189518 turbulenz/turbulenz_engine
3 39453 OpenRA/OpenRA
4 210492 GarageGames/Torque3D
5 26302 Gamua/Starling-Framework
6 22544 gameplay3d/GamePlay
7 19727 GarageGames/Torque2D
8 17535 spring/spring
9 18782 jMonkeyEngine/jmonkeyengine
10 198189 AdamsLair/duality
11 13151 Circular-Studios/Dash

In [13]:
# The table was read as text, so convert the star counts to integers first.
stars = [int(s) for s in repos["stars"]]

# Label the axes first so that plt.hist stays the final expression and the cell
# still displays the (counts, bin edges, patches) tuple it returns.
plt.xlabel("Stars")
plt.ylabel("Number of repositories")
plt.title("Distribution of star counts")
plt.hist(stars)

# standouts:
# turbulenz / turbulenz_engine
# GarageGames / Torque3D
# AdamsLair / duality


Out[13]:
(array([ 6.,  1.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  3.]),
 array([  13151. ,   32885.1,   52619.2,   72353.3,   92087.4,  111821.5,
         131555.6,  151289.7,  171023.8,  190757.9,  210492. ]),
 <a list of 10 Patch objects>)

In [15]:
# Zipf-style expectation: the entry at 1-based position k is expected to have
# stars[0] / k stars, anchored on the first row of the table.
# The loop variable is deliberately NOT named `rank`: that name is the global
# rank list used by the price plots. On Python 2 a list-comprehension variable
# leaks into the enclosing scope and would overwrite that list with an int; on
# Python 3 it would merely shadow it, which is still confusing.
zipf_stars = [float(stars[0]) / position for position in range(1, len(stars) + 1)]

In [16]:
# Element-wise ratio of each repository's actual star count to the count the
# Zipf curve predicts for its position in the table.
observed = numpy.array(stars)
expected = numpy.array(zipf_stars)
quality = observed / expected

In [17]:
# Show the observed / Zipf-expected ratio for each repository, in table order.
quality


Out[17]:
array([  1.        ,   0.7977878 ,   2.8742139 ,   0.7977878 ,
         5.32050634,   0.7977878 ,   0.79776758,   0.79780802,
         0.79780296,   0.94948739,  11.02096435,   0.7977878 ])

In [19]:
# Report every repository whose star count exceeds its Zipf expectation
# (ratio strictly greater than 1), as a (ratio, name) tuple.
for idx, score in enumerate(quality):
    if not score > 1:
        continue
    print((score, repos["name"][idx]))


(2.8742139000667302, ' turbulenz/turbulenz_engine')
(5.3205063393525167, ' GarageGames/Torque3D')
(11.020964349988878, ' AdamsLair/duality')

In [ ]:
# We can say a little more than just that these repositories are anomalies:
# the quality ratio also tells us which anomalies are the strangest.