In [2]:
# Read every line of the price file into an immutable tuple; the context
# manager closes the file handle as soon as the lines have been loaded.
with open('stella.txt', 'r') as price_file:
    lines = tuple(price_file)
In [6]:
# Parse each line as an integer price, then order from highest to lowest.
prices = list(map(int, lines))
prices.sort(reverse=True)
# Show everything after the 100 largest prices.
prices[100:]
Out[6]:
In [4]:
import matplotlib.pyplot as plt
%matplotlib inline
# 1-based rank for each price; rank 1 is the first (largest) entry of `prices`.
rank = [position for position, _ in enumerate(prices, start=1)]
plt.scatter(x=rank, y=prices)
plt.show()
In [5]:
import numpy
# Natural log of every price, element by element.
log_prices = numpy.log(numpy.asarray(prices))
plt.scatter(x=rank, y=log_prices)
# Reference line joining (largest rank, smallest log-price) to
# (smallest rank, largest log-price).
plt.plot(
    [max(rank), min(rank)],
    [min(log_prices), max(log_prices)],
    'k-',
    linewidth=2,
)
# condition of deficiency, heavy hitter has rubbed off on the rest
Out[5]:
In [20]:
# Histogram of the log-prices (no bin settings are passed).
plt.hist(x=log_prices)
Out[20]:
In [1]:
# contrary to what art rank says
# now for GitRank
In [12]:
import pandas
# Load the repository table, reading every column as strings.
repos = pandas.read_csv(filepath_or_buffer="gitrank.csv", dtype=str)
# Bare expression so the notebook renders the frame as the cell output.
repos
Out[12]:
In [13]:
# The columns were loaded as strings, so convert the star counts to integers.
stars = list(map(int, repos["stars"]))
plt.hist(x=stars)
# standouts:
# turbulenz / turbulenz_engine
# GarageGames / Torque3D
# AdamsLair / duality
Out[13]:
In [15]:
# Expected star counts under a 1/position curve anchored at the first row's
# star count: first / 1, first / 2, first / 3, ...
zipf_stars = [
    float(stars[0]) / position
    for position, _ in enumerate(stars, start=1)
]
In [16]:
# Element-wise ratio of actual stars to the 1/position expectation.
quality = numpy.true_divide(numpy.array(stars), numpy.array(zipf_stars))
In [17]:
# Bare expression: show the array of actual-to-expected star ratios.
quality
Out[17]:
In [19]:
# Print (ratio, repository name) for every row whose star count exceeds the
# 1/position expectation, i.e. whose ratio is above 1.
for score, name in zip(quality, repos["name"]):
    if score > 1:
        print((score, name))
In [ ]:
# The ratio says more than just that these repositories are anomalies:
# it can also say which of them are the strangest anomalies.