In [2]:
import calendar
import sys
from datetime import datetime,timedelta

import pandas as pd
import pylab as pl
# Output directory for saved figures, and the location of the project's helper modules.
figureDir = "/home/ubuntu/github/patchwork/figures/"
sys.path.append("/home/ubuntu/github/patchwork/")

# Re-running this cell should pick up edits made to the helper modules:
# reload them when they are already bound in the kernel, import them on the
# first run. Only NameError (module not imported yet) triggers the fallback;
# a genuine failure inside reload() -- e.g. a syntax error in an edited
# module -- now surfaces instead of silently leaving the stale module in use.
try:
    reload(parseUserNames)
    reload(analyze_patchwork)
    reload(parseEvents)
except NameError:
    import parseUserNames
    import analyze_patchwork
    import parseEvents

# The star imports run after the reload so the refreshed definitions are the
# ones bound in the notebook namespace.
from parseUserNames import *
from analyze_patchwork import *
from parseEvents import *

matplotlib.rcParams.update({'font.size': 18,'legend.fontsize': 10})
#df,df2014,df2015,resampled = build_main_df()
In [3]:
# Event records as a DataFrame. The 'date' column doubles as the index and is
# kept as a column too, because later cells both resample on the index and
# read df['date'] / df.date.
dic = makeEventList()
df = pd.DataFrame(dic).set_index('date', drop=False)

# userDic is keyed by username; pwDates is indexed and iterated as datetimes below.
userDic, pwDates = parseUsers()
In [4]:
def userTimeline(username,resol = 1,plot=False):
    """Event counts of one user, binned in time relative to a reference date.

    Reads the notebook-level ``df`` (events indexed by date, with ``user`` and
    ``date`` columns) and ``userDic``.

    Parameters
    ----------
    username : key of ``userDic``, also matched against ``df['user']``.
    resol : bin width in days (default 1).
    plot : when true, also draw the counts as a bar chart on the current axes.

    Returns
    -------
    x : start of each bin, in days relative to ``userDic[username]['pwDate'][0]``.
    y : number of events in each bin.
    """
    # NOTE(review): the reference is the first *listed* pwDate, whereas another
    # cell uses np.min(pwDate) -- confirm the list is sorted.
    reference = userDic[username]['pwDate'][0]
    # calendar.timegm reads the naive datetime the same way the int64 view of
    # the index does below. time.mktime would apply the machine's local UTC
    # offset, which can push every bin one day off after truncation.
    date = calendar.timegm(reference.timetuple()) / (3600 * 24.)

    # Resample once and reuse the result for both outputs.
    # (`how=` is the resample API of the old pandas this notebook targets.)
    counts = df[df['user']==username].date.resample("%sD"%resol,how="count")

    # The int64 view is nanoseconds since the epoch; reduce it to whole days.
    x = counts.index.astype(np.int64) // 10**9 // 3600 // 24
    x = x - date
    y = counts.values

    if plot:
        pl.bar(x,y,width=resol)
        pl.xlabel("Delta t [days]")
        pl.ylabel("Events per day")
    return x,y
In [5]:
#pl.close("all")
#pl.figure(1,(13,9))
resol = 1
X = np.arange(-1000,1000)
Y = np.zeros_like(X)
uY = np.zeros_like(X)
for i,username in enumerate(userDic.keys()):
try:
x,y = userTimeline(username,resol = resol,plot=False)
Y[map(int,x+ 1000)] += y
uY[map(int,x+ 1000)] += 1
except:
print i,username, "error"
continue
#pl.xlabel("Delta t [days]")
#pl.ylabel("Events per day")
In [10]:
# Activity per covering user. Offsets where uY is 0 divide by zero and come
# out as nan (numpy warns instead of raising).
r = Y / uY.astype(float)
In [55]:
lim =70
c = (X > -lim) & (X < lim)   # window of +/- lim days around tc
pl.close("all")
pl.figure(1,(12,15))

# One stacked panel per series, all with the same x label.
panels = [
    (311, Y, "Activity"),
    (312, r, "Activity/nDevs"),
    (313, uY, "uActivity"),
]
for position, series, label in panels:
    pl.subplot(position)
    pl.bar(X[c], series[c])
    pl.xlabel("time - tc [days]")
    pl.ylabel(label)

# Applies to the last panel only (uActivity).
pl.ylim(ymin=400)
Out[55]:
In [91]:
pl.figure(1,(15,9))
pl.subplot(121)
fit = S.linregress(uY,Y)
print "linear fit: ", fit
pl.plot(uY,Y,'o')
pl.plot(uY,uY*fit[0] + fit[1],'r-')
pl.xlabel("daily users")
pl.ylabel("daily events")
pl.subplot(122)
cond = (uY > 0)*(Y > 0)
lx = np.log10(uY[cond])
ly = np.log10(Y[cond])
fit = S.linregress(lx,ly)
print "scaling fit: ", fit
pl.plot(lx,ly,'bo')
pl.plot(lx,lx*fit[0] + fit[1],'r-')
pl.xlabel("log10(daily users)")
pl.ylabel("lgo10(daily events)")
Out[91]:
In [67]:
lim =30
c = (X > -lim)*(X < lim)
x = X[c]+1
y = uY[c]
y = y - y[-1] + 1
pl.figure(1,(13,7))
pl.subplot(121)
c1 = x < 0
pl.plot(-x[c1],y[c1],'o-')
pl.subplot(122)
c2 = x > 0
lx = np.log10(x[c2])
ly = np.log10(y[c2])
fit = S.linregress(lx,ly)
print fit
pl.plot(lx,ly,'o-')
pl.plot(lx,lx*fit[0]+fit[1],'r-')
pl.figure(2)
#pl.plot(10**lx,10**ly,'o-')
pl.plot(x,y)
pl.plot(10**lx,y[-1] + 10**(lx*fit[0]+fit[1]),'r-')
Out[67]:
In [102]:
upper_limit = 500
print upper_limit
c1 = (X>=0)*(Y>0)*(X < upper_limit)
lx = np.log10(X[c1]+1)
ly = np.log10(Y[c1])
c2= (X<=0)*(Y>0)*(X > -upper_limit)
lx2 = np.log10(-X[c2]+1)
ly2 = np.log10(Y[c2])
fit2 = S.linregress(lx2,ly2)
print "before tc: ", fit2
fit1 = S.linregress(lx,ly)
print "after tc: ", fit1
pl.figure(1,(13,9))
pl.plot(lx,ly,'b-.')
pl.plot(lx,lx*fit1[0]+fit1[1],'b-')
pl.plot(lx2,ly2,'r-.')
pl.plot(lx2,lx2*fit2[0]+fit2[1],'r-')
pl.xlabel("log10(time - tc [days])")
pl.ylabel("log10(Activity)")
Out[102]:
In [81]:
c1 = (X>0)*(Y>0)*(X>1)*(X < 600)
c2 = (X<0)
print S.linregress(X[c1],Y[c1])
plot(X[c1],Y[c1])
plot(-X[c2],Y[c2],'r-')
Out[81]:
In [22]:
# Joining date of every user, sorted ascending.
jDates = np.sort([info['jDate'] for info in userDic.values()])

#bD0 = datetime(2008, 2, 1, 0, 0, 0)
bD0 = pwDates[0] - timedelta(days=60)
bD1 = datetime(2015, 8, 30, 0, 0, 0)
dayRes = 7
hourRes = 24*3600*dayRes   # bin width in seconds, despite the name

# Bin edges every dayRes days, from bD0 up to the first edge at or past bD1.
binWidth = timedelta(seconds = hourRes)
dateBins = [bD0]
while dateBins[-1] < bD1:
    dateBins.append(dateBins[-1] + binWidth)

def _epochDays(stamps):
    """time.mktime of each datetime in `stamps`, converted from seconds to days."""
    return np.array([time.mktime(datetime.timetuple(t)) for t in stamps])/(24*3600)

dateBins = _epochDays(dateBins)
jDatesTS = _epochDays(jDates)
pwDatesTS = _epochDays(pwDates)

# Number of users joining per bin: H[0] holds the counts, H[1] the edges.
H = np.histogram(jDatesTS,bins=dateBins)
In [42]:
pl.figure(1,(14,12))

# Weekly event counts, computed once and shared by both panels.
resol = "1W"
weekly = df.user.resample(resol,how="count")
x = weekly.index
y = weekly.values
# Cumulative number of joined users, aligned with the sorted jDates.
cumJoin = np.arange(1,len(jDates)+1)

def _markPatchworks(top):
    """Draw a red vertical line from 0 to `top` at every date in pwDates."""
    for date in pwDates:
        pl.plot([date, date],[0, top],'r-',lw=1)

# Panel 1: full history. Weekly events are divided by 20 to share the axis.
pl.subplot(221)
pl.plot(jDates,cumJoin,lw=2)
_markPatchworks(849)
pl.plot(x,y/20.,'-',lw=2)
pl.xlabel("Time [years]")
pl.ylabel("CumJoin (blue) / wEvents/20 (green)")
pl.xticks(rotation=90)
pl.xlim(xmin=x[0])
pl.ylim(ymax=850)

# Panel 2: zoom on the patchwork period (30 days of margin on each side).
# Weekly events are divided by 10 here.
pl.subplot(222)
pl.plot(jDates,cumJoin,lw=2)
pl.plot(x,y/10.,'-',lw=2)
top = int(max(y)) // 10 + 20 - 1
_markPatchworks(top)
pl.xlim(xmin=pwDates[0]-timedelta(days=30),xmax=pwDates[-1]+timedelta(days=30))
pl.ylim(0,top)
pl.xlabel("Time [years]")
pl.ylabel("CumJoin (blue) / wEvents/10 (green)")
pl.xticks(rotation=90)

# Disabled third panel. The marker loop that belongs to it used to run anyway:
# it drew day offsets onto the date axis of panel 2 (outside the visible
# range) and ignored its loop variable. It is kept here, corrected, so the
# panel can be restored as a whole.
#pl.subplot(223)
#bar(H[1][:-1]-H[1][0],H[0],width=dayRes,lw=0)
#pl.xlabel("Time [days since 2 months before 1st patchwork]")
#pl.ylabel("Joining per %s days"%dayRes)
#for ts in pwDatesTS:
#    pl.plot([ts - H[1][0]]*2,[0,14],'r-',lw=1)
#pl.xticks(rotation=90)

pl.savefig(figureDir + "cumulativeJoining.png")
In [34]:
#Count Events
resol = "1W"
x = df.user.resample(resol,how="count").index.astype(np.int64) // 10 ** 9 / 3600 /24.
minx = min(x)
x = x - minx
pl.figure(1,(9,9))
pw0 = time.mktime(pwDates[0].timetuple()) / 3600 /24. - minx
ypw0 = np.arange(0,5000)
plot(np.zeros_like(ypw0)+pw0,ypw0,'r-')
y = df.user.resample(resol,how="count").values
pl.plot(x,y,'-',lw=2)
c0 = (x>400)*(x < pw0)
fit0 = S.linregress(x[c0],y[c0])
print fit0
pl.plot(x[c0],x[c0]*fit0[0] + fit0[1],'r-')
pl.text(450,450,"slope = %.2f \n (std. err. = %.2f)"%(fit0[0],fit0[-1]),verticalalignment='center',horizontalalignment='center')
print pwDates[0],pw0
c1 = (x > pw0)*(x < 1350)
fit1 = S.linregress(x[c1],y[c1])
print fit1
pl.plot(x[c1],x[c1]*fit1[0] + fit1[1],'r-')
pl.text(1200,1500,"slope = %.2f \n (std. err. = %.2f)"%(fit1[0],fit1[-1]),verticalalignment='center',horizontalalignment='center')
jump = (x[c1][0]*fit1[0] + fit1[1]) - (x[c0][-1]*fit0[0] + fit0[1])
pl.text(850,3500,"jump: \n %.0f events \n per day"%jump,verticalalignment='center',horizontalalignment='center')
pl.xlabel("time [days since 2012]")
pl.ylabel("daily events")
pl.savefig(figureDir + "regime_shift.png")
In [388]:
pl.figure(1,(12,8))
resol = "1W"
# Weekly event counts over the whole data set.
weekly = df.user.resample(resol,how="count")
x, y = weekly.index, weekly.values
pl.plot(x,y,'-')

# Red vertical line, as tall as the weekly maximum, at every entry of dateEvents.
# NOTE(review): dateEvents is not defined in any cell shown here -- confirm
# where it comes from.
y = range(0,max(y))
for date in dateEvents:
    pl.plot([date]*len(y),y,'r-',lw=1)
pl.xticks(rotation=90)
Out[388]:
In [265]:
# Number of rows in df for each user of userDic; users with at least one
# event are printed together with their count.
# NOTE(review): the nesting of L.append is ambiguous in this copy of the
# notebook; it is assumed to sit inside the `if`, so L holds only the
# non-zero counts -- confirm.
L = []
for user in userDic.keys():
    l = len(df.user[df['user'] == user])
    if l > 0:
        print user,l
        L.append(l)
In [6]:
# Per-user arrays, all in the iteration order of userDic:
#   deltaJ  - joining time minus first patchwork attendance, in years
#             (negative when the user joined before attending)
#   pwDate  - first patchwork attendance, as a time.mktime timestamp
#   pwPlace - the user's 'pwPlace' entry
#   pwUsers - the username
# Values are collected in lists and converted once; calling np.append inside
# the loop copied every array on each iteration.
deltaJ = []
pwDate = []
pwUsers = []
places = []
for username, info in userDic.items():
    #print item[1]['jDate'],np.min(item[1]['pwDate'])
    jDate = time.mktime(datetime.timetuple(info['jDate']))
    pwd = time.mktime(datetime.timetuple(np.min(info['pwDate'])))
    dt = pwd - jDate
    deltaJ.append(-float(dt)/(3600*24.*365.))
    pwDate.append(pwd)
    pwUsers.append(username)
    # np.ravel keeps the flattening that np.append applied to each entry.
    # NOTE(review): a later cell indexes pwPlace with a mask built from
    # pwDate, which needs exactly one place per user -- confirm 'pwPlace'
    # never holds more than one value.
    places.append(np.ravel(info['pwPlace']))

deltaJ = np.array(deltaJ)
pwDate = np.array(pwDate)
pwUsers = np.array(pwUsers)
pwPlace = np.concatenate(places) if places else np.array([])
In [242]:
pl.figure(1,(10,5))
# 100-bin histogram of the joining/attendance lag, drawn as bars.
H = np.histogram(deltaJ,bins=100)
counts, edges = H
binWidth = edges[1] - edges[0]
pl.bar(edges[:-1], counts, width=binWidth, lw=0)
pl.xlabel("Delta time [years] between joining \n and (first) patchwork attendance")
pl.ylabel("(unnormed) Density")
pl.xlim(-7.5,2)
pl.savefig(figureDir + "density_lag_joining_attendance.png")
In [417]:
# Number of distinct first-attendance timestamps. uPwDate is assigned in the
# cell that follows, so that cell has to be run before this one.
print len(uPwDate)
In [ ]:
# One small figure per distinct first-attendance timestamp: the lag histogram
# of the users whose first attendance falls on that timestamp.
uPwDate = np.sort(np.unique(pwDate))
for d,dx in enumerate(uPwDate):
    pl.figure(d,(6,3))
    c = (pwDate == dx)
    place = pwPlace[c][0]
    day = datetime.fromtimestamp(dx).strftime("%Y/%m/%d")
    pl.title("%s (%s)"%(place,day))
    H = np.histogram(deltaJ[c],bins=50)
    counts, edges = H
    pl.bar(edges[:-1], counts, width=edges[1] - edges[0], lw=0)
    #pl.xlabel("Delta time [years] between joining \n and (first) patchwork attendance")
    #pl.ylabel("(unnormed) Density")
    pl.xlim(-7.5,2)
    #print "('" + "','".join(list(pwUsers[c])) +"')"
In [414]:
# Rank-order plot of the per-user event counts on log-log axes.
euCDF = eventsPerUserCDF(dic)
x, y = rankorder(euCDF['events'])
pl.loglog(x, y, '.')
pl.xlabel("Counts events per user")
pl.ylabel("Rank Ordering (CCDF)")
December 10, 2014: Atlanta
February 2, 2015: Melbourne
In [68]:
def burstPlot(x,y):
c = (y >0)*(x>0)
lx = np.log10(x[c])
ly = np.log10(y[c])
fit = S.linregress(lx,ly)
print fit
pl.plot(lx,ly,'.')
pl.plot(lx,lx*fit[0]+fit[1],'k-')
return fit