In [16]:
#### Load Modules & Build DataFrames
# Directory holding the project's helper modules (loadData, astroWeekLib).
project_dir = "/home/ubuntu/github/AstroWeekStudy/python_code/"
import sys
# Re-running this cell must not keep growing sys.path with duplicates.
if project_dir not in sys.path:
    sys.path.append(project_dir)
try:
    # On a re-run the modules are already bound: reload them to pick up edits.
    reload(loadData)
    reload(astroWeekLib)
except NameError:
    # First run in a fresh kernel: the module names are not bound yet.
    # Only NameError is caught so that a genuine failure inside reload()
    # (syntax error, broken import in the module) is reported instead of
    # being silently replaced by the stale, already-imported module.
    import loadData
    import astroWeekLib
# Refresh the star-imported names after either branch above.
from loadData import *
from astroWeekLib import *
# Build the frames used by the rest of the notebook.
df, df2014, df2015, resampled = build_main_df()
df_users_created = prepareUserDf(df)
df_repos_created = build_df_repos_created(df2014)
In [25]:
def burstPlot(x,y):
    """Fit and plot a straight line through (log10(x), log10(y)).

    Only the positions where both ``x`` and ``y`` are strictly positive
    are kept, since log10 is taken of both. The regression result is
    printed, the points are drawn as circles, and the fitted line is drawn
    in solid black.

    Parameters
    ----------
    x, y : array-like supporting element-wise comparison and boolean
        masking (the callers in this notebook pass ``.values`` arrays).

    Returns
    -------
    The object returned by ``S.linregress``; element 0 is used as the
    slope and element 1 as the intercept of the plotted line.
    """
    positive = (x > 0) & (y > 0)
    log_x = np.log10(x[positive])
    log_y = np.log10(y[positive])
    fit = S.linregress(log_x, log_y)
    print(fit)
    slope, intercept = fit[0], fit[1]
    plot(log_x, log_y, 'o')
    plot(log_x, log_x * slope + intercept, 'k-')
    return fit
In [18]:
# Log-log fit of the resampled event counts against the repo counts;
# only the slope (element 0 of the fit) is kept.
x, y = (resampled['activity'][column].values for column in ('repos', 'events'))
slope = burstPlot(x, y)[0]
In [19]:
# Hand-entered exponents; the print below shows the product sAR * sRE
# so it can be compared by eye with sAE.
sAE, sAR, sRE = 1.14, 1.05, 1.07
print(sAR * sRE)
In [20]:
# Split each resampled activity series into time windows around the
# astroweek interval and collect the raw value arrays in `tseries`,
# keyed first by activity type and then by window name.
activity_type = ['events','actors','repos']
tseries = {}
for at in activity_type:
    print("%s activity" % at)
    test = resampled['activity'][at]
    # 'before' must be strictly earlier than the start of the week. The
    # previous condition (>= astroweek[0]) selected everything from the
    # start onward, i.e. the week itself plus everything after it.
    # NOTE(review): 'after' is capped at time_boundaries[1]; consider a
    # matching lower bound (time_boundaries[0]) here -- confirm intent.
    before = test[test.index < astroweek[0]].values
    during = test[(test.index >= astroweek[0]) & (test.index < astroweek[1])].values
    # NOTE(review): a sample stamped exactly astroweek[1] falls in neither
    # 'during' (<) nor 'after' (>) -- confirm this gap is intended.
    after = test[(test.index > astroweek[1]) & (test.index <= time_boundaries[1])].values
    after_start = test[(test.index >= astroweek[0]) & (test.index <= "2015-04-20")].values
    tseries[at] = {'before' : before, 'during' : during, 'after' : after, 'after_start' : after_start}
In [26]:
# Log-log fit of events against actors for the 'before' window,
# drawn on figure 1 and saved as EPS under project_dir.
period, x_type, y_type = 'before', 'actors', 'events'
pl.figure(1, figsize=(10, 9))
burstPlot(tseries[x_type][period], tseries[y_type][period])
xlabel("log10(%s)" % x_type)
ylabel("log10(%s)" % y_type)
pl.savefig(project_dir + "figures/productive_burst_before.eps")
In [15]:
# Scratch arithmetic: the trailing dot makes the divisor a float, so this
# is a float division whose value is shown as the cell output.
4/3.
Out[15]:
In [9]:
# Log-log fit of events against actors for the 'after' window.
period, x_type, y_type = 'after', 'actors', 'events'
burstPlot(tseries[x_type][period], tseries[y_type][period])
xlabel("log10(%s)" % x_type)
ylabel("log10(%s)" % y_type)
Out[9]:
In [10]:
# Log-log fit of events against actors for the 'after_start' window.
period, x_type, y_type = 'after_start', 'actors', 'events'
burstPlot(tseries[x_type][period], tseries[y_type][period])
xlabel("log10(%s)" % x_type)
ylabel("log10(%s)" % y_type)
Out[10]:
In [107]:
def PrepareRepoDf(df):
    """Summarise the events of every repository found in ``df``.

    Parameters
    ----------
    df : DataFrame with at least the columns ``repo_url``,
        ``repo_created_at``, ``actor`` and ``type``.

    Returns
    -------
    dict mapping each distinct ``repo_url`` to a dict with:
        ``event_count`` -- number of rows for that repo,
        ``user_count``  -- number of distinct ``actor`` values,
        ``created_at``  -- ``repo_created_at`` of the repo's first row,
        ``type_freq``   -- dict of event ``type`` -> occurrence count.
    """
    repo_dic = {}
    # One groupby pass instead of re-filtering the whole frame four times
    # per repository. Groups keep the original row order.
    for repo_url, repo_events in df.groupby('repo_url'):
        type_names, type_counts = np.unique(repo_events['type'].values, return_counts=True)
        repo_dic[repo_url] = {
            "event_count": len(repo_events),
            "user_count": len(np.unique(repo_events['actor'].values)),
            # .iloc[0] is positional. The previous `[0]` was a label lookup,
            # which raises KeyError on an integer index for every repo
            # whose rows do not include label 0.
            'created_at': repo_events['repo_created_at'].iloc[0],
            'type_freq': dict(zip(type_names, type_counts)),
        }
    return repo_dic
In [109]:
# Build the per-repository summary dict (keyed by repo_url) from the main frame.
repo_dic = PrepareRepoDf(df)
In [126]:
# Flatten the per-repo summaries into three parallel lists (same repo at
# the same position): distinct-user count, event count, and number of
# distinct event types.
uc, ec, len_type_freq = [], [], []
for key in repo_dic:
    uc.append(repo_dic[key]['user_count'])
    ec.append(repo_dic[key]['event_count'])
    len_type_freq.append(len(repo_dic[key]['type_freq']))
In [121]:
# Scratch check: number of distinct event types for a single repo.
# NOTE(review): `key` is not assigned in this cell; the only visible
# binding is the loop variable of the list-building loop, so this relies
# on leftover kernel state and fails on a fresh kernel if run before it.
len(repo_dic[key]['type_freq'])
Out[121]:
In [133]:
# Per-repo scatter on log-log axes: total event count against the
# number of distinct event types.
loglog(len_type_freq, ec, 'o')
pl.xlabel("number of different event types")
pl.ylabel("total event count")
Out[133]: