In [233]:
import augur
# import everything from githubapi.py and ghtorrent.py so we can
# just copy and paste our function later
import json
import re
from dateutil.parser import parse
import pandas as pd
import github
import numpy as np
import sqlalchemy as s
import datetime
import requests
import matplotlib.pyplot as plt
import seaborn as sns
import pylab
from scipy import stats
from pandas.api.types import is_numeric_dtype
%matplotlib inline
# Create an Augur application from the local config file so the data-source
# helpers below can be exercised from this notebook.
# The config path is passed straight to the constructor, and the same
# variable name is used on every line so the cell runs on a fresh kernel.
augur_app = augur.Application('../augur.cfg')
# NOTE: this rebinding shadows the `github` module imported at the top of the
# notebook; from here on `github` refers to the Augur data source.
github = augur_app.github()
ghtorrent = augur_app.ghtorrent()
In [234]:
# Pull the issue-comment-time data for rails/rails from GHTorrent.
# The two positional arguments are presumably (owner, repo) -- TODO confirm.
# Later cells read the 'opened' and 'minutes_to_comment' columns of the result.
df = ghtorrent.issue_comment_time(
    'rails',
    'rails',
)
In [235]:
# Parse 'opened' into datetimes (unparseable values become NaT), drop every
# row that has a missing value, and only then promote 'opened' to the index.
# Dropping BEFORE set_index matters: dropna() inspects columns only, so NaT
# timestamps already moved into the index would otherwise survive.
# assign() builds a new frame instead of mutating the fetched one in place.
df = (
    df.assign(opened=pd.to_datetime(df['opened'], errors='coerce'))
      .dropna()
      .set_index(['opened'])
)
In [236]:
# Remove extreme outliers: keep only rows whose 'minutes_to_comment' lies
# within 5 standard deviations of the column mean (both tails).
# np.abs must wrap the deviation itself; wrapping the whole comparison takes
# the absolute value of a boolean mask and filters only the upper tail.
test = df[
    np.abs(df['minutes_to_comment'] - df['minutes_to_comment'].mean())
    <= 5 * df['minutes_to_comment'].std()
]
In [237]:
# Aggregate the filtered rows into weekly bins anchored on Mondays and take
# the mean of each bin.
dfnew = (
    test
    .resample(rule='W-MON')
    .mean()
)
In [238]:
# Replace missing weekly means with 0 so every week has a numeric value.
# Note that a week without data is then treated as a 0-minute response time
# by everything computed from dfnew2.
dfnew2 = dfnew.fillna(value=0)
In [239]:
# Smooth the weekly series with a trailing 8-row rolling mean.
# min_periods=0 lets the first rows be averaged over however many
# observations are available instead of yielding NaN.
dfnew3 = (
    dfnew2
    .rolling(window=8, min_periods=0)
    .mean()
)
In [240]:
# Preview the first five rows of the smoothed weekly series.
dfnew3.head(n=5)
Out[240]:
In [241]:
# Largest smoothed weekly value. Series.max() is vectorised, skips NaN, and
# returns NaN on an empty series instead of raising like builtin max().
dfnew3['minutes_to_comment'].max()
Out[241]:
In [242]:
# Build the two-column frame consumed by the plotting cell: the weekly
# timestamps as 'xvalues' and the zero-filled weekly means as 'yvalues'.
# NOTE(review): this reads dfnew2 (unsmoothed), not the rolling-mean dfnew3
# computed above -- confirm that is intended.
dfnext = pd.DataFrame(
    {
        'xvalues': dfnew2.index,
        'yvalues': dfnew2["minutes_to_comment"],
    }
)
In [243]:
# Largest plotted value, useful for sanity-checking the y-axis limit below.
# Series.max() is vectorised, skips NaN, and does not raise on an empty series.
dfnext['yvalues'].max()
Out[243]:
In [244]:
# Plot the weekly issue response time held in dfnext.
# Uses the explicit Axes interface (plt is imported at the top of the
# notebook) and adds the axis labels, title and legend that were missing:
# the `label` passed to plot() is only shown once legend() is called.
fig, ax = plt.subplots()
ax.plot('xvalues', 'yvalues', data=dfnext, label="Issue Response Time")
# Fixed y-range carried over from the original cell; values above 90000 are
# clipped from view, so compare against the maximum printed above.
ax.set_ylim(0, 90000)
ax.set_xlabel("Week")
ax.set_ylabel("Mean minutes to comment")
ax.set_title("Issue Response Time")
ax.legend();
Out[244]:
In [ ]: