In [114]:
import pandas as pd
import csv
%matplotlib inline
In [115]:
%matplotlib inline
In [213]:
# Load the JIRA export into a DataFrame. Only four columns are read; both
# timestamp columns are parsed as datetimes and 'Created' becomes the index,
# so year/month can be taken directly from the index afterwards.
jira_frame = pd.read_csv(
    filepath_or_buffer="JIRA.csv",
    usecols=["Created", "Updated", "Project key", "Project name"],
    parse_dates=['Created', 'Updated'],
    index_col=['Created'],
)
In [166]:
# Preview the first rows of the loaded frame (head() defaults to five rows).
jira_frame.head()
Out[166]:
In [264]:
# Issues created in 2017, counted per project key. The year is taken straight
# from the 'Created' DatetimeIndex so this cell does not depend on the
# 'Created year' column, which is only added by a later cell.
jira_frame.loc[jira_frame.index.year == 2017, 'Project key'].value_counts()
Out[264]:
In [284]:
# Derive calendar parts of the creation timestamp from the index so the
# following cells can filter and group on them.
jira_frame["Created year"] = jira_frame.index.year
jira_frame["Created month"] = jira_frame.index.month

# Project keys that make up each reporting group.
hadoop_group = ['PRES', 'HIVE', 'HADTWO', 'SQOOP', 'HAD', 'QTEZ']
spark_group = ['SPAR', 'ZEP']
infra_group = ['ACM', 'QBOL', 'SDK', 'SCHED']

# (label, member keys) in priority order: if a key were ever listed twice,
# the earliest entry wins. "MW" is reported under its own key.
group_members = [
    ('HADOOP_GROUP', hadoop_group),
    ('SPARK_ZEP_GROUP', spark_group),
    ('INFRA_GROUP', infra_group),
    ('MW', ['MW']),
]

# Flatten to a key -> label lookup. Iterating in reverse lets higher-priority
# groups overwrite lower-priority ones for any duplicated key.
project_group_by_key = {
    key: label
    for label, keys in reversed(group_members)
    for key in keys
}

# Vectorised lookup; keys that belong to no group (including missing keys)
# fall back to 'MISC'.
jira_frame['Project group'] = (
    jira_frame['Project key'].map(project_group_by_key).fillna('MISC')
)

# Issues created in 2017, counted per project group.
jira_frame.loc[jira_frame['Created year'] == 2017, 'Project group'].value_counts()
Out[284]:
In [300]:
# Number of issues created in each month of 2017.
(
    jira_frame
    .loc[jira_frame['Created year'].eq(2017)]
    .groupby(['Created month'])
    .size()
)
Out[300]:
In [304]:
# Number of issues created in 2017, broken down by month and project key.
(
    jira_frame
    .loc[jira_frame['Created year'].eq(2017)]
    .groupby(['Created month', 'Project key'])
    .size()
)
Out[304]:
In [299]:
# Keep the 2017 issues grouped by (month, project key) for further aggregation.
group_by_month = (
    jira_frame
    .loc[jira_frame['Created year'].eq(2017)]
    .groupby(['Created month', 'Project key'])
)
In [ ]: