In [114]:
import pandas as pd
import csv
%matplotlib inline

In [115]:
# Redundant: the import cell above already enables inline matplotlib rendering.
%matplotlib inline

In [213]:
# Load the JIRA export: once as plain row dicts, once as a DataFrame.
JIRA_CSV_PATH = 'JIRA.csv'

# Read the rows inside a `with` block so the file handle is closed as soon as
# the rows are in memory, instead of staying open for the life of the kernel.
# `jira_csv` is therefore a list of row dicts (re-iterable), not a one-shot reader.
with open(JIRA_CSV_PATH, "r") as jira_file:
    jira_csv = list(csv.DictReader(jira_file))

# Only the four columns used by the analysis are loaded. Both timestamp
# columns are parsed as datetimes and 'Created' becomes the frame's index.
jira_frame = pd.read_csv(
    filepath_or_buffer=JIRA_CSV_PATH,
    usecols=["Created", "Updated", "Project key", "Project name"],
    parse_dates=['Created', 'Updated'],
    index_col=['Created'],
)

In [166]:
# Show the first five rows as a quick sanity check of the load.
jira_frame.iloc[:5]


Out[166]:
Project key Project name Created Updated
0 ZEP ZEPPELIN 2017-08-08 17:08:00 2017-08-09 02:48:00
1 AD Admin 2017-08-08 16:26:00 2017-08-09 00:17:00
2 SPAR Spark 2017-08-07 17:19:00 2017-08-07 17:19:00
3 ACM AWS Cluster Management 2017-08-04 22:23:00 2017-08-09 04:54:00
4 SPAR Spark 2017-08-04 18:12:00 2017-08-06 22:50:00

In [264]:
# Number of tickets created in 2017, per project key.
# The year is taken straight from the 'Created' datetime index rather than
# from the 'Created year' column, because that column is only added by a
# later cell and would raise KeyError when the notebook is run top to bottom.
created_in_2017 = jira_frame.index.year == 2017
jira_frame.loc[created_in_2017, 'Project key'].value_counts()


Out[264]:
MW        31
ACM       27
SPAR      27
ZEP       21
SOL       20
HIVE      13
UI        10
PRES       9
SQOOP      5
SDK        5
SCHED      5
ODBC       4
QBOL       4
HADTWO     4
AD         3
HAD        3
JDBC       2
AN         2
AIR        2
ESC        1
QTEZ       1
EAM        1
SEC        1
QPIG       1
Name: Project key, dtype: int64

In [284]:
# Calendar parts of the 'Created' datetime index, used as filter/group keys.
jira_frame["Created year"] = jira_frame.index.year
jira_frame["Created month"] = jira_frame.index.month

# Project keys that make up each reporting group.
hadoop_group = ['PRES', 'HIVE', 'HADTWO','SQOOP', 'HAD', 'QTEZ']
spark_group = [ 'SPAR','ZEP']
infra_group = ['ACM','QBOL','SDK','SCHED']

# Flatten the groups into one key -> label lookup. Entries are inserted from
# lowest to highest precedence (MW, infra, spark, hadoop) so that, should a key
# ever appear in two lists, the winner matches the old if/elif order.
group_label_by_key = {'MW': 'MW'}
for label, keys in [
    ('INFRA_GROUP', infra_group),
    ('SPARK_ZEP_GROUP', spark_group),
    ('HADOOP_GROUP', hadoop_group),
]:
    group_label_by_key.update(dict.fromkeys(keys, label))

# Vectorised lookup instead of a row-by-row Python loop. Keys missing from
# the lookup (including NaN) come back as NaN and are labelled 'MISC'.
group_list = jira_frame['Project key'].map(group_label_by_key).fillna('MISC').tolist()

# Assign the plain list (positional), so duplicate 'Created' timestamps in the
# index cannot interfere with the column assignment.
jira_frame['Project group'] = group_list

# Tickets created in 2017, per project group.
jira_frame.loc[jira_frame['Created year'] == 2017, 'Project group'].value_counts()


Out[284]:
SPARK_ZEP_GROUP    48
MISC               47
INFRA_GROUP        41
HADOOP_GROUP       35
MW                 31
Name: Project group, dtype: int64

In [300]:
# Tickets created in each calendar month of 2017.
is_2017 = jira_frame['Created year'] == 2017
jira_frame[is_2017].groupby(['Created month']).size()


Out[300]:
Created month
1    13
2    16
3    24
4    28
5    43
6    33
7    33
8    12
dtype: int64

In [304]:
# Tickets created in 2017, broken down by month and then by project key.
is_2017 = jira_frame['Created year'] == 2017
month_and_project = ['Created month', 'Project key']
jira_frame[is_2017].groupby(month_and_project).size()


Out[304]:
Created month  Project key
1              ACM            2
               AD             1
               HIVE           3
               ODBC           1
               PRES           1
               SEC            1
               SPAR           2
               SQOOP          1
               ZEP            1
2              ACM            1
               AD             1
               MW             4
               PRES           1
               QBOL           1
               SCHED          1
               SDK            1
               SPAR           4
               ZEP            2
3              ACM            4
               EAM            1
               HADTWO         1
               HIVE           1
               MW             5
               ODBC           1
               SOL            2
               SPAR           4
               SQOOP          1
               UI             2
               ZEP            2
4              ACM            3
                             ..
6              QPIG           1
               SCHED          1
               SDK            2
               SOL            6
               SPAR           6
               UI             4
               ZEP            2
7              ACM            5
               AIR            2
               AN             1
               ESC            1
               HAD            2
               HADTWO         1
               HIVE           3
               JDBC           1
               MW             4
               ODBC           1
               QBOL           1
               SCHED          1
               SPAR           5
               SQOOP          2
               ZEP            3
8              ACM            2
               AD             1
               HADTWO         1
               HIVE           1
               MW             2
               PRES           1
               SPAR           2
               ZEP            2
Length: 91, dtype: int64

In [299]:
# Keep the 2017 tickets grouped by (month, project key) for reuse.
# NOTE(review): the saved traceback under this cell comes from an earlier
# two-line version that indexed the GroupBy with `['Project key'] == 'ACM'`;
# that expression compares a list to a string and evaluates to False.
tickets_2017 = jira_frame[jira_frame['Created year'] == 2017]
group_by_month = tickets_2017.groupby(['Created month', 'Project key'])


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-299-7123ffb9bd02> in <module>()
      1 group_by_month = jira_frame[jira_frame['Created year'] == 2017].groupby(['Created month','Project key'])
----> 2 group_by_month[['Project key'] == 'ACM']

/src/pyenvs/addyqdsenv/local/lib/python2.7/site-packages/pandas/core/base.pyc in __getitem__(self, key)
    352         else:
    353             if key not in self.obj:
--> 354                 raise KeyError("Column not found: %s" % key)
    355             return self._gotitem(key, ndim=1)
    356 

KeyError: 'Column not found: False'

In [ ]: