In [36]:
## Import libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## Display floats with only one decimal
pd.set_option('display.float_format', lambda x: '%.1f' % x)

## Read NSERC CSV file into pandas dataFrame, use latin encoding to read the french characters
df = pd.read_csv('./NSERC/NSERC_GRT_FYR2014_AWARD.csv', encoding='latin-1')

## Count number of awardees by province and plot a horizontal bar chart of province vs number or awardees
plotAwardperProv = df.ProvinceEN.value_counts().plot(kind='barh', figsize=(15, 10), x=df.columns[5], 
            title='NSERC - Number of awardees per province')
fig = plotAwardperProv.get_figure()       ## need to get figure to save it to file
fig.savefig('./plots/NSERC_AwardeesperProvince.png')    ## Save figure to png file

## Make a subset dataframe with only Ontarian awardees
on_df = pd.DataFrame(df[df['ProvinceEN']=='Ontario'])

## Note: on_df.columns[4] is 'Institution-Établissement'
## Note: on_df.columns[11] is award money given

## Make dataframe where name of institution gets grouped, and the column is the total number of awards
## per institution
on_Nawards=pd.DataFrame(on_df[on_df.columns[11]].groupby(on_df[on_df.columns[4]]).count())

## Make dataframe where name of institution gets grouped, and the column is the sum of the 
## money amount of the awards per institution
on_Aawards=pd.DataFrame(on_df[on_df.columns[11]].groupby(on_df[on_df.columns[4]]).sum())

## Make a dataframe with the number od awards and total money
merged=pd.concat([on_Nawards, on_Aawards], axis=1, ignore_index=True)

## Rename column names form 0 and 1, to Number of Awards and Total Amount
merged.columns=['Number of Awards','Total Amount']

## Change data type from integers to floats
merged[[merged.columns[0], merged.columns[1]]] = merged[[merged.columns[0], merged.columns[1]]].astype(float)

## Once the data type is float, we can divide the total funding (column 1) 
## by the number of awards (column 0)
merged['Avg Amount per Award'] = merged[merged.columns[1]]/merged[merged.columns[0]]

## Reset the index column so that the institution becomes a column
mergedIndex = merged.reset_index()

## Make a subset dataframe of the merged data so that we can select by average funding per award,
## in this case, if greater than half a million dollars
mergedIndex_subAVRpAward=mergedIndex[mergedIndex[mergedIndex.columns[3]] >= 
                            500000].sort_values(by=mergedIndex.columns[3], ascending=True)
## Make a bar plot
plotAvg=mergedIndex_subAVRpAward.plot( x=mergedIndex_subAVRpAward.columns[0], y=mergedIndex_sub.columns[3],
    kind='barh', figsize=(15, 20), 
    title='NSERC - Institutions with average funding >$500,000 per grant active in 2014')
fig = plotAvg.get_figure()
fig.savefig('./plots/NSERC_AvgFundingperAward.png')

## Make a subset dataframe of the merged data so that we can select by total funding,
## in this case, by total funding greater than half a million dollars
mergedIndex_subTotFunding=mergedIndex[mergedIndex[mergedIndex.columns[2]] >= 500000
                           ].sort_values(by=mergedIndex.columns[2], ascending=True)
## Make a bar plot
plotTotFunding=mergedIndex_subTotFunding.plot( x=mergedIndex_subTotFunding.columns[0], 
    y=mergedIndex_subTotFunding.columns[2], kind='barh', figsize=(15, 20), 
    title='NSERC - Institutions with total funding >$500,000 active in 2014')

fig = plotTotFunding.get_figure()
fig.savefig('./plots/NSERC_TotalFunding.png')

## Make a subset dataframe of the merged data so that we can select by total funding,
## in this case, by total number of awards greater than 50
mergedIndex_subTotAwards=mergedIndex[mergedIndex[mergedIndex.columns[1]] >= 50
                           ].sort_values(by=mergedIndex.columns[1], ascending=True)
## Make a bar plot
plotTotAwards=mergedIndex_subTotAwards.plot( x=mergedIndex_subTotAwards.columns[0], 
    y=mergedIndex_subTotAwards.columns[1], kind='barh', figsize=(15, 20), 
    title='NSERC - Institutions with more than 50 grants active in 2014')
fig = plotTotAwards.get_figure()
fig.savefig('./plots/NSERC_GrantsPerInst.png')



In [ ]: