Tutorial to create dynamic matplotlib plots using Interactive Widgets

In this notebook, you will learn how to create interactive widgets and dynamically plot different types of graphs based on user input. Steps involved:

upload data files to be used in the graphs.
clean the dataset a little bit
create a function to populate values in the interactive dropdown widget
examples of single and multiple inputs in the widget
examples of individual and comparison plots

Instructions:

requires a version of IPython that provides interactive widgets (this notebook imports them from `IPython.html.widgets`)
requires crunchbase_monthly_export_acquisitions.csv and crunchbase_monthly_export_investments.csv. You can download from the links below: https://github.com/prabhamatta/Analyzing-Open-Data/blob/master/notebooks/crunchbase_monthly_export_acquisitions.csv https://github.com/prabhamatta/Analyzing-Open-Data/blob/master/notebooks/crunchbase_monthly_export_investments.csv



In [1]:

    
%pylab inline
%matplotlib inline
import matplotlib.pyplot as plt

from pylab import figure, show
from pandas import DataFrame, Series
import pandas as pd

import json

import IPython
from IPython.display import display, Image, HTML, clear_output
from IPython.html.widgets import interact, interactive, fixed
from IPython.html import widgets









    



Populating the interactive namespace from numpy and matplotlib



In [2]:

    
# Hex color palette shared by the charts in this notebook.

# Neutral tones
COLOR_WHITE = '#FFFFFF'
COLOR_GRAY = '#EEEEEE'
COLOR_BLACK = '#303030'

# Accent tones used for the plotted series
COLOR_RED = '#FC3F52'
COLOR_ORANGE = '#F5CB4E'
COLOR_GREEN = '#5FBD48'
COLOR_BLUE = '#3376A3'



In [3]:

    
# Read the two Crunchbase exports from the working directory. Missing cells
# are replaced with empty strings so the later comparisons against '' work.

# Acquired-companies dataset
acquisitions = pd.read_csv('crunchbase_monthly_export_acquisitions.csv').fillna("")

# Investments (funding rounds) dataset
investments = pd.read_csv('crunchbase_monthly_export_investments.csv').fillna("")

#some datacleaning
def convert_money(x):
    """Convert a raw money cell into an integer amount.

    Parameters
    ----------
    x : str or number
        Raw cell value, e.g. ``'1,000,000'``, ``2500``, ``1500000.0`` or a
        blank string.

    Returns
    -------
    int
        The amount with thousands separators removed, or ``-1`` when the
        cell is empty or contains only whitespace (the notebook's marker for
        "amount not disclosed").

    Raises
    ------
    ValueError
        If the cell is non-blank but does not hold a number.
    """
    text = str(x).strip()
    if not text:
        # Blank cell: keep the -1 sentinel the rest of the notebook filters on.
        return -1

    digits = text.replace(',', '')
    try:
        # Parse integers directly so large amounts keep full precision.
        return int(digits)
    except ValueError:
        # Float-like values such as 1500000.0 / "1000.0" show up when pandas
        # infers a float column; int() alone rejects their string form.
        return int(float(digits))

# Normalize both money columns to integers; blank cells become -1.
acquisitions['price_amount'] = acquisitions['price_amount'].apply(convert_money)
investments['raised_amount_usd'] = investments['raised_amount_usd'].apply(convert_money)









    



/Users/prabha/anaconda/lib/python2.7/site-packages/pandas/io/parsers.py:1070: DtypeWarning: Columns (9) have mixed types. Specify dtype option on import or set low_memory=False.
  data = self._reader.read(nrows)

1. Mean Funding for a category



In [4]:

    
def funding_for_category(**kwargs):
    """Plot the mean funding amount per year for the selected category.

    Written as an ``interact`` callback: the value of every keyword argument
    is treated as a ``company_category_code`` and gets its own bar chart of
    the mean ``raised_amount_usd`` per ``funded_year`` (2001 through 2013)
    taken from the ``investments`` frame.

    Rows whose amount is the ``-1`` "missing" marker written during data
    cleaning are left out so they do not drag the mean down. A category with
    no matching rows prints a short message instead of raising.
    """
    # Category-independent parts of the row selection, computed once.
    in_window = (investments.funded_year > 2000) & (investments.funded_year <= 2013)
    has_amount = investments.raised_amount_usd >= 0

    for input_cat_code in kwargs.values():
        mask = in_window & has_amount & (investments.company_category_code == input_cat_code)
        # Aggregate only the funding column; averaging the whole frame would
        # pull in unrelated columns.
        mean_funding = investments[mask].groupby('funded_year')['raised_amount_usd'].mean()

        if len(mean_funding) == 0:
            print("No funding data between 2001 and 2013 for category: " + str(input_cat_code))
            continue

        # Explicit figure/axes so labels land on this chart, not on whatever
        # axes pyplot currently considers active.
        fig, ax = plt.subplots()
        mean_funding.plot(kind='bar', color=COLOR_BLUE, legend=True, label=input_cat_code, ax=ax)
        ax.set_xlabel("Funded Year")
        ax.set_ylabel("Mean Funding amount")
        ax.set_title("Mean Funding amount for the category:"+ input_cat_code, fontsize=15)
        plt.show()



In [5]:

    
# Offer the category codes that occur in the investments data, since that is
# the frame funding_for_category filters. Blank codes are left out.
funding_categories = sorted(
    code for code in investments.company_category_code.unique() if code != '')

i = interact(funding_for_category,
         Category = widgets.DropdownWidget(value ='software', values=funding_categories)
         )

2. Top cities for acquisitions in a selected year (x = city, y = count of acquired companies and count of acquirers located in that city, limit = 20 cities, bar chart)



In [6]:

    
def location_acquisitions_for_year(**kwargs):
    """Plot the top 20 US cities by number of acquisitions in a given year.

    Written as an ``interact`` callback: the value of every keyword argument
    is treated as an ``acquired_year``. Only rows of ``acquisitions`` with
    ``price_amount > 1``, a ``company_country_code`` of ``'USA'`` and a
    non-empty ``company_city`` are considered.

    For each of the 20 cities with the most acquired companies, two bars are
    drawn side by side: how many of the selected deals had the acquired
    company in that city (green) and how many had the acquirer in that city
    (red). A year with no matching deals prints a message instead of raising.
    """
    for year in kwargs.values():
        mask = ((acquisitions.price_amount > 1)
                & (acquisitions.company_country_code == 'USA')
                & (acquisitions.company_city != '')
                & (acquisitions.acquired_year == year))
        deals = acquisitions[mask]

        if len(deals) == 0:
            print("No matching US acquisitions found for year: " + str(year))
            continue

        # value_counts() returns counts sorted in descending order, which
        # replaces groupby().count().sort(...) without the removed
        # DataFrame.sort API.
        top_cities_acquired = deals['company_city'].value_counts().head(20)

        # Align acquirer counts to the same cities; reindex() stands in for
        # the removed .ix indexer. Cities with no acquirers get a count of 0.
        top_cities_acquirer = (deals['acquirer_city'].value_counts()
                               .reindex(top_cities_acquired.index)
                               .fillna(0))

        city_counts = pd.DataFrame(
            {'Acquired City': top_cities_acquired, 'Acquirer City': top_cities_acquirer},
            index=top_cities_acquired.index,
            columns=['Acquired City', 'Acquirer City'])

        # One grouped bar chart: plotting the two series separately on the
        # same axes draws them on top of each other and hides the smaller bar.
        fig, ax = plt.subplots()
        city_counts.plot(kind='bar', color=[COLOR_GREEN, COLOR_RED], ax=ax)
        ax.set_title("Top cities for acquisitions in a year\n Green: Acquired City  &  Red : Acquirer City", fontsize=15)
        ax.set_xlabel("City")
        ax.set_ylabel("Number of acquisitions")
        plt.show()



In [7]:

    
# Acquisition years offered in the dropdown: 2000 through 2013, with 2013
# preselected.
years = range(2000,2014)
year_dropdown = widgets.DropdownWidget(value=2013, values=years)
i = interact(location_acquisitions_for_year, Year=year_dropdown)

3. Side By Side Comparison of Categories by total Funding amount



In [8]:

    
def tot_compare_funding_categ1_categ2(**kwargs):
    """``interact`` callback that compares total funding of two categories.

    Expects the keyword arguments ``Category1`` and ``Category2`` (category
    codes) and forwards them to ``total_funding_for_categories``. Any other
    keyword arguments are ignored.

    Raises
    ------
    TypeError
        If ``Category1`` or ``Category2`` is not supplied.
    """
    missing = [name for name in ('Category1', 'Category2') if name not in kwargs]
    if missing:
        # Fail with a readable message rather than an unbound-local error.
        raise TypeError(
            "tot_compare_funding_categ1_categ2() missing keyword argument(s): "
            + ", ".join(missing))

    total_funding_for_categories(kwargs['Category1'], kwargs['Category2'])

def total_funding_for_categories(categ1,categ2):
    """Plot yearly total funding for two categories in side-by-side panels.

    Parameters
    ----------
    categ1, categ2 : str
        ``company_category_code`` values to compare.

    For each category, ``raised_amount_usd`` from ``investments`` is summed
    per ``funded_year`` (2001 through 2013) and drawn as a line: ``categ1``
    in green on the left panel, ``categ2`` in blue on the right. The panels
    share a y-axis so the totals are directly comparable. Rows carrying the
    ``-1`` "missing amount" marker written during data cleaning are left out
    of the sums. A category with no matching rows leaves its panel empty.
    """
    # Category-independent parts of the row selection, computed once.
    in_window = (investments.funded_year > 2000) & (investments.funded_year <= 2013)
    has_amount = investments.raised_amount_usd >= 0

    panels = [
        (categ1, 'g', "Total Funding amount for the category:"+ categ1),
        (categ2, 'b', "Total Funding amount for the category: "+ categ2),
    ]

    # A single figure with two panels; calling DataFrame.plot twice without
    # axes would produce two unrelated figures.
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5), sharey=True)

    for ax, (categ, line_color, title) in zip(axes, panels):
        mask = in_window & has_amount & (investments.company_category_code == categ)
        # Sum only the funding column rather than every column of the frame.
        yearly_total = investments[mask].groupby('funded_year')['raised_amount_usd'].sum()

        if len(yearly_total) > 0:
            yearly_total.plot(kind='line', color=line_color, legend=True,
                              label=categ, linewidth=2.5, ax=ax)

        ax.set_xlabel("Funded Year")
        ax.set_ylabel("Total Funding amount")
        ax.set_title(title, fontsize=15)

    plt.show()



In [9]:

    
# Offer the category codes that occur in the investments data, since that is
# the frame the comparison plots filter. Blank codes are left out.
category_list = sorted(
    code for code in investments.company_category_code.unique() if code != '')

i = interact(tot_compare_funding_categ1_categ2,
         Category1 = widgets.DropdownWidget(value ='software', values=category_list),
         Category2 = widgets.DropdownWidget(value ='web', values=category_list)
         )