In [1]:
import numpy
import scipy
import pandas

In [ ]:


In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns
sns.set(style="whitegrid", color_codes=True)

%matplotlib inline

In [3]:
import plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly import figure_factory as FF
import plotly.graph_objs as pogo
from plotly.graph_objs import Marker, Line, Data

init_notebook_mode(connected=True)



In [4]:
sys.path.append(os.path.join(os.path.abspath('../..'), 'src'))
sys.path.append(os.path.join(os.path.abspath('../..'), 'data'))

In [5]:
from importlib import reload

In [17]:
import utils
import bill_sponsor_analysis_pipeline
import bill_proc_utils as bpu

In [7]:
main_repo_dir = utils.get_main_dir()

In [8]:
reload(bill_sponsor_analysis_pipeline)


Out[8]:
<module 'bill_sponsor_analysis_pipeline' from '/home/immersinn/gits/ncga/src/bill_sponsor_analysis_pipeline.py'>

In [9]:
reprs_info, bill_info, sponsor_info = bill_sponsor_analysis_pipeline.main('2014')

In [10]:
reprs_info.head()


Out[10]:
District Session Chamber Name Party Incombant Label BillCount
35 36 2014 S Fletcher L. Hartsell, Jr. R NA F.Hartsell (R) 69
93 44 2014 H Rick Glazier D NA R.Glazier (D) 69
9 10 2014 S Brent Jackson R NA B.Jackson (R) 62
47 48 2014 S Tom Apodaca R NA T.Apodaca (R) 57
40 41 2014 S Jeff Tarte R NA J.Tarte (R) 54

In [11]:
sponsor_info.head()


Out[11]:
BillID Chamber Name SponsorID
0 0 H Lewis 102
1 1 H Lewis 102
2 2 H McGrady 166
3 2 H Dixon 53
4 3 H Cotham 149

In [12]:
bill_info.head()


Out[12]:
Session Chamber Bill Content LongTitle TableInfo Keywords Sponsors
0 2015E4 H 1 GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT... A HOUSE RESOLUTION adopting the permanent rule... {'Referredto': '', 'Sponsors': 'Representative... [ADOPTED, GENERAL ASSEMBLY, RESOLUTIONS, SIMPL... [Lewis]
1 2015E4 H 2 GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT... A JOINT RESOLUTIOn providing for adjournment s... {'Referredto': '', 'Sponsors': 'Representative... [ADJOURNMENT, GENERAL ASSEMBLY, RESOLUTIONS, J... [Lewis]
2 2015E4 H 3 GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT... AN ACT to provide further REGULATORY RELIEF TO... {'Referredto': '', 'Sponsors': 'Representative... [ADMINISTRATION DEPT., ADMINISTRATIVE CODE, AD... [McGrady, Dixon]
3 2015E4 H 4 GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT... AN ACT directing the department of transportat... {'Referredto': '', 'Sponsors': 'Representative... [BRIDGES, CONTRACTS, COUNTIES, INFRASTRUCTURE,... [Cotham, Bradford, J.Moore]
4 2015E4 H 5 GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT... AN ACT to clarify the service area for communi... {'Referredto': '', 'Sponsors': 'Representative... [COUNTIES, EDGECOMBE COUNTY, INFORMATION TECHN... [S.Martin]

Counting, Basic Analysis


In [14]:
total_bills = bill_info.shape[0]
sen_bills = sum(bill_info['Chamber'] == 'S')
hou_bills = sum(bill_info['Chamber'] == 'H')

Breakdown by Rep


In [15]:
color_dict = {'R' : 'red', 'D' : 'blue'}

trace0 = pogo.Bar(
    x=list(reprs_info['Label']),
    y=list(reprs_info['BillCount']),
    text=list(reprs_info['Label']),
    marker=dict(
        color=[color_dict[p] for p in reprs_info['Party']],
        ),
)

data = [trace0]
layout = pogo.Layout(
    title='NC GA Bill Sponsorship Counts, 2015-2016',
)

fig = pogo.Figure(data=data, layout=layout)
iplot(fig)



In [13]:
sponsor_summary_url = plot(fig, filename='images/NC GA Bill Sponsorship Counts 2015-201.html', auto_open=False,)
sponsor_summary_url = sponsor_summary_url[7:]

Helper Stuff


In [43]:
name_suffix_list = ['Jr', 'Sr', 'II', 'III', 'IV']

def get_last_name(full_name):
    name_parts = [p.strip() for p in full_name.split()]
    name_parts = [p for p in name_parts if p]
    
    last = ''
    if name_parts[-1].strip('.') not in name_suffix_list:
        last = name_parts[-1]
    else:
        last = name_parts[-2].strip(',')
    return(last)

def get_first_name(full_name):
    return(full_name.split()[0])

def get_firstinit(full_name):
    return(full_name[0])

def build_repr_ballotpedia_link(full_name):
    url_base = "https://ballotpedia.org/"
    url = url_base + get_first_name(full_name) + "_" + get_last_name(full_name)
    return(url)

def build_district_ballotpedia_link(district_no, chamber):
    url_base = "https://ballotpedia.org/"
    chamber_base = {'H' : 'North_Carolina_House_of_Representatives_',
                    'S' : 'North_Carolina_State_Senate_'}
    url = url_base + chamber_base[chamber] + 'District_' + str(district_no)
    return(url)

def build_ahref_link(text, url):
    return('<a href="' + url + '">' + text + '</a>')

In [19]:
session = '2014'

In [21]:
reprs_info['PersonURL'] = reprs_info.apply(lambda x: bpu.build_repr_link(x['Name'],
                                                                             x.name,
                                                                             session),
                                               axis=1)
reprs_info['DistrictURL'] = reprs_info.apply(lambda x: bpu.build_district_ballotpedia_link(x.District, x.Chamber),
                                             axis=1)
reprs_info['District'] = reprs_info.District.apply(lambda x: 'District ' + str(x))

In [22]:
reprs_info.head()


Out[22]:
District Session Chamber Name Party Incombant Label BillCount PersonURL DistrictURL
35 District 36 2014 S Fletcher L. Hartsell, Jr. R NA F.Hartsell (R) 69 2014_35.html https://ballotpedia.org/North_Carolina_State_S...
93 District 44 2014 H Rick Glazier D NA R.Glazier (D) 69 2014_93.html https://ballotpedia.org/North_Carolina_House_o...
9 District 10 2014 S Brent Jackson R NA B.Jackson (R) 62 2014_9.html https://ballotpedia.org/North_Carolina_State_S...
47 District 48 2014 S Tom Apodaca R NA T.Apodaca (R) 57 2014_47.html https://ballotpedia.org/North_Carolina_State_S...
40 District 41 2014 S Jeff Tarte R NA J.Tarte (R) 54 2014_40.html https://ballotpedia.org/North_Carolina_State_S...

In [40]:
hrefs = reprs_info.apply(lambda x: bpu.build_ahref_link(x.Name, x.PersonURL), axis=1)

In [41]:
hrefs[:5]


Out[41]:
35    <a href="2014_35.html">Fletcher L. Hartsell, J...
93              <a href="2014_93.html">Rick Glazier</a>
9               <a href="2014_9.html">Brent Jackson</a>
47               <a href="2014_47.html">Tom Apodaca</a>
40                <a href="2014_40.html">Jeff Tarte</a>
dtype: object

In [31]:
def build_chamber_summary_table(all_reps, chamber, sort_by='LN'):
    
    peeps = reprs_info[reprs_info.Chamber == chamber].copy()
    
    if sort_by=='LN':
        peeps['LN'] = peeps.Name.apply(bpu.get_last_name)
    
    peeps.sort_values(by=sort_by, inplace=True)
    
    data_matrix = [['Name', 'District', 'Party', 'Bills Sponsored']]

    for n,d,p,b in zip(peeps.apply(lambda x: bpu.build_ahref_link(x.Name, x.PersonURL), axis=1),
                       peeps.apply(lambda x: bpu.build_ahref_link(x.District, x.DistrictURL), axis=1),
                       peeps.Party,
                       peeps.BillCount):
        data_matrix.append([n,d,p,b])
        
    table = FF.create_table(data_matrix)
    table_url = plot(table, filename='images/' + chamber + '_rep_table.html', auto_open=False,)
    table_url = table_url[7:]
    
    return(table_url)

House Table


In [32]:
house_table_url = build_chamber_summary_table(reprs_info, 'H')

SenateTable


In [33]:
senate_table_url = build_chamber_summary_table(reprs_info, 'S')

Section 3: Bill Summary Data


In [34]:
bill_info.keywords[0]


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-34-552b5497b2be> in <module>()
----> 1 bill_info.keywords[0]

/home/immersinn/.virtualenvs/ncga/lib/python3.5/site-packages/pandas/core/generic.py in __getattr__(self, name)
   2742             if name in self._info_axis:
   2743                 return self[name]
-> 2744             return object.__getattribute__(self, name)
   2745 
   2746     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'keywords'

In [35]:
def build_chamber_keywords_table(bill_info, chamber, cutoff=10):
    kw_table = FF.create_table(bpu.build_chamber_keywords_df(bill_info, chamber))
    table_url = plot(kw_table, filename='images/' + chamber + '_keyword_table.html', auto_open=False,)
    table_url = table_url[7:]
    
    return(table_url)

In [36]:
house_kwtable_url = build_chamber_keywords_table(bill_info, 'H')
senate_kwtable_url = build_chamber_keywords_table(bill_info, 'S')


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-36-49df6dcf952f> in <module>()
----> 1 house_kwtable_url = build_chamber_keywords_table(bill_info, 'H')
      2 senate_kwtable_url = build_chamber_keywords_table(bill_info, 'S')

<ipython-input-35-bce8c2fbb2f2> in build_chamber_keywords_table(bill_info, chamber, cutoff)
      1 def build_chamber_keywords_table(bill_info, chamber, cutoff=10):
----> 2     kw_table = FF.create_table(bpu.build_chamber_keywords_df(bill_info, chamber))
      3     table_url = plot(kw_table, filename='images/' + chamber + '_keyword_table.html', auto_open=False,)
      4     table_url = table_url[7:]
      5 

/home/immersinn/gits/ncga/src/bill_proc_utils.py in build_chamber_keywords_df(bill_info, chamber)
    117 def build_chamber_keywords_df(bill_info, chamber):
    118     sub_index = bill_info['Chamber']==chamber
--> 119     return(build_keywords_df(bill_info, sub_index))
    120 
    121 

/home/immersinn/gits/ncga/src/bill_proc_utils.py in build_keywords_df(bill_info, sub_index, cutoff, sort)
     98 def build_keywords_df(bill_info, sub_index=[], cutoff=10, sort=True):
     99 
--> 100     if not sub_index:
    101         sub_index = [True for _ in range(bill_info.shape[0])]
    102     kw_counts = count_keywords(bill_info[sub_index]['Keywords'])

/home/immersinn/.virtualenvs/ncga/lib/python3.5/site-packages/pandas/core/generic.py in __nonzero__(self)
    915         raise ValueError("The truth value of a {0} is ambiguous. "
    916                          "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
--> 917                          .format(self.__class__.__name__))
    918 
    919     __bool__ = __nonzero__

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

Generate HTML as a String and Write to File


In [134]:
html_string = '''
<html>
    <head>
        <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.1/css/bootstrap.min.css">
        <style>body{ margin:0 100; background:whitesmoke; }</style>
    </head>
    <body>
        <h1>NCGA: Summary Info for Bills Filed During 2015 - 2016 Session</h1>

        <!-- *** Section 1 *** --->
        <h2>Section 1: General Summary Info</h2>
            <h4>Total Bills Filed: ''' + str(total_bills) + '''</h4>
            <h4>Senate Bills Filed: ''' + str(sen_bills) + '''</h4>
            <h4>House Bills Filed: ''' + str(hou_bills) + '''</h4>

        
        <!-- *** Section 2 *** --->
        <h2>Section 2: Breakdown by Representative</h2>

            <iframe width="1500" height="800" frameborder="0" seamless="seamless" scrolling="no"\
    src="''' + sponsor_summary_url + '''"></iframe>
    
    
        <table width="100%">
                <tr>
                    <td width="48%"><h3>House of Representatives Summary</h3></td>
                    <td width="48%"><h3>Senate Summary</h3></td>
                </tr>
            </table>
        
        <iframe style="padding:40px" width="48%" height="480" frameborder="0" seamless="seamless" scrolling="yes" align="left"\
    src="''' + house_table_url + '''"></iframe>
    
        <iframe style="padding:40px" width="48%" height="480" frameborder="0" seamless="seamless" scrolling="yes" align="right"\
    src="''' + senate_table_url + '''"></iframe>
    
    
        <!-- *** Section 3 *** --->
        <h2>Section 3: Bill Topics Overview</h2>
        
            <p>Table with keyword info, other topic info goes here</p>
            
            <table width="100%">
                <tr>
                    <td width="48%"><h3>House of Representatives</h3></td>
                    <td width="48%"><h3>Senate</h3></td>
                </tr>
            </table>
        
        <iframe style="padding:40px" width="48%" height="480" frameborder="0" seamless="seamless" scrolling="yes" align="left"\
    src="''' + house_kwtable_url + '''"></iframe>
    
        <iframe style="padding:40px" width="48%" height="480" frameborder="0" seamless="seamless" scrolling="yes" align="right"\
    src="''' + senate_kwtable_url + '''"></iframe>

    
    </body>
</html>'''

In [135]:
with open(os.path.join(main_repo_dir,'reports/dashboards/NCGABillsSummary.html'),'w') as f:
    f.write(html_string)

In [ ]: