write_mysql

a notebook describing the code to write the data to a mysql database


In [8]:
%matplotlib inline
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [73]:
import nhrc2
from nhrc2.backend import get_neighborhoods as get_ngbrhd
from nhrc2.backend import read_issues as ri
import unicodedata
import pandas as pd

In [3]:
readfile=True
writejson=False
scf_df = ri.get_issues(readfile=readfile, writejson=writejson)

hoods = get_ngbrhd.get_neighborhoods()

scf_df['neighborhood'] = hoods


Category: SNOW RELATED, id: 8516, readfile: True
Category: Bins for Trash & Recycling, id: 5743, readfile: True
Category: Graffiti, id: 122, readfile: True
Category: Hangers, id: 6215, readfile: True
Category: Health Complaints, id: 5185, readfile: True
Category: Illegal Dumping, id: 1250, readfile: True
Category: Other, id: 374, readfile: True
Category: Other - city responsibility, id: 3018, readfile: True
Category: Parking Meter, id: 372, readfile: True
Category: Parking Violation/Abandoned Auto, id: 121, readfile: True
Category: Parks Request, id: 126, readfile: True
Category: Policing Issue, id: 2626, readfile: True
Category: Potholes, id: 116, readfile: True
Category: Public Space, Streets and Drains, id: 1249, readfile: True
Category: Private Property Issue, id: 1251, readfile: True
Category: Sidewalks and Curb damage, id: 117, readfile: True
Category: Signs / Bus Shelters / Pavement Markings, id: 373, readfile: True
Category: Street Lamp, id: 124, readfile: True
Category: Traffic/Road Safety, id: 2625, readfile: True
Category: Traffic Signal / Pedestrian Signal, id: 51, readfile: True
Category: Trash & Recycling, id: 1966, readfile: True
Category: Tree Trimming, id: 1853, readfile: True
Category: Request for volunteers, id: 5998, readfile: True
Category: General Bus Request/Incident, id: 4947, readfile: True
Category: Post to Neighbors, id: other, readfile: True
Category: SNOW RELATED, id: 8516, readfile: True
Category: Bins for Trash & Recycling, id: 5743, readfile: True
Category: Graffiti, id: 122, readfile: True
Category: Hangers, id: 6215, readfile: True
Category: Health Complaints, id: 5185, readfile: True
Category: Illegal Dumping, id: 1250, readfile: True
Category: Other, id: 374, readfile: True
Category: Other - city responsibility, id: 3018, readfile: True
Category: Parking Meter, id: 372, readfile: True
Category: Parking Violation/Abandoned Auto, id: 121, readfile: True
Category: Parks Request, id: 126, readfile: True
Category: Policing Issue, id: 2626, readfile: True
Category: Potholes, id: 116, readfile: True
Category: Public Space, Streets and Drains, id: 1249, readfile: True
Category: Private Property Issue, id: 1251, readfile: True
Category: Sidewalks and Curb damage, id: 117, readfile: True
Category: Signs / Bus Shelters / Pavement Markings, id: 373, readfile: True
Category: Street Lamp, id: 124, readfile: True
Category: Traffic/Road Safety, id: 2625, readfile: True
Category: Traffic Signal / Pedestrian Signal, id: 51, readfile: True
Category: Trash & Recycling, id: 1966, readfile: True
Category: Tree Trimming, id: 1853, readfile: True
Category: Request for volunteers, id: 5998, readfile: True
Category: General Bus Request/Incident, id: 4947, readfile: True
Category: Post to Neighbors, id: other, readfile: True

In [69]:
from nhrc2.backend import connect_nhrc_db as conndb

In [102]:
engine = conndb.connect_nhrc_db()

In [44]:
scf_df.columns


Out[44]:
Index([u'acknowledged_at', u'address', u'category', u'closed_at', u'created_at', u'description', u'id', u'issue_id', u'lat', u'lng', u'reporter_id', u'reporter_name', u'reporter_role', u'shortened_url', u'status', u'summary', u'updated_at', u'int_issue_id', u'neighborhood'], dtype='object')

In [93]:
#scf_df['summary'] = scf_df['summary'].str.replace('"', '')
scf_df['summary'] = [el.encode('ascii', 'ignore') for el in scf_df['summary'].values]
#scf_df['description'] = scf_df['description'].str.replace(u'\u2019', '')
#scf_df['description'] = scf_df['description'].str.replace(u'\u201c', '')
#scf_df['description'] = scf_df['description'].str.replace(u'\u201d', '')
#scf_df['description'] = scf_df['description'].str.replace(u'\u2013', '')
scf_df['description'] = [el.encode('ascii', 'ignore') for el in scf_df['description'].values]

In [94]:
#for el in scf_df['description'].values:
#    print(' ')
#    print('########################################################')
#    print(el)
#    el.encode('ascii', 'ignore')

In [95]:
#scf_df_to_write = scf_df[['acknowledged_at', 'address', 'category', 'closed_at', 'created_at', 'description']]

In [99]:
scf_df.head(3)


Out[99]:
acknowledged_at address category closed_at created_at description id issue_id lat lng reporter_id reporter_name reporter_role shortened_url status summary updated_at int_issue_id neighborhood
0 2015-03-30T16:03:33-04:00 25 Elmer Street New Haven, Connecticut SNOW RELATED None 2015-03-29T17:18:53-04:00 Snow storm weekend of 3/21/15 I witnessed sno... 1559551 8516 41.310373 -72.924160 0 CAROL SANTACROCE Registered User http://bit.ly/1HVR8D7 Acknowledged SNOW RELATED 2015-03-30T16:03:33-04:00 8516 Downtown
1 None Lenox Street And Clifton Street New Haven, Co... SNOW RELATED None 2015-03-17T12:04:30-04:00 Resident reports that the sidewalks at the cor... 1536264 8516 41.309046 -72.884204 6774 Department of Public Works Verified Official http://bit.ly/19w8mbW Open Unshoveled Sidewalks 2015-03-17T12:04:30-04:00 8516 Fair Haven Heights
2 None 591 Whitney Avenue New Haven, Connecticut SNOW RELATED None 2015-03-16T17:17:58-04:00 CT Transit bus driver of J2 bus leaving downto... 1534432 8516 41.326617 -72.915233 0 An anonymous SeeClickFix user Registered User http://bit.ly/1GeXnOd Open SNOW RELATED 2015-03-16T17:17:58-04:00 8516 East Rock

In [100]:
import sqlalchemy

In [104]:
#list datatypes:
my_data_types = {'acknowledged_at': sqlalchemy.types.DateTime(timezone=True),
                 'address': sqlalchemy.types.Text,
                 'category': sqlalchemy.types.Text,
                 'closed_at': sqlalchemy.types.DateTime(timezone=True),
                 'created_at': sqlalchemy.types.DateTime(timezone=True),
                 'description': sqlalchemy.types.Text,
                 'id': sqlalchemy.types.INTEGER,
                 'issue_id': sqlalchemy.types.INTEGER,
                 'lat': sqlalchemy.types.FLOAT,
                 'lng': sqlalchemy.types.FLOAT,
                 'reporter_id': sqlalchemy.types.INTEGER,
                 'reporter_name': sqlalchemy.types.Text,
                 'reporter_role': sqlalchemy.types.Text,
                 'shortened_url': sqlalchemy.types.Text,
                 'status': sqlalchemy.types.Text,
                 'summary': sqlalchemy.types.Text,
                 'updated_at': sqlalchemy.types.DateTime(timezone=True),
                 'int_issue_id': sqlalchemy.types.INTEGER,
                 'neighborhood': sqlalchemy.types.Text}
                 
                 

scf_df.to_sql('nhrc', engine, if_exists='replace', index=False, dtype=my_data_types)

In [110]:
len(scf_df) == 0


Out[110]:
False

In [109]:
len('') == 0


Out[109]:
True

In [ ]: