a notebook describing the code to write the data to a mysql database
In [8]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
In [73]:
import nhrc2
from nhrc2.backend import get_neighborhoods as get_ngbrhd
from nhrc2.backend import read_issues as ri
import unicodedata
import pandas as pd
In [3]:
readfile=True
writejson=False
scf_df = ri.get_issues(readfile=readfile, writejson=writejson)
hoods = get_ngbrhd.get_neighborhoods()
scf_df['neighborhood'] = hoods
In [69]:
from nhrc2.backend import connect_nhrc_db as conndb
In [102]:
engine = conndb.connect_nhrc_db()
In [44]:
scf_df.columns
Out[44]:
In [93]:
#scf_df['summary'] = scf_df['summary'].str.replace('"', '')
scf_df['summary'] = [el.encode('ascii', 'ignore') for el in scf_df['summary'].values]
#scf_df['description'] = scf_df['description'].str.replace(u'\u2019', '')
#scf_df['description'] = scf_df['description'].str.replace(u'\u201c', '')
#scf_df['description'] = scf_df['description'].str.replace(u'\u201d', '')
#scf_df['description'] = scf_df['description'].str.replace(u'\u2013', '')
scf_df['description'] = [el.encode('ascii', 'ignore') for el in scf_df['description'].values]
In [94]:
#for el in scf_df['description'].values:
# print(' ')
# print('########################################################')
# print(el)
# el.encode('ascii', 'ignore')
In [95]:
#scf_df_to_write = scf_df[['acknowledged_at', 'address', 'category', 'closed_at', 'created_at', 'description']]
In [99]:
scf_df.head(3)
Out[99]:
In [100]:
import sqlalchemy
In [104]:
#list datatypes:
my_data_types = {'acknowledged_at': sqlalchemy.types.DateTime(timezone=True),
'address': sqlalchemy.types.Text,
'category': sqlalchemy.types.Text,
'closed_at': sqlalchemy.types.DateTime(timezone=True),
'created_at': sqlalchemy.types.DateTime(timezone=True),
'description': sqlalchemy.types.Text,
'id': sqlalchemy.types.INTEGER,
'issue_id': sqlalchemy.types.INTEGER,
'lat': sqlalchemy.types.FLOAT,
'lng': sqlalchemy.types.FLOAT,
'reporter_id': sqlalchemy.types.INTEGER,
'reporter_name': sqlalchemy.types.Text,
'reporter_role': sqlalchemy.types.Text,
'shortened_url': sqlalchemy.types.Text,
'status': sqlalchemy.types.Text,
'summary': sqlalchemy.types.Text,
'updated_at': sqlalchemy.types.DateTime(timezone=True),
'int_issue_id': sqlalchemy.types.INTEGER,
'neighborhood': sqlalchemy.types.Text}
scf_df.to_sql('nhrc', engine, if_exists='replace', index=False, dtype=my_data_types)
In [110]:
len(scf_df) == 0
Out[110]:
In [109]:
len('') == 0
Out[109]:
In [ ]: