In [1]:
import pandas as pd # Beautiful tool for data wrangling! e.g. '!pip install pandas' from a Notebook
# See https://mariadb.com/blog/how-connect-python-programs-mariadb e.g. '!pip install mysql' from Notebook
import MySQLdb 
import re
from collections import Counter
import os
from collections import OrderedDict
import pickle

pd.set_option("display.max_rows",35) # Useful when having large Pandas DataFrames like we do here

In [2]:
conn = MySQLdb.connect(user='mos', passwd='', db='monuments_db', charset='utf8', use_unicode=True)
cursor = conn.cursor()
cursor.execute("SET NAMES utf8")


Out[2]:
0

In [5]:
def create_mapping_table_from_source_tables(table):
    """Takes a Pandas DataFrame object and writes wikitables to files named after the table they are
    produced from to {the current Directory}/wikitables/
    """
    sql = 'SELECT * FROM `{}`'.format(table)
    df = pd.io.sql.read_sql(conn.escape_string(sql), conn)
    
    country_fields = list(df.keys())
    country_values = list(df.ix[0]) # get first row in table as example data from 
    country_data = list(zip(country_fields, country_values))
    
    table_header = '{| class="wikitable" style="width: 675px;\n'
    table_name = '|+ '+table + "\n"
    # create table columns
    table_columns = "! heritage field\n! example value\n! Wikidata property\n! Conversion\n! Comment\n|-\n"
    table_rows = []
    for (field, value) in zip(country_fields, country_values):
        row="| "+ str(field) + "\n| " + str(value) + "\n|\n| \n| \n|-\n"
        table_rows.append(row)
    
    table_rows_str = "".join(table_rows)
    # Fill in examples values from the first record in the table
    table_footer = "\n|}"
    wikitable = table_header + table_name + table_columns + table_rows_str[:-1] + table_footer
    
    if os.path.isdir("./mappingtables"):
        out = open("./mappingtables/" + table + ".mappingtable","w")
        out.write(wikitable)
        out.flush()
        
        print("Directory ./files exists. Wrote file {}".format(out.name))
        out.close()
    else:
        os.mkdir("./mappingtables")
        with open("./mappingtables/" + table + ".mappingtable","w") as out:
            out.write(wikitable)
            print("./mappingtables doesn't exist. Created it and wrote file {}".format(out.name))

In [6]:
final_tables = pickle.load(open("./final_tables.pickle","rb"))
for table in final_tables:
    create_mapping_table_from_source_tables(table)


./mappingtables doesn't exist. Created it and wrote file ./mappingtables/monuments_hk-hb_(en).mappingtable
Directory ./files exists. Wrote file ./mappingtables/monuments_nl-prov_(nl).mappingtable
Directory ./files exists. Wrote file ./mappingtables/monuments_at_(de).mappingtable
Directory ./files exists. Wrote file ./mappingtables/monuments_ee_(et).mappingtable
Directory ./files exists. Wrote file ./mappingtables/se_fornmin.mappingtable
Directory ./files exists. Wrote file ./mappingtables/se_bbr.mappingtable
Directory ./files exists. Wrote file ./mappingtables/monuments_it_(it).mappingtable
Directory ./files exists. Wrote file ./mappingtables/monuments_be-wal_(en).mappingtable
Directory ./files exists. Wrote file ./mappingtables/monuments_th_(th).mappingtable
Directory ./files exists. Wrote file ./mappingtables/monuments_sk_(de).mappingtable
Directory ./files exists. Wrote file ./mappingtables/monuments_jo_(ar).mappingtable
Directory ./files exists. Wrote file ./mappingtables/monuments_il_(he).mappingtable
Directory ./files exists. Wrote file ./mappingtables/monuments_de-nrw-bm_(de).mappingtable
Directory ./files exists. Wrote file ./mappingtables/monuments_sv_(es).mappingtable
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-6-0475956e1830> in <module>()
      1 final_tables = pickle.load(open("./final_tables.pickle","rb"))
      2 for table in final_tables:
----> 3     create_mapping_table_from_source_tables(table)

<ipython-input-5-5080a9d057d7> in create_mapping_table_from_source_tables(table)
      4     """
      5     sql = 'SELECT * FROM `{}`'.format(table)
----> 6     df = pd.io.sql.read_sql(conn.escape_string(sql), conn)
      7 
      8     country_fields = list(df.keys())

/home/mos/anaconda3/lib/python3.5/site-packages/pandas/io/sql.py in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)
    497             sql, index_col=index_col, params=params,
    498             coerce_float=coerce_float, parse_dates=parse_dates,
--> 499             chunksize=chunksize)
    500 
    501     try:

/home/mos/anaconda3/lib/python3.5/site-packages/pandas/io/sql.py in read_query(self, sql, index_col, coerce_float, params, parse_dates, chunksize)
   1593 
   1594         args = _convert_params(sql, params)
-> 1595         cursor = self.execute(*args)
   1596         columns = [col_desc[0] for col_desc in cursor.description]
   1597 

/home/mos/anaconda3/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
   1558                 cur.execute(*args, **kwargs)
   1559             else:
-> 1560                 cur.execute(*args)
   1561             return cur
   1562         except Exception as exc:

/home/mos/anaconda3/lib/python3.5/site-packages/MySQLdb/cursors.py in execute(self, query, args)
    215         res = None
    216         try:
--> 217             res = self._query(query)
    218         except TypeError as m:
    219             if m.args[0] in ("not enough arguments for format string",

/home/mos/anaconda3/lib/python3.5/site-packages/MySQLdb/cursors.py in _query(self, q)
    376 
    377     def _query(self, q):
--> 378         rowcount = self._do_query(q)
    379         self._post_get_result()
    380         return rowcount

/home/mos/anaconda3/lib/python3.5/site-packages/MySQLdb/cursors.py in _do_query(self, q)
    340         self._last_executed = q
    341         db.query(q)
--> 342         self._do_get_result()
    343         return self.rowcount
    344 

/home/mos/anaconda3/lib/python3.5/site-packages/MySQLdb/cursors.py in _do_get_result(self)
    152     def _do_get_result(self):
    153         db = self._get_db()
--> 154         self._result = self._get_result()
    155         self.rowcount = db.affected_rows()
    156         self.rownumber = 0

/home/mos/anaconda3/lib/python3.5/site-packages/MySQLdb/cursors.py in _get_result(self)
    373     query, or using CursorUseResultMixIn instead."""
    374 
--> 375     def _get_result(self): return self._get_db().store_result()
    376 
    377     def _query(self, q):

KeyboardInterrupt: 

In [ ]: