In [10]:
import pandas as pd # Beautiful tool for data wrangling! e.g. '!pip install pandas' from a Notebook
# See https://mariadb.com/blog/how-connect-python-programs-mariadb e.g. '!pip install mysql' from Notebook
import MySQLdb 
import re
from collections import Counter
import os
from collections import OrderedDict
import random
import pickle

pd.set_option("display.max_rows",35) # Useful when having large Pandas DataFrames like we do here

In [11]:
conn = MySQLdb.connect(user='mos', passwd='', db='monuments_db', charset='utf8', use_unicode=True)
cursor = conn.cursor()
cursor.execute("SET NAMES utf8")

Each source table separately


In [15]:
final_tables = pickle.load(open("./final_tables.pickle","rb")) # monuments_all and admin etc removed
final_tables


Out[15]:
{'monuments-old_ch_(de)',
 'monuments-old_ch_(en)',
 'monuments-old_ch_(fr)',
 'monuments-old_ch_(it)',
 'monuments_ad_(ca)',
 'monuments_am_(hy)',
 'monuments_aq_(en)',
 'monuments_ar_(es)',
 'monuments_at_(de)',
 'monuments_az_(az)',
 'monuments_be-bru_(nl)',
 'monuments_be-vlg_(en)',
 'monuments_be-vlg_(fr)',
 'monuments_be-vlg_(nl)',
 'monuments_be-wal_(en)',
 'monuments_be-wal_(fr)',
 'monuments_be-wal_(nl)',
 'monuments_bo_(es)',
 'monuments_by_(be-tarask)',
 'monuments_ca_(fr)',
 'monuments_ch2_(de)',
 'monuments_ch3_(de)',
 'monuments_ch_(de)',
 'monuments_ch_(fr)',
 'monuments_ch_(it)',
 'monuments_cl_(es)',
 'monuments_cm_(fr)',
 'monuments_cn_(en)',
 'monuments_co_(es)',
 'monuments_cz_(cs)',
 'monuments_de-by_(de)',
 'monuments_de-he_(de)',
 'monuments_de-nrw-bm_(de)',
 'monuments_de-nrw-k_(de)',
 'monuments_dk-bygninger_(da)',
 'monuments_dk-fortidsminder_(da)',
 'monuments_dz_(ar)',
 'monuments_ee_(et)',
 'monuments_es-ct_(ca)',
 'monuments_es-gl_(gl)',
 'monuments_es-vc_(ca)',
 'monuments_es_(ca)',
 'monuments_es_(es)',
 'monuments_fr-object_(fr)',
 'monuments_fr_(ca)',
 'monuments_fr_(fr)',
 'monuments_gb-eng_(en)',
 'monuments_gb-nir_(en)',
 'monuments_gb-sct_(en)',
 'monuments_gb-wls_(en)',
 'monuments_gh_(en)',
 'monuments_hk-hb_(en)',
 'monuments_hk_(en)',
 'monuments_hu_(hu)',
 'monuments_ie_(en)',
 'monuments_il_(he)',
 'monuments_in_(en)',
 'monuments_it-88_(ca)',
 'monuments_it-bz_(de)',
 'monuments_it_(it)',
 'monuments_jo_(ar)',
 'monuments_jp-nhs_(en)',
 'monuments_ke_(en)',
 'monuments_lu_(lb)',
 'monuments_mt_(de)',
 'monuments_mx_(es)',
 'monuments_nl-aw_(en)',
 'monuments_nl-aw_(nl)',
 'monuments_nl-gem_(nl)',
 'monuments_nl-prov_(nl)',
 'monuments_nl_(nl)',
 'monuments_no_(no)',
 'monuments_np_(en)',
 'monuments_pa_(es)',
 'monuments_ph_(en)',
 'monuments_pk_(en)',
 'monuments_pl-old_(pl)',
 'monuments_pl_(pl)',
 'monuments_pt_(pt)',
 'monuments_ro_(ro)',
 'monuments_rs_(sr)',
 'monuments_ru_(ru)',
 'monuments_sk_(de)',
 'monuments_sk_(sk)',
 'monuments_sv_(es)',
 'monuments_th_(th)',
 'monuments_tn_(fr)',
 'monuments_ua_(uk)',
 'monuments_us-ca_(en)',
 'monuments_us_(en)',
 'monuments_uy_(es)',
 'monuments_ve_(es)',
 'monuments_za_(en)',
 'se_arbetsl',
 'se_bbr',
 'se_fornmin',
 'se_ship'}

In [22]:
def create_source_table_value_examples():
    """Takes a Pandas DataFrame object and writes wikitables to files named after the table they are
    produced from to {the current Directory}/wikitables/
    """
    final_tables = pickle.load(open("./final_tables.pickle","rb")) # monuments_all and admin etc removed
    
    for table in final_tables:
        sql = "SELECT * FROM monuments_all"
        df = pd.io.sql.read_sql(conn.escape_string(sql), conn)
        
        h1 = "= Non-standardized fields from table " + table + "=\n"
        column_tables = [] 
        
        lang = df.lang.sample(n=1).to_string().split("    ")[1] # A very complicated way of getting one value!
        columns = df.columns
        page_sections = []
        
        for column in columns:
            # the listed fields we avoid are assumed to be standardized
            if column not in ["country","lang","project","changed","lat","lon","lat_int","lon_int"]:
                h2 = "== 10 random samples from field " + column + " ==\n"
                values = df[column]
                try:
                    sample = values.sample(n=10)
                    examples = sample.values
                except ValueError as e:
                    print("country: {} cannot be sampled.\n Error: {}\n Total objects is {}. Skipping country.".format(table, e, len(values)))
                    break
    
                table_header = '{| class="wikitable" style="width: 675px;\n'
                table_name = '|+ '+table + "-" + column + " 10 random samples\n"
            
                # create table columns
                table_columns = "! scope='col' style='width: 225px;' |" + str(column) + "\n" + \
                "! scope='col' style='width: 225px; height: 20px;'|" + "Conversion \n" + \
                "! scope='col' style='width: 225px;' | Comment\n|-\n"
                
                table_rows = []
            
                for example in examples:
                    row="| style='height: 20px;'| " + str(example) + "\n|\n|\n|-\n"
                    table_rows.append(row)
    
                    table_rows_str = "".join(table_rows)
                    # Fill in examples values from the first record in the table
                    table_footer = "\n|}"
            else:
                continue
            
            column_tables.append(h2)    
            column_table = table_header + table_name + table_columns + table_rows_str[:-1] + table_footer
            column_tables.append(column_table)
        
        wikipage = h1 + "\n".join(column_tables)
        
        if os.path.isdir("./langfiles_source"):
            #print(wikipage)
            out = open("./langfiles_source/" + lang + ".examples","w")
            out.write(wikipage)
            out.flush()
            print("Directory ./langfiles_source exists. Wrote file {}".format(out.name))
            out.close()
            
        else:
            os.mkdir("./langfiles_source")
            with open("./langfiles_source/" + lang + ".examples","w") as out:
                out.write(wikipage)
                print("./langfiles_source doesn't exist. Wrote file {}".format(out.name))
        
        if os.path.isdir("./countryfiles_source"):
            #print(wikipage)
            out = open("./countryfiles_source/" + table + ".examples","w")
            out.write(wikipage)
            out.flush()
            print("Directory ./countryfiles_source exists. Wrote file {}".format(out.name))
            out.close()
            
        else:
            os.mkdir("./countryfiles_source")
            with open("./countryfiles_source/" + table + ".examples","w") as out:
                out.write(wikipage)
                print("./countryfiles_source doesn't exist. Wrote file {}".format(out.name)")

In [23]:
create_source_table_value_examples()


./langfiles_source doesn't exist
./countryfiles_source doesn't exist
Directory ./langfiles_source exists. Wrote file ./langfiles_source/uk.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_be-vlg_(fr).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/nl.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_ru_(ru).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/pl.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_dz_(ar).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/sv.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_sk_(de).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/ca.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_lu_(lb).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/pl.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_es_(ca).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/uk.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_pl_(pl).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/sv.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_ar_(es).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/de.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_bo_(es).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/en.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_hk-hb_(en).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/cs.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_ch_(it).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/nl.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_nl-aw_(nl).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/no.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_es-ct_(ca).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/de.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_be-wal_(fr).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/de.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_pk_(en).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/uk.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments-old_ch_(en).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/nl.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_ua_(uk).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/uk.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_dk-bygninger_(da).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/fr.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_ad_(ca).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/nl.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_no_(no).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/uk.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_ie_(en).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/en.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_nl_(nl).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/ru.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_ch2_(de).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/ru.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_cl_(es).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/uk.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_gb-eng_(en).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/sv.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_pl-old_(pl).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/sv.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_fr-object_(fr).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/ro.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_mt_(de).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/fr.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_ch3_(de).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/ro.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_mx_(es).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/fr.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_ch_(de).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/de.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_cz_(cs).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/ro.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_dk-fortidsminder_(da).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/en.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_th_(th).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/fr.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_fr_(fr).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/nl.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_de-nrw-bm_(de).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/nl.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_it_(it).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/de.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments-old_ch_(fr).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/es.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/se_ship.examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/hy.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_uy_(es).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/nl.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_it-88_(ca).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/sv.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_be-bru_(nl).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/es.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_ve_(es).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/pl.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_nl-gem_(nl).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/no.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_jo_(ar).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/pl.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_hu_(hu).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/da.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments-old_ch_(de).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/ca.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/se_arbetsl.examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/th.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_be-wal_(en).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/pl.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_gh_(en).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/sv.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_es-gl_(gl).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/da.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_nl-aw_(en).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/de.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_pa_(es).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/en.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_gb-nir_(en).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/ru.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_by_(be-tarask).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/nl.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/se_fornmin.examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/fr.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_be-vlg_(en).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/fr.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_cm_(fr).examples
Directory ./langfiles_source exists. Wrote file ./langfiles_source/en.examples
Directory ./countryfiles_source exists. Wrote file ./countryfiles_source/monuments_be-wal_(nl).examples
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-23-70c319e082ef> in <module>()
----> 1 create_source_table_value_examples()

<ipython-input-22-84eb41c963c9> in create_source_table_value_examples()
      7     for table in final_tables:
      8         sql = "SELECT * FROM monuments_all"
----> 9         df = pd.io.sql.read_sql(conn.escape_string(sql), conn)
     10 
     11         h1 = "= Non-standardized fields from table " + table + "=\n"

/home/mos/anaconda3/lib/python3.5/site-packages/pandas/io/sql.py in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)
    497             sql, index_col=index_col, params=params,
    498             coerce_float=coerce_float, parse_dates=parse_dates,
--> 499             chunksize=chunksize)
    500 
    501     try:

/home/mos/anaconda3/lib/python3.5/site-packages/pandas/io/sql.py in read_query(self, sql, index_col, coerce_float, params, parse_dates, chunksize)
   1607             frame = _wrap_result(data, columns, index_col=index_col,
   1608                                  coerce_float=coerce_float,
-> 1609                                  parse_dates=parse_dates)
   1610             return frame
   1611 

/home/mos/anaconda3/lib/python3.5/site-packages/pandas/io/sql.py in _wrap_result(data, columns, index_col, coerce_float, parse_dates)
    135 
    136     frame = DataFrame.from_records(data, columns=columns,
--> 137                                    coerce_float=coerce_float)
    138 
    139     _parse_date_columns(frame, parse_dates)

/home/mos/anaconda3/lib/python3.5/site-packages/pandas/core/frame.py in from_records(cls, data, index, exclude, columns, coerce_float, nrows)
   1020             columns = columns.drop(exclude)
   1021 
-> 1022         mgr = _arrays_to_mgr(arrays, arr_columns, result_index, columns)
   1023 
   1024         return cls(mgr)

/home/mos/anaconda3/lib/python3.5/site-packages/pandas/core/frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
   5248     axes = [_ensure_index(columns), _ensure_index(index)]
   5249 
-> 5250     return create_block_manager_from_arrays(arrays, arr_names, axes)
   5251 
   5252 

/home/mos/anaconda3/lib/python3.5/site-packages/pandas/core/internals.py in create_block_manager_from_arrays(arrays, names, axes)
   3926 
   3927     try:
-> 3928         blocks = form_blocks(arrays, names, axes)
   3929         mgr = BlockManager(blocks, axes)
   3930         mgr._consolidate_inplace()

/home/mos/anaconda3/lib/python3.5/site-packages/pandas/core/internals.py in form_blocks(arrays, names, axes)
   4023 
   4024     if len(object_items) > 0:
-> 4025         object_blocks = _simple_blockify(object_items, np.object_)
   4026         blocks.extend(object_blocks)
   4027 

/home/mos/anaconda3/lib/python3.5/site-packages/pandas/core/internals.py in _simple_blockify(tuples, dtype)
   4053     not None, coerce to this dtype
   4054     """
-> 4055     values, placement = _stack_arrays(tuples, dtype)
   4056 
   4057     # CHECK DTYPE?

/home/mos/anaconda3/lib/python3.5/site-packages/pandas/core/internals.py in _stack_arrays(tuples, dtype)
   4117     stacked = np.empty(shape, dtype=dtype)
   4118     for i, arr in enumerate(arrays):
-> 4119         stacked[i] = _asarray_compat(arr)
   4120 
   4121     return stacked, placement

KeyboardInterrupt: 

In [ ]: