In [1]:
import pandas as pd # Beautiful tool for data wrangling! e.g. '!pip install pandas' from a Notebook
# See https://mariadb.com/blog/how-connect-python-programs-mariadb e.g. '!pip install mysql' from Notebook
import MySQLdb
import re
from collections import Counter
import os
from collections import OrderedDict
import random
pd.set_option("display.max_rows",35) # Useful when having large Pandas DataFrames like we do here
In [4]:
def create_monuments_all_value_example_tables(df):
"""Takes a Pandas DataFrame object and writes wikitables to files named after the table they are
produced from to {the current Directory}/wikitables/
"""
languages = ['az', 'cs', 'da', 'ru', 'nl', 'it', 'fr', 'ca', 'hu', 'sv', 'lb', 'den', 'no',\
'es', 'sr', 'uk', 'de', 'ar', 'be-tarask', 'he', 'en', 'hy', 'gl', 'pl', 'et', 'th', 'ro', 'sk', 'pt']
country_codes = dict(df.country.value_counts()).keys()
for code in country_codes:
h1 = "= Non-standardized fields from country " + code + " in monuments_all =\n"
column_tables = []
country = df[df["country"] == code]
lang = country.lang.sample(n=1).to_string().split(" ")[1] # A very complicated way of getting one value!
columns = country.columns
page_sections = []
for column in columns:
# the listed fields we avoid are assumed to be standardized
if column not in ["country","lang","project","changed","lat","lon","lat_int","lon_int"]:
h2 = "== 10 random samples from field " + column + " ==\n"
values = country[column]
try:
sample = values.sample(n=10)
examples = sample.values
except ValueError as e:
print("country: {} cannot be sampled.\n Error: {}\n Total objects is {}. Skipping country.".format(code, e, len(values)))
break
table_header = '{| class="wikitable" style="width: 675px;\n'
table_name = '|+ '+code + "-" + column + " 10 random samples\n"
# create table columns
table_columns = "! scope='col' style='width: 225px;' |" + str(column) + "\n" + \
"! scope='col' style='width: 225px; height: 20px;'|" + "Conversion \n" + \
"! scope='col' style='width: 225px;' | Comment\n|-\n"
table_rows = []
for example in examples:
row="| style='height: 20px;'| " + str(example) + "\n|\n|\n|-\n"
table_rows.append(row)
table_rows_str = "".join(table_rows)
# Fill in examples values from the first record in the table
table_footer = "\n|}"
else:
continue
column_tables.append(h2)
column_table = table_header + table_name + table_columns + table_rows_str[:-1] + table_footer + "\n"
column_tables.append(column_table)
wikipage = h1 + "\n".join(column_tables)
if os.path.isdir("./langfiles_all"):
#print(wikipage)
out = open("./langfiles_all/" + lang + ".examples","w")
out.write(wikipage)
out.flush()
print("Directory ./langfiles_all exists. Wrote file {}".format(out.name))
out.close()
else:
os.mkdir("./langfiles_all")
with open("./langfiles_all/" + lang + ".examples","w") as out:
out.write(wikipage)
print("./langfiles_all doesn't exist")
if os.path.isdir("./countryfiles_all"):
#print(wikipage)
out = open("./countryfiles_all/" + code + ".examples","w")
out.write(wikipage)
out.flush()
print("Directory ./countryfiles_all exists. Wrote file {}".format(out.name))
out.close()
else:
os.mkdir("./countryfiles_all")
with open("./countryfiles_all/" + code + ".examples","w") as out:
out.write(wikipage)
print("./countryfiles_all doesn't exist")
In [91]:
conn = MySQLdb.connect(user='mos', passwd='', db='monuments_db', charset='utf8', use_unicode=True)
cursor = conn.cursor()
cursor.execute("SET NAMES utf8")
# Load full table into memory
sql = "SELECT * FROM monuments_all"
df = pd.io.sql.read_sql(conn.escape_string(sql), conn)
create_monuments_all_value_example_tables(df)
In [ ]: