In [18]:
import MySQLdb
import pickle
from collections import Counter
import pandas as pd
In [12]:
conn = MySQLdb.connect(user='mos', passwd='', db='monuments_db', charset='utf8', use_unicode=True)
cursor = conn.cursor()
cursor.execute("SET NAMES utf8")
Out[12]:
In [13]:
def create_list_of_source_tables_pickle():
cursor.execute("SHOW TABLES")
non_country_tables = set(["prox_search","admin_tree","commonscat","image","monuments_all"])
results = cursor.fetchall()
all_tables = []
for pair in results:
for item in pair:
all_tables.append(item)
country_tables = list(set(all_tables) - non_country_tables) # set difference, then convert back to list
print("len(country_tables). {}".format(len(country_tables)))
cnt = Counter()
for table in country_tables:
sql = 'SELECT count(*) FROM `{}`'.format(table)
cursor.execute(conn.escape_string(sql))
res = cursor.fetchone()
cnt.update({table: res[0]})
empty_tables = []
for table,count in cnt.items():
if count == 0:
empty_tables.append(table)
print("len(empty_tables): {}".format(len(empty_tables)))
final_tables = set(country_tables) - set(empty_tables)
print("len(final_tables): {}".format(len(final_tables)))
pickle.dump(final_tables, open("./final_tables.pickle","wb"))
In [14]:
def create_country_codes_pickle():
print(list(dict(df.country.value_counts()).keys()))
country_codes = list(dict(df.country.value_counts()).keys())
pickle.dump(country_codes, open("./country_codes.pickle","wb"))
In [15]:
create_list_of_source_tables_pickle()
In [17]:
# Load full table into memory
sql = "SELECT * FROM monuments_all"
df = pd.io.sql.read_sql(conn.escape_string(sql), conn)
create_country_codes_pickle()
In [ ]: