In [1]:
# Load the "autoreload" extension
%load_ext autoreload

# always reload modules marked with "%aimport"
%autoreload 1

import os
import sys

# add the 'src' directory as one where we can import modules
src_dir = os.path.join(os.getcwd(), os.pardir, 'src', 'data')
sys.path.append(src_dir)

In [2]:
%aimport scrape_buda

In [3]:
from scrape_buda import BudaRating

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [5]:
pd.set_option('display.max_columns', None)

In [6]:
import pymysql as mdb

con = mdb.connect('localhost', 'budauser', 'TrotNixon', 'buda_prod') #host, user, password,

In [7]:
sqlfile = os.path.join(os.getcwd(), os.pardir, 'data', 'raw', 'buda_prod_backup_2016-08-31.sql')

In [8]:
interim_dir = os.path.join(os.getcwd(), os.pardir, 'data', 'interim')

In [12]:
sqlfile = """SHOW TABLES;"""
buda_tables = pd.read_sql(sqlfile, con)

In [14]:
buda_tables['Tables_in_buda_prod'].values


Out[14]:
array(['admin_contact_info', 'application_division_map', 'applications',
       'buda_admin_contact_info', 'buda_admin_positions',
       'buda_admin_positions_map', 'buda_broken_links', 'buda_contacts',
       'buda_devel_documentation', 'buda_groups',
       'buda_hatleague_captain_pwd', 'buda_hatleague_details',
       'buda_hatleague_divisions', 'buda_hatleague_locations',
       'buda_hatleague_player_rankings', 'buda_hatleague_rankings',
       'buda_hatleague_rosters', 'buda_hatleague_rosters_updates',
       'buda_hatleague_schedules', 'buda_hatleague_teams',
       'buda_hs_spirit_scores', 'buda_leagues', 'buda_ll_helpers',
       'buda_ll_schedule', 'buda_ll_volunteers', 'buda_logger',
       'buda_mail_list_requests', 'buda_object_list', 'buda_objects',
       'buda_permissions_map', 'buda_pickup_listings', 'buda_school_teams',
       'buda_sessions', 'buda_team_contact_map', 'buda_team_league_map',
       'buda_teams', 'buda_teams_2003', 'buda_temp_pwd',
       'buda_tournament_signups', 'buda_tournaments',
       'buda_user_group_map', 'buda_users', 'buda_users_contact_info',
       'buda_users_public', 'buda_volunteers', 'buda_want_team',
       'budacol_contact_info', 'captain_ratings', 'commissioner_map',
       'contact_info', 'directions', 'division_team_settings', 'donations',
       'event_signup', 'hatleague_signup', 'hatleague_signup_updates',
       'joom_assets', 'joom_associations', 'joom_banner_clients',
       'joom_banner_tracks', 'joom_banners', 'joom_categories',
       'joom_comprofiler', 'joom_comprofiler_field_values',
       'joom_comprofiler_fields', 'joom_comprofiler_lists',
       'joom_comprofiler_members', 'joom_comprofiler_plugin',
       'joom_comprofiler_sessions', 'joom_comprofiler_tabs',
       'joom_comprofiler_userreports', 'joom_comprofiler_views',
       'joom_contact_details', 'joom_content', 'joom_content_frontpage',
       'joom_content_rating', 'joom_core_log_searches', 'joom_extensions',
       'joom_finder_filters', 'joom_finder_links',
       'joom_finder_links_terms0', 'joom_finder_links_terms1',
       'joom_finder_links_terms2', 'joom_finder_links_terms3',
       'joom_finder_links_terms4', 'joom_finder_links_terms5',
       'joom_finder_links_terms6', 'joom_finder_links_terms7',
       'joom_finder_links_terms8', 'joom_finder_links_terms9',
       'joom_finder_links_termsa', 'joom_finder_links_termsb',
       'joom_finder_links_termsc', 'joom_finder_links_termsd',
       'joom_finder_links_termse', 'joom_finder_links_termsf',
       'joom_finder_taxonomy', 'joom_finder_taxonomy_map',
       'joom_finder_terms', 'joom_finder_terms_common',
       'joom_finder_tokens', 'joom_finder_tokens_aggregate',
       'joom_finder_types', 'joom_jaem_log', 'joom_jaem_services',
       'joom_kunena_aliases', 'joom_kunena_announcement',
       'joom_kunena_attachments', 'joom_kunena_categories',
       'joom_kunena_configuration', 'joom_kunena_keywords',
       'joom_kunena_keywords_map', 'joom_kunena_messages',
       'joom_kunena_messages_text', 'joom_kunena_polls',
       'joom_kunena_polls_options', 'joom_kunena_polls_users',
       'joom_kunena_ranks', 'joom_kunena_sessions', 'joom_kunena_smileys',
       'joom_kunena_thankyou', 'joom_kunena_topics',
       'joom_kunena_user_categories', 'joom_kunena_user_read',
       'joom_kunena_user_topics', 'joom_kunena_users',
       'joom_kunena_users_banned', 'joom_kunena_version', 'joom_languages',
       'joom_menu', 'joom_menu_types', 'joom_messages',
       'joom_messages_cfg', 'joom_modules', 'joom_modules_menu',
       'joom_newsfeeds', 'joom_overrider', 'joom_phocagallery',
       'joom_phocagallery_categories', 'joom_phocagallery_comments',
       'joom_phocagallery_fb_users', 'joom_phocagallery_img_comments',
       'joom_phocagallery_img_votes',
       'joom_phocagallery_img_votes_statistics', 'joom_phocagallery_tags',
       'joom_phocagallery_tags_ref', 'joom_phocagallery_user',
       'joom_phocagallery_votes', 'joom_phocagallery_votes_statistics',
       'joom_redirect_links', 'joom_redj_errors', 'joom_redj_pages404',
       'joom_redj_redirects', 'joom_redj_referer_urls',
       'joom_redj_referers', 'joom_redj_visited_urls', 'joom_schemas',
       'joom_session', 'joom_template_styles', 'joom_update_categories',
       'joom_update_sites', 'joom_update_sites_extensions', 'joom_updates',
       'joom_user_notes', 'joom_user_profiles', 'joom_user_usergroup_map',
       'joom_usergroups', 'joom_users', 'joom_viewlevels', 'joom_weblinks',
       'joom_wf_profiles', 'phinxlog', 'player_extras', 'team_creation',
       'tourny_pickups', 'tourny_prefs', 'waivers'], dtype=object)

In [56]:
sqlfile = """SELECT * FROM joom_jaem_log;"""
buda_teams = pd.read_sql(sqlfile, con)

In [57]:
buda_teams


Out[57]:
id ext_id service_id check_date check_info
0 1 template-10009 None 2013-11-14 21:46:08 <div class=\"status-not-uploaded\">Sorry, no v...

In [34]:
sqlfile = """SELECT * FROM buda_users_public;"""
buda_users_public = pd.read_sql(sqlfile, con, index_col='user_id')

In [35]:
sqlfile = """SELECT * FROM buda_hatleague_player_rankings;"""
buda_hatleague_player_rankings = pd.read_sql(sqlfile, con)

In [36]:
buda_hatleague_player_rankings = buda_hatleague_player_rankings.rename(columns={'player_id':'user_id'})

In [37]:
buda_hatleague_player_rankings = buda_hatleague_player_rankings.set_index('user_id')

In [38]:
buda_hatleague_player_rankings = buda_hatleague_player_rankings.drop('edited_date', axis=1)

In [39]:
self_ratings = buda_hatleague_player_rankings.join(buda_users_public)

In [40]:
self_ratings[(self_ratings['last_name'] == 'Bussmann') & (self_ratings['first_name'] == 'Shane')]


Out[40]:
league_id rank_type status edited_by modification_id rank experience other_sport throwing break_mark huck receiving height speed shirt recommend_captain user_name email password first_name last_name edited_date last_login
user_id
30295 0 1 NaN 30295 None 49.0 div2 1.0 reliable sometime none both_hands 73.0 fast large 0.0 sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31
30295 30844 1 NaN 3348 None 40.0 None NaN None None None None NaN None None NaN sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31
30295 30919 1 NaN 3424 None 40.0 None NaN None None None None NaN None None NaN sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31
30295 30919 2 NaN 919 None 53.0 hat 1.0 reliable sometime short one_hand 0.0 average NaN sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31
30295 30924 1 NaN 10373 None 40.0 None NaN None None None None NaN None None NaN sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31
30295 30924 2 NaN 30295 None 59.0 div2 1.0 reliable sometime none leaping 0.0 fast NaN sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31
30295 31034 1 NaN 3424 None 55.0 None NaN None None None None NaN None None NaN sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31
30295 31034 2 NaN 30295 None 66.0 handle_div2 1.0 reliable sometime short one_hand 0.0 fast NaN sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31
30295 32741 1 NaN 3348 None 60.0 None NaN None None None None NaN None None NaN sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31
30295 32746 1 0.0 10373 None 57.0 None NaN None None None None NaN None None NaN sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31
30295 34405 1 NaN 10373 None 57.0 None NaN None None None None NaN None None NaN sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31
30295 34405 2 NaN 20329 None 66.0 handle_div2 1.0 reliable sometime short one_hand 0.0 fast 0.0 sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31
30295 37668 1 NaN 20549 None 61.0 None NaN None None None None NaN None None NaN sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31
30295 38484 1 NaN 29236 None 61.0 None NaN None None None None NaN None None NaN sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31
30295 40258 1 NaN 29236 None 55.0 None NaN None None None None NaN None None NaN sbussmann sbussmann@gmail.com 6b1888fae695d8e52a01ae5b0fefc2e1:oLVn5pTC3oowU... Shane Bussmann 2010-11-18 17:27:34 2016-08-10 19:19:31

In [41]:
hmm = self_ratings.replace('nan', -1)

In [42]:
len(hmm[hmm['rank'] == -1])


Out[42]:
0

In [43]:
sqlfile = """SELECT league_id,league_name,season FROM buda_leagues;"""
buda_leagues = pd.read_sql(sqlfile, con)

In [44]:
self_ratings_league = self_ratings.reset_index().merge(buda_leagues, on='league_id')

In [45]:
len(self_ratings)


Out[45]:
47921

In [46]:
len(self_ratings_league)


Out[46]:
38296

In [47]:
self_ratings_league = self_ratings_league.drop(['modification_id', 'user_name', 'email', 'password'], axis=1)

In [48]:
self_ratings_league.to_csv(os.path.join(interim_dir, 'self_ratings.csv'))

In [50]:
self_ratings_league[(self_ratings_league['first_name'] == 'Martin')].sort('league_id')


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-50-44587d58bca7> in <module>()
----> 1 self_ratings_league[(self_ratings_league['first_name'] == 'Martin')].sort('league_id')

/Users/rbussman/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in __getattr__(self, name)
   2968             if name in self._info_axis:
   2969                 return self[name]
-> 2970             return object.__getattribute__(self, name)
   2971 
   2972     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'sort'

In [51]:
ratings.team_players['40328'][2].split(',')[1][1:]


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-51-5986df920a53> in <module>()
----> 1 ratings.team_players['40328'][2].split(',')[1][1:]

NameError: name 'ratings' is not defined

In [54]:
for league_id in ratings.league_teams:
    if '40328' in ratings.league_teams[league_id]:
        print(league_id)


40258

In [ ]: