In [1]:
# this is a calendar scraper and report of safety boat volunteers for LHYC
# use this to contact volunteers and ensure coverage

from bs4 import BeautifulSoup, SoupStrainer
import re
import urllib
import webbrowser as wb
import random
import time
import datetime as dt
import pandas as pd

###################
# build calendars #
###################

month =  5
calendars = []
p1 = 'http://lhycsailing.com/calendar?EventViewMode=1&EventListViewMode=2&SelectedDate='
p2 = '/10/2017&CalendarViewType=1'

while month <= 9:
    calendar_url = p1 + str(month) + p2
    month = month + 1
    calendars.append(calendar_url)
    
###################
# find events    #
###################  

event_ids = []
p3 = '^http://lhycsailing.com/event-'

for calendar in calendars:
    resp = urllib.request.urlopen(calendar)
    soup = BeautifulSoup(resp, from_encoding=resp.info().get_param('charset'))
    for link in soup.find_all(href=re.compile(p3)):
        link = link['href']
        link = re.search(p3 + '(.*)\?', link).group(1)
        event_ids.append(link)
                      
#print(event_ids)

#####################
# build event links #
#####################  

p4 = 'http://lhycsailing.com/event-'
p5 = '/Attendees'

events = []

for event_id in event_ids:
    event_url = p4 + str(event_id) + p5
    events.append(event_url)

#print(events)

#####################
# filled table data #
#####################  

p6 = '^http://lhycsailing.com/Sys/PublicProfile'
members = []
filled_events = []
filled_dates = []

registrations = []
unfilled_events = []
unfilled_dates = []

for event in events:
    try:
        resp = urllib.request.urlopen(event)
        soup = BeautifulSoup(resp, from_encoding=resp.info().get_param('charset'))
        
        ##  member extraction
        for member in soup.find_all(href=re.compile(p6)):
            member = member.text
            members.append(member)
            
            event_name = soup.select('div.infoText')[0]
            filled_events.extend(event_name.stripped_strings)
            
            date = soup.select('div.infoText')[1]
            filled_dates.extend(date.stripped_strings)
    except:
        pass
    
#######################
# unfilled table data #
#######################  

    try: 
        event_name = soup.select('div.infoText')[0]
        unfilled_events.extend(event_name.stripped_strings)
            
        date = soup.select('div.infoText')[1]
        unfilled_dates.extend(date.stripped_strings)
        
        registered = soup.select('h2.stepTitle')[0]
        registrations.extend(registered.stripped_strings)
    except:
        pass


C:\Users\ryangrosch\Anaconda3\lib\site-packages\bs4\__init__.py:181: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("lxml"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.

The code that caused this warning is on line 193 of the file C:\Users\ryangrosch\Anaconda3\lib\runpy.py. To get rid of this warning, change code that looks like this:

 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "lxml")

  markup_type=markup_type))

In [2]:
# How many times has each person volunteered?

filled_raw = pd.DataFrame(
    {'Member': members,
     'Event': filled_events,
     'Date': filled_dates
    })
filled = filled_raw.drop_duplicates()
filled_summary = filled.groupby(by='Member').count().sort_values('Event',ascending=False)[['Event']]
filled_summary


Out[2]:
Event
Member
Anderson, Mark 3
Fraser, James 3
Schroeder, Joe 3
O'Brien, Jennifer 3
Grosch, Ryan 3
Gantzer, Charles 3
Jewett, Ted 3
Wold, Jayson 3
Driessen, Vincent 2
Larson, Steven 2
Maloney, Mary 2
Crowe, Andy 2
Chesnutt, Henry 2
Getsinger, John 1
Dillon, Bob 1
Barbour, John 1
Katics, John 1
Losch, Steve 1
Loscheider, Steve 1
Mott, David 1

In [3]:
# When did they volunteer?

filled


Out[3]:
Date Event Member
0 05/13/2017 12:00 PM - 5:00 PM Saturday Safety Boat Jewett, Ted
1 05/13/2017 12:00 PM - 5:00 PM Saturday Safety Boat Schroeder, Joe
2 05/14/2017 10:30 AM - 1:00 PM Sunday Safety Boat Grosch, Ryan
3 05/14/2017 10:30 AM - 1:00 PM Sunday Safety Boat O'Brien, Jennifer
4 05/20/2017 12:00 PM - 5:00 PM Saturday Safety Boat O'Brien, Jennifer
5 05/20/2017 12:00 PM - 5:00 PM Saturday Safety Boat Losch, Steve
6 05/21/2017 10:30 AM - 1:00 PM Sunday Safety Boat Loscheider, Steve
7 05/21/2017 10:30 AM - 1:00 PM Sunday Safety Boat Anderson, Mark
8 05/27/2017 12:00 PM - 5:00 PM Saturday Safety Boat Wold, Jayson
9 05/28/2017 10:30 AM - 1:00 PM Sunday Safety Boat Getsinger, John
10 05/28/2017 10:30 AM - 1:00 PM Sunday Safety Boat Dillon, Bob
11 05/29/2017 10:30 AM - 1:00 PM Monday Safety Boat Crowe, Andy
12 05/29/2017 10:30 AM - 1:00 PM Monday Safety Boat Barbour, John
13 06/03/2017 12:00 PM - 5:00 PM Saturday Safety Boat Gantzer, Charles
14 06/04/2017 10:30 AM - 1:00 PM Sunday Safety Boat Gantzer, Charles
15 06/10/2017 12:00 PM - 5:00 PM Saturday Safety Boat Gantzer, Charles
23 06/11/2017 10:30 AM - 1:00 PM Sunday Safety Boat Anderson, Mark
24 06/18/2017 10:30 AM - 1:00 PM Sunday Safety Boat Larson, Steven
25 06/24/2017 12:00 PM - 5:00 PM Saturday Safety Boat Maloney, Mary
26 06/24/2017 12:00 PM - 5:00 PM Saturday Safety Boat Jewett, Ted
27 06/25/2017 10:30 AM - 1:00 PM Sunday Safety Boat Jewett, Ted
28 06/25/2017 10:30 AM - 1:00 PM Sunday Safety Boat Chesnutt, Henry
29 07/01/2017 12:00 PM - 5:00 PM Saturday Safety Boat Grosch, Ryan
30 07/01/2017 12:00 PM - 5:00 PM Saturday Safety Boat Fraser, James
31 07/02/2017 10:30 AM - 1:00 PM Sunday Safety Boat Schroeder, Joe
32 07/04/2017 10:30 AM - 1:00 PM Monday Safety Boat Katics, John
33 07/04/2017 10:30 AM - 1:00 PM Monday Safety Boat Fraser, James
34 07/08/2017 12:00 PM - 5:00 PM Saturday Safety Boat Mott, David
43 07/09/2017 10:30 AM - 1:00 PM Sunday Safety Boat Chesnutt, Henry
44 07/15/2017 12:00 PM - 5:00 PM Saturday Safety Boat Maloney, Mary
45 07/16/2017 10:30 AM - 1:00 PM Sunday Safety Boat Anderson, Mark
46 07/16/2017 10:30 AM - 1:00 PM Sunday Safety Boat O'Brien, Jennifer
47 07/29/2017 12:00 PM - 5:00 PM Saturday Safety Boat Fraser, James
48 07/30/2017 10:30 AM - 1:00 PM Sunday Safety Boat Larson, Steven
50 08/06/2017 10:30 AM - 1:00 PM Sunday Safety Boat Wold, Jayson
51 08/19/2017 12:00 PM - 5:00 PM Saturday Safety Boat Crowe, Andy
52 09/03/2017 10:30 AM - 1:00 PM Sunday Safety Boat Wold, Jayson
53 09/04/2017 10:30 AM - 1:00 PM Monday Safety Boat Schroeder, Joe
54 09/09/2017 12:00 PM - 5:00 PM Saturday Safety Boat Driessen, Vincent
58 09/10/2017 10:30 AM - 1:00 PM Sunday Safety Boat Driessen, Vincent
59 09/30/2017 12:00 PM - 5:00 PM Saturday Safety Boat - Commodore Cup Grosch, Ryan

In [4]:
# Where do we still need volunteers?

unfilled_raw = pd.DataFrame(
    {'Registered': registrations,
     'Event': unfilled_events,
     'Date': unfilled_dates
    })
unfilled = unfilled_raw.drop_duplicates()
unfilled['Registered'] = unfilled['Registered'].str.extract('(\d+)').fillna(0).astype(int)
unfilled = unfilled.query('Registered < 2')
unfilled['Date'] = unfilled['Date'].str[:10]
unfilled['Date'] = pd.to_datetime(unfilled['Date'])
unfilled = unfilled.set_index('Date')
unfilled = unfilled['20170716':'20170922']
unfilled


C:\Users\ryangrosch\Anaconda3\lib\site-packages\ipykernel_launcher.py:9: FutureWarning: currently extract(expand=None) means expand=False (return Index/Series/DataFrame) but in a future version of pandas this will be changed to expand=True (return DataFrame)
  if __name__ == '__main__':
C:\Users\ryangrosch\Anaconda3\lib\site-packages\ipykernel_launcher.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
Out[4]:
Event Registered
Date
2017-08-05 Saturday Safety Boat 1
2017-08-12 Saturday Safety Boat 1
2017-08-13 Sunday Safety Boat 1
2017-08-19 Saturday Safety Boat 1
2017-08-20 Sunday Safety Boat 1
2017-08-26 Saturday Safety Boat 0
2017-08-27 Sunday Safety Boat 1
2017-09-02 Saturday Safety Boat 0
2017-09-04 Monday Safety Boat 1
2017-09-09 Saturday Safety Boat 1