Import Packages


In [1]:
from icalendar import *
from datetime import date, datetime, timedelta
from __future__ import print_function
import mysql.connector
from mysql.connector import errorcode
import pickle
import csv
import pandas
from pandas.io import sql
import numpy as np
import os
import re
import glob
import pytz
import calendar_parser as cp 
# for calendar_parser, I downloaded the Python file created for this package
# https://github.com/oblique63/Python-GoogleCalendarParser/blob/master/calendar_parser.py
# and saved it in the working directory with my Python file (Jupyter Notebook file). 
# In calendar_parser.py, their function _fix_timezone is very crucial for my code to 
# display the correct local time.

Establish Connection with MySQL (optional approach)


In [2]:
User =     # MySQL Username
Password = # MySQL password
Host =     # MySQL host
cnx = mysql.connector.connect(user=User, password=Password, host=Host)

In [3]:
cursor = cnx.cursor()

In [4]:
# Approach / Code modified from MySQL Connector web page
DB_NAME = "CalDb"

# 1) Creates database if it doesn't already exist
# 2) Then connects to the database
def create_database(cursor):
    try:
        cursor.execute(
            "CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(DB_NAME))
    except mysql.connector.Error as err:
        print("Failed creating database: {}".format(err))
        exit(1)

try:
    cnx.database = DB_NAME    
except mysql.connector.Error as err:
    if err.errno == errorcode.ER_BAD_DB_ERROR:
        create_database(cursor)
        cnx.database = DB_NAME
    else:
        print(err)
        exit(1)

In [5]:
# Create table specifications
TABLES = {}
TABLES['eBike'] = (
    "CREATE TABLE IF NOT EXISTS `eBike` ("
    "  `eBikeName` varchar(10),"
    "  `Organizer` varchar(100),"
    "  `Created` datetime NOT NULL,"
    "  `Start` datetime NOT NULL,"
    "  `End` datetime NOT NULL"
    ") ENGINE=InnoDB")

In [6]:
# If table does not already exist, this code will create it based on specifications
for name, ddl in TABLES.iteritems():
    try:
        print("Creating table {}: ".format(name), end='')
        cursor.execute(ddl)
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_TABLE_EXISTS_ERROR:
            print("already exists.")
        else:
            print(err.msg)
    else:
        print("OK")


Creating table eBike: OK

In [7]:
# Obtain current count from each calendar to read in and add additional entries only
cursor.execute("SELECT COUNT(*) FROM eBike WHERE eBikeName = 'Gold'")
GoldExistingCount = cursor.fetchall()

cursor.execute("SELECT COUNT(*) FROM eBike WHERE eBikeName = 'Blue'")
BlueExistingCount = cursor.fetchall()

In [13]:
#print(GoldExistingCount[0][0])


0

Read in Calendar Data


In [20]:
# Declare lists
eBikeName = []
Organizer = []
DTcreated = []
DTstart = []
DTend = []
Counter = 0

In [21]:
# Open first e-bike calendar, appends data, then repeats for second calendar. 
# A future modification I am working on is to bring this into one loop such that 
# as many calendars as desired for a specific table can be read in from one folder. 
#
# Additionally, I plan to look into potential for using the .ics url to read in
# calendar data so that the file does not need to be updated each time this code
# is run for analysis of calendar data. 
b = open('Gold.ics','rb')

In [22]:
cal = Calendar.from_ical(b.read())

timezones = cal.walk('VTIMEZONE')

for k in cal.walk():
    if k.name == "VEVENT":
        Counter = Counter + 1
        if Counter > GoldExistingCount[0][0]:
            eBikeName.append('Gold')
            Organizer.append( re.sub(r'mailto:', "", str(k.get('ORGANIZER') ) ) )
            DTcreated.append( cp._fix_timezone( k.decoded('CREATED'), pytz.timezone(timezones[0]['TZID']) ) )
            DTstart.append( cp._fix_timezone( k.decoded('DTSTART'), pytz.timezone(timezones[0]['TZID']) ) )
            DTend.append( cp._fix_timezone( k.decoded('DTEND'), pytz.timezone(timezones[0]['TZID']) ) )

b.close()

In [23]:
# Resetting 'Counter' to 0 and opening next calendar...
Counter = 0
b = open('Blue.ics','rb')

In [24]:
cal = Calendar.from_ical(b.read())

timezones = cal.walk('VTIMEZONE')

for k in cal.walk():
    if k.name == "VEVENT":
        Counter = Counter + 1
        if Counter > BlueExistingCount[0][0]:
            eBikeName.append('Blue')
            Organizer.append( re.sub(r'mailto:', "", str(k.get('ORGANIZER') ) ) )
            DTcreated.append( cp._fix_timezone( k.decoded('CREATED'), pytz.timezone(timezones[0]['TZID']) ) )
            DTstart.append( cp._fix_timezone( k.decoded('DTSTART'), pytz.timezone(timezones[0]['TZID']) ) )
            DTend.append( cp._fix_timezone( k.decoded('DTEND'), pytz.timezone(timezones[0]['TZID']) ) )

b.close()

In [25]:
# Now that calendar data is fully read in, create a list with data in a format for 
# entering into the MySQL database. 
# 
# At this point, if the MySQL Connector component is not desired, other approaches  
# include creating a Pandas dataframe or something else.
# For reference, a Pandas dataframe could be created with the following command: 
# df = pandas.DataFrame({'ORGANIZER' : Organizer,'CREATED' : DTcreated, 'DTSTART' : DTstart,'DTEND': DTend})
eBikeData = []
for i in range(len(DTcreated)):
    eBikeData.append((eBikeName[i], Organizer[i], DTcreated[i], DTstart[i], DTend[i]))

MySQL Connection to Push Out and Read In Data


In [28]:
# Insert calendar data into MySQL table eBike
cursor.executemany("INSERT INTO eBike (eBikeName, Organizer, Created, Start, End) VALUES (%s, %s, %s, %s, %s)", 
                   eBikeData)
cnx.commit()

In [1]:
# Find emails associated with reservations created at latest 7 days ago
cursor.execute("SELECT Organizer, Start FROM eBike WHERE DATEDIFF(Start, CURDATE()) >= 7")
WeeklyEmail = cursor.fetchall()
print(WeeklyEmail)

In [2]:
# Find total e-bike rides by user
cursor.execute("SELECT Organizer, COUNT(*) AS Total_Rides FROM eBike GROUP BY Organizer ORDER BY Total_Rides DESC;")
TotalRides_by_User = cursor.fetchall()
print(TotalRides_by_User)

In [32]:
cursor.close()


Out[32]:
True

In [33]:
cnx.close()

Desired Features (planned future improvements)


In [ ]:
# Features to add within the next week: Total Trips by Reservation Time, Total Trips by Weekday, 
# Average and Maximum Hours by Weekday, Average and Maximum Utilization by Weekday, 
# Find how far in advance reservations are created

In [ ]:
# Generate reports from SQL query results

In [8]:
# SINGLE LOOP READ-IN FEATURE

# Enter desired directory where .ics files are contained
#path = '/Users/dmeroux/Documents/Calendar_Data_Extraction_V1.0'
#for infile in glob.glob( os.path.join(path, '*.ics') ):
#    b = open(infile,'rb')
#    cal = Calendar.from_ical(b.read())
#    for k in cal.walk():
#        if k.name == "VEVENT": 
#            if str(type(k.decoded('DTSTART'))) == "<class 'datetime.datetime'>":
#                Organizer.append( re.sub(r'mailto:', "", str(k.get('ORGANIZER') ) ) )   # email address of organizer
#                DTcreated.append( datetime.timestamp(k.decoded('CREATED') ) ) # reservation created date
#                DTstart.append( datetime.timestamp(k.decoded('DTSTART') ) )  # reservation start date
#                DTend.append( datetime.timestamp(k.decoded('DTEND') ) )     # reservation end date
#    b.close()

In [65]:
# URL FEATURE

In [83]:
# Option to work with Pandas Dataframe (this code runs properly)
################################### ISSUE TRANSFERRING 'SUMMARY' INTO MYSQL "TOO LONG"
#df = pandas.DataFrame({'ORGANIZER' : Organizer,'CREATED' : DTcreated, 'DTSTART' : DTstart,'DTEND': DTend})

In [ ]:
# Add use case with PySpark
# FOR PYSPARK APPROACH, TRY CONNECTING TO MYSQL https://www.supergloo.com/fieldnotes/spark-sql-mysql-python-example-jdbc/