In [1]:
import pandas as pd
import numpy as np
import sklearn
from pygeocoder import Geocoder
import time
import datetime
import networkx
from sklearn.svm import SVC
from sklearn import neighbors, cross_validation, mixture
import scipy.stats
from IPython.display import display, clear_output
import matplotlib.pyplot as plt 

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np

%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [2]:
data = pd.read_csv('11700TrackingData.csv')

In [3]:
del data['Unnamed: 0']

Step 1: Pre-Proccessing Commands

Part of the pre-proccessing was performed manually


In [4]:
data.head()


Out[4]:
date time long lat Google Location Address week day
0 10/1/14 12:02:21 AM -122.274711 37.864022 Berkeley Grant Street 2422 Grant Street, Berkeley, CA 94703, USA 2
1 10/1/14 12:03:22 AM -122.274735 37.864002 Berkeley Grant Street 2422 Grant Street, Berkeley, CA 94703, USA 2
2 10/1/14 12:04:21 AM -122.274760 37.864008 Berkeley Grant Street 2422 Grant Street, Berkeley, CA 94703, USA 2
3 10/1/14 12:05:22 AM -122.274750 37.864025 Berkeley Grant Street 2422 Grant Street, Berkeley, CA 94703, USA 2
4 10/1/14 12:06:21 AM -122.274749 37.864044 Berkeley Grant Street 2420 Grant Street, Berkeley, CA 94703, USA 2

In [46]:
data.columns = ['date','time','long','lat','Google Location','Address','week day']
data.head()


Out[46]:
date time long lat Google Location Address week day
0 10/1/14 12:02:21 AM -122.274711 37.864022 Berkeley Grant Street 2422 Grant Street, Berkeley, CA 94703, USA 2
1 10/1/14 12:03:22 AM -122.274735 37.864002 Berkeley Grant Street 2422 Grant Street, Berkeley, CA 94703, USA 2
2 10/1/14 12:04:21 AM -122.274760 37.864008 Berkeley Grant Street 2422 Grant Street, Berkeley, CA 94703, USA 2
3 10/1/14 12:05:22 AM -122.274750 37.864025 Berkeley Grant Street 2422 Grant Street, Berkeley, CA 94703, USA 2
4 10/1/14 12:06:21 AM -122.274749 37.864044 Berkeley Grant Street 2420 Grant Street, Berkeley, CA 94703, USA 2

Step 2: Reverse GeoCoding Functions

Geo Encoding is done sequentially due to Google API Limitaiton


In [43]:
def reverseGeo(lat1, long1):
#results = Geocoder.reverse_geocode(37.875741,-122.260363)
  return Geocoder.reverse_geocode(lat1,long1)

In [44]:
print reverseGeo(37.864022,-122.274711).city, reverseGeo(37.864022,-122.274711).route


Berkeley Grant Street

In [7]:
data.head()


Out[7]:
date time long lat Google Location Address week day
0 10/1/14 12:02:21 AM -122.274711 37.864022 Berkeley Grant Street 2422 Grant Street, Berkeley, CA 94703, USA 2
1 10/1/14 12:03:22 AM -122.274735 37.864002 Berkeley Grant Street 2422 Grant Street, Berkeley, CA 94703, USA 2
2 10/1/14 12:04:21 AM -122.274760 37.864008 Berkeley Grant Street 2422 Grant Street, Berkeley, CA 94703, USA 2
3 10/1/14 12:05:22 AM -122.274750 37.864025 Berkeley Grant Street 2422 Grant Street, Berkeley, CA 94703, USA 2
4 10/1/14 12:06:21 AM -122.274749 37.864044 Berkeley Grant Street 2420 Grant Street, Berkeley, CA 94703, USA 2

In [8]:
del data['Unnamed: 0']

In [41]:
locArray = data[:][['lat','long']].values

In [18]:
data['Google Location'] = ''
data['Full Address'] = ''
data['time'] = ''

Geo-Coding is done in steps due to Google API limitation


In [47]:
start = currentIndex #2200
end = currentIndex + 2500
for index, locPair in enumerate(locArray[start:end]):
    time.sleep(0.1)
    print index
    result = reverseGeo(locPair[0],locPair[1])
   
    if (result.route is not None):
        clear_output(wait=True)
        print 'city'
        print result.route
        data['Google Location'][start + index] = str(str(result.city) + ' '+ str(result.route))
        data['Address'][start + index] = result.formatted_address
    else:
        data['Google Location'][start + index] = result.formatted_address
        data['Address'][start + index] = result.formatted_address
    
    
    #print str(result.city + ' '+ result.route)
print 'done codeing from 0 to 4200'


city
Sather Road
done codeing from 0 to 4200

In [48]:
currentIndex = currentIndex + 2500
currentIndex


Out[48]:
11700

Time Field Fix


In [264]:
dataWithTIme = pd.read_csv('RawTrackingData.csv')

In [283]:
timeColumn  =  dataWithTIme.ix[:,1]

In [284]:
data.ix[:,1] = timeColumn

In [78]:
currentIndex


Out[78]:
11700

Saving the Data of Geo Encoding Batch


In [49]:
data.to_csv('11700TrackingData.csv')
data[0:currentIndex].tail()


Out[49]:
date time long lat Google Location Address week day
11695 10/17/14 5:59:50 PM -122.259274 37.873932 Berkeley University Drive University of California, Berkeley, University... 4
11696 10/17/14 6:06:02 PM -122.259284 37.873936 Berkeley University Drive University of California, Berkeley, University... 4
11697 10/17/14 6:12:22 PM -122.259286 37.873907 Berkeley University Drive University of California, Berkeley, University... 4
11698 10/17/14 6:17:19 PM -122.259344 37.873438 Berkeley University Drive University of California, Berkeley, University... 4
11699 10/17/14 6:18:06 PM -122.259826 37.872685 Berkeley Sather Road University of California, Berkeley, Sather Roa... 4

Adding the weekday field to the data


In [50]:
data['week day'] = ''
for index, element  in enumerate(data['date']):
    t= datetime.datetime.strptime(element, '%m/%d/%y')
    data['week day'][index] = t.weekday()

Plotting Raw Data


In [51]:
plt.figure()
plt.plot(data[0:currentIndex]['lat'], data[0:currentIndex]['long'], 'o-', color='r')
plt.xlabel('lat')
plt.ylabel('long')
plt.title('Lat versus Long')
plt.figure()


Out[51]:
<matplotlib.figure.Figure at 0x106327090>
<matplotlib.figure.Figure at 0x106327090>

Step 3: Constructing the Patterns Table

Table of time spend at every location (loc, start time, end time , delta t, mean lat long)


In [52]:
data.ix[0]['Google Location']


Out[52]:
'Berkeley Grant Street'

In [53]:
import datetime

locationS = pd.DataFrame({'Brief Location':[],'TimeIn':[],'TimeOut':[],'deltaT':[],'MeanLat':[],'MeanLong':[],'WeekDay':[]})
locationS.head()

currentLocation = data.ix[0]['Address']
startTime = data.ix[0]['time']
ArrayOfLat = []
ArrayOfLong = []
ArrayOfLat.append(data.ix[0]['lat'])
ArrayOfLong.append(data.ix[0]['long'])
weekDay = data.ix[0]['week day']
for i in range(0,currentIndex):

    if(currentLocation == data.ix[i]['Address']):
        #Update time
   
        ArrayOfLat.append(data.ix[i]['lat'])
        ArrayOfLong.append(data.ix[i]['long'])
    else:
        meanLat = np.mean(ArrayOfLat)
        meanLong = np.mean(ArrayOfLong)
        endTime = data.ix[i]['time']
        
        FMT = '%I:%M:%S %p'
        deltaTime = datetime.datetime.strptime(endTime, FMT) - datetime.datetime.strptime(startTime, FMT)
        locationS.ix[len(locationS), ['Brief Location','TimeIn','TimeOut','deltaT','MeanLat','MeanLong','WeekDay']] = [currentLocation,startTime,endTime,deltaTime,meanLat,meanLong,weekDay]
        currentLocation = data.ix[i]['Address']
        startTime = data.ix[i]['time']
        weekDay = data.ix[i]['week day']
        ArrayOfLat = []
        ArrayOfLong = []
        ArrayOfLat.append(data.ix[i]['lat'])
        ArrayOfLong.append(data.ix[i]['long'])
        
locationS[:]


Out[53]:
Brief Location MeanLat MeanLong TimeIn TimeOut WeekDay deltaT
0 2422 Grant Street, Berkeley, CA 94703, USA 37.86402 -122.2747 12:02:21 AM 12:06:21 AM 2 0:04:00
1 2420 Grant Street, Berkeley, CA 94703, USA 37.86404 -122.2748 12:06:21 AM 12:08:21 AM 2 0:02:00
2 2422 Grant Street, Berkeley, CA 94703, USA 37.86403 -122.2747 12:08:21 AM 12:11:22 AM 2 0:03:01
3 2420 Grant Street, Berkeley, CA 94703, USA 37.86408 -122.2747 12:11:22 AM 12:13:31 AM 2 0:02:09
4 2422 Grant Street, Berkeley, CA 94703, USA 37.864 -122.2747 12:13:31 AM 12:33:21 AM 2 0:19:50
5 2420 Grant Street, Berkeley, CA 94703, USA 37.86405 -122.2748 12:33:21 AM 12:34:21 AM 2 0:01:00
6 2422 Grant Street, Berkeley, CA 94703, USA 37.864 -122.2747 12:34:21 AM 3:51:22 AM 2 3:17:01
7 2420 Grant Street, Berkeley, CA 94703, USA 37.86404 -122.2747 3:51:22 AM 3:56:07 AM 2 0:04:45
8 2422 Grant Street, Berkeley, CA 94703, USA 37.864 -122.2747 3:56:07 AM 8:28:10 AM 2 4:32:03
9 2420 Grant Street, Berkeley, CA 94703, USA 37.86413 -122.2747 8:28:10 AM 8:33:40 AM 2 0:05:30
10 2422 Grant Street, Berkeley, CA 94703, USA 37.86399 -122.2748 8:33:40 AM 8:55:41 AM 2 0:22:01
11 1809 Channing Way, Berkeley, CA 94703, USA 37.865 -122.274 8:55:41 AM 8:56:41 AM 2 0:01:00
12 2404 Martin Luther King Junior Way, Berkeley, ... 37.86494 -122.2723 8:56:41 AM 8:57:41 AM 2 0:01:00
13 1925-1999 Channing Way, Berkeley, CA 94704, USA 37.86539 -122.2712 8:57:41 AM 8:58:43 AM 2 0:01:02
14 2362-2382 Milvia Street, Berkeley, CA 94704, USA 37.86587 -122.2702 8:58:43 AM 9:00:50 AM 2 0:02:07
15 1980 Allston Way, Berkeley, CA 94704, USA 37.86879 -122.2706 9:00:50 AM 9:01:43 AM 2 0:00:53
16 2068 Center Street, Berkeley, CA 94704, USA 37.86992 -122.2692 9:01:43 AM 9:03:28 AM 2 0:01:45
17 2128 Oxford Street, Berkeley, CA 94704, USA 37.87054 -122.266 9:03:28 AM 9:04:14 AM 2 0:00:46
18 University of California, Berkeley, West Entra... 37.87135 -122.265 9:04:14 AM 9:04:59 AM 2 0:00:45
19 University of California, Berkeley, University... 37.87205 -122.2638 9:04:59 AM 9:05:44 AM 2 0:00:45
20 University of California, Berkeley, Harmon Way... 37.87206 -122.2623 9:05:44 AM 9:06:32 AM 2 0:00:48
21 University of California, Berkeley, University... 37.87302 -122.2609 9:06:32 AM 9:08:34 AM 2 0:02:02
22 University of California, Berkeley, Sather Roa... 37.87304 -122.2598 9:08:34 AM 9:09:33 AM 2 0:00:59
23 University of California, Berkeley, University... 37.87395 -122.2593 9:09:33 AM 11:09:57 AM 2 2:00:24
24 University of California, Berkeley, Haviland R... 37.87414 -122.2589 11:09:57 AM 11:57:56 AM 2 0:47:59
25 University of California, Berkeley, University... 37.87396 -122.259 11:57:56 AM 11:58:51 AM 2 0:00:55
26 University of California, Berkeley, Haviland R... 37.87418 -122.2589 11:58:51 AM 2:06:39 PM 2 2:07:48
27 University of California, Berkeley, 121 Northg... 37.8747 -122.2598 2:06:39 PM 2:07:25 PM 2 0:00:46
28 2500 Hearst Avenue, Berkeley, CA 94709, USA 37.87497 -122.2601 2:07:25 PM 2:08:11 PM 2 0:00:46
29 2500-2510 Hearst Avenue, Berkeley, CA 94709, USA 37.87501 -122.2601 2:08:11 PM 2:08:56 PM 2 0:00:45
... ... ... ... ... ... ... ...
1521 Evans Hall: Hearst Mining Circle Side, Univers... 37.87397 -122.2571 3:43:22 PM 3:47:29 PM 4 0:04:07
1522 University of California, Berkeley, University... 37.87392 -122.2571 3:47:29 PM 3:49:35 PM 4 0:02:06
1523 Evans Hall: Hearst Mining Circle Side, Univers... 37.87395 -122.2571 3:49:35 PM 3:52:39 PM 4 0:03:04
1524 University of California, Berkeley, University... 37.87396 -122.257 3:52:39 PM 3:53:39 PM 4 0:01:00
1525 Evans Hall: Hearst Mining Circle Side, Univers... 37.87397 -122.2571 3:53:39 PM 3:57:11 PM 4 0:03:32
1526 University of California, Berkeley, University... 37.87415 -122.2571 3:57:11 PM 3:59:13 PM 4 0:02:02
1527 Evans Hall: Hearst Mining Circle Side, Univers... 37.87394 -122.2571 3:59:13 PM 4:00:13 PM 4 0:01:00
1528 University of California, Berkeley, University... 37.87392 -122.2571 4:00:13 PM 4:02:19 PM 4 0:02:06
1529 Evans Hall: Hearst Mining Circle Side, Univers... 37.87394 -122.2571 4:02:19 PM 4:03:43 PM 4 0:01:24
1530 University of California, Berkeley, University... 37.87392 -122.2571 4:03:43 PM 4:04:43 PM 4 0:01:00
1531 Evans Hall: Hearst Mining Circle Side, Univers... 37.87391 -122.2571 4:04:43 PM 4:05:43 PM 4 0:01:00
1532 University of California, Berkeley, University... 37.8739 -122.2571 4:05:43 PM 4:06:43 PM 4 0:01:00
1533 Evans Hall: Hearst Mining Circle Side, Univers... 37.87396 -122.257 4:06:43 PM 4:07:44 PM 4 0:01:01
1534 University of California, Berkeley, University... 37.87397 -122.257 4:07:44 PM 4:08:44 PM 4 0:01:00
1535 Evans Hall: Hearst Mining Circle Side, Univers... 37.87395 -122.257 4:08:44 PM 4:09:44 PM 4 0:01:00
1536 University of California, Berkeley, University... 37.87397 -122.2571 4:09:44 PM 4:11:44 PM 4 0:02:00
1537 Evans Hall: Hearst Mining Circle Side, Univers... 37.87395 -122.2571 4:11:44 PM 4:13:45 PM 4 0:02:01
1538 University of California, Berkeley, University... 37.87394 -122.2571 4:13:45 PM 4:16:14 PM 4 0:02:29
1539 Evans Hall: Hearst Mining Circle Side, Univers... 37.87395 -122.257 4:16:14 PM 4:18:15 PM 4 0:02:01
1540 University of California, Berkeley, University... 37.87395 -122.257 4:18:15 PM 4:19:15 PM 4 0:01:00
1541 Evans Hall: Hearst Mining Circle Side, Univers... 37.87382 -122.2571 4:19:15 PM 4:22:15 PM 4 0:03:00
1542 University of California, Berkeley, University... 37.87386 -122.2571 4:22:15 PM 4:24:21 PM 4 0:02:06
1543 Evans Hall: Hearst Mining Circle Side, Univers... 37.87374 -122.2572 4:24:21 PM 4:26:27 PM 4 0:02:06
1544 University of California, Berkeley, University... 37.87384 -122.2571 4:26:27 PM 4:27:28 PM 4 0:01:01
1545 Evans Hall: Hearst Mining Circle Side, Univers... 37.87382 -122.2571 4:27:28 PM 4:36:53 PM 4 0:09:25
1546 University of California, Berkeley, University... 37.87387 -122.2571 4:36:53 PM 4:46:27 PM 4 0:09:34
1547 Evans Hall: Hearst Mining Circle Side, Univers... 37.87387 -122.2571 4:46:27 PM 4:51:14 PM 4 0:04:47
1548 University of California, Berkeley, University... 37.87391 -122.2571 4:51:14 PM 4:54:28 PM 4 0:03:14
1549 Evans Hall: Hearst Mining Circle Side, Univers... 37.87382 -122.2571 4:54:28 PM 5:06:52 PM 4 0:12:24
1550 University of California, Berkeley, University... 37.87397 -122.2592 5:06:52 PM 6:18:06 PM 4 1:11:14

1551 rows × 7 columns

Step 4: Trimming Locations With Stay Time Less than 15 mins


In [54]:
from datetime import datetime, timedelta 

a = locationS[locationS.deltaT > timedelta(minutes =15)]

In [55]:
a.head()


Out[55]:
Brief Location MeanLat MeanLong TimeIn TimeOut WeekDay deltaT
4 2422 Grant Street, Berkeley, CA 94703, USA 37.864 -122.2747 12:13:31 AM 12:33:21 AM 2 0:19:50
6 2422 Grant Street, Berkeley, CA 94703, USA 37.864 -122.2747 12:34:21 AM 3:51:22 AM 2 3:17:01
8 2422 Grant Street, Berkeley, CA 94703, USA 37.864 -122.2747 3:56:07 AM 8:28:10 AM 2 4:32:03
10 2422 Grant Street, Berkeley, CA 94703, USA 37.86399 -122.2748 8:33:40 AM 8:55:41 AM 2 0:22:01
23 University of California, Berkeley, University... 37.87395 -122.2593 9:09:33 AM 11:09:57 AM 2 2:00:24

Step 5: Merging Similar Locations


In [56]:
import datetime
locationSM = pd.DataFrame({'Brief Location':[],'TimeIn':[],'TimeOut':[],'deltaT':[],'MeanLat':[],'MeanLong':[],'week day':[]})
locationSM.head()

currentLocation = locationS.iloc[0,:]['Brief Location']
startTime = locationS.ix[0]['TimeIn']
endTime = locationS.ix[0]['TimeOut']
ArrayOfLat = []
ArrayOfLong = []
ArrayOfLat.append(locationS.ix[0]['MeanLat'])
ArrayOfLong.append(locationS.ix[0]['MeanLong'])
wk = locationS.ix[0]['WeekDay']

for i in range(len(a)):
    location = a.iloc[i,:]
  
    if(currentLocation == location['Brief Location']):
        #Update time
        endTime = location['TimeOut']
        ArrayOfLat.append(location['MeanLat'])
        ArrayOfLong.append(location['MeanLong'])
        if (i == len(a)-1):
            meanLat = np.mean(ArrayOfLat)
            meanLong = np.mean(ArrayOfLong)

            FMT = '%I:%M:%S %p'
            deltaTime = datetime.datetime.strptime(endTime, FMT) - datetime.datetime.strptime(startTime, FMT)
            locationSM.ix[len(locationSM), ['Brief Location','TimeIn','TimeOut','deltaT','MeanLat','MeanLong','week day']] = [currentLocation,startTime,endTime,deltaTime,meanLat,meanLong,wk]
            currentLocation = location['Brief Location']
            startTime = location['TimeIn']
            endTime = location['TimeOut']
            ArrayOfLat = []
            ArrayOfLong = []
            wk = location['WeekDay']
            ArrayOfLat.append(location['MeanLat'])
            ArrayOfLong.append(location['MeanLong'])
        
    else:
        meanLat = np.mean(ArrayOfLat)
        meanLong = np.mean(ArrayOfLong)
     
        FMT = '%I:%M:%S %p'
        deltaTime = datetime.datetime.strptime(endTime, FMT) - datetime.datetime.strptime(startTime, FMT)
        locationSM.ix[len(locationSM), ['Brief Location','TimeIn','TimeOut','deltaT','MeanLat','MeanLong','week day']] = [currentLocation,startTime,endTime,deltaTime,meanLat,meanLong, wk]
        currentLocation = location['Brief Location']
        startTime = location['TimeIn']
        endTime = location['TimeOut']
        ArrayOfLat = []
        ArrayOfLong = []
        wk = location['WeekDay']
        ArrayOfLat.append(location['MeanLat'])
        ArrayOfLong.append(location['MeanLong'])
        
locationSM[:]


Out[56]:
Brief Location MeanLat MeanLong TimeIn TimeOut deltaT week day
0 2422 Grant Street, Berkeley, CA 94703, USA 37.864 -122.2747 12:02:21 AM 8:55:41 AM 8:53:20 2
1 University of California, Berkeley, University... 37.87395 -122.2593 9:09:33 AM 11:09:57 AM 2:00:24 2
2 University of California, Berkeley, Haviland R... 37.87416 -122.2589 11:09:57 AM 2:06:39 PM 2:56:42 2
3 2509 Hearst Avenue, Berkeley, CA 94709, USA 37.87531 -122.2598 2:14:18 PM 2:31:43 PM 0:17:25 2
4 University of California, Berkeley, University... 37.87391 -122.2593 2:34:44 PM 4:19:44 PM 1:45:00 2
5 University of California, Berkeley, 2301 Bancr... 37.86865 -122.2625 4:33:23 PM 7:07:12 PM 2:33:49 2
6 University of California, Berkeley, University... 37.87394 -122.2592 7:22:06 PM 9:42:40 PM 2:20:34 2
7 2422 Grant Street, Berkeley, CA 94703, USA 37.864 -122.2747 10:22:53 PM 9:27:57 AM -1 day, 11:05:04 2
8 University of California, Berkeley, University... 37.87393 -122.2592 9:40:53 AM 9:40:42 PM 11:59:49 3
9 2422 Grant Street, Berkeley, CA 94703, USA 37.86399 -122.2747 10:20:25 PM 8:36:02 AM -1 day, 10:15:37 3
10 University of California, Berkeley, Haviland R... 37.87419 -122.2589 8:58:38 AM 9:52:21 AM 0:53:43 4
11 University of California, Berkeley, University... 37.87395 -122.2592 9:52:21 AM 12:08:06 PM 2:15:45 4
12 University of California, Berkeley, Haviland R... 37.87419 -122.2589 12:08:06 PM 3:58:20 PM 3:50:14 4
13 University of California, Berkeley, University... 37.87398 -122.2579 3:59:09 PM 5:29:47 PM 1:30:38 4
14 2422 Grant Street, Berkeley, CA 94703, USA 37.864 -122.2747 10:40:09 PM 11:27:04 AM -1 day, 12:46:55 4
15 University of California, Berkeley, Residence ... 37.8671 -122.2605 11:50:58 AM 12:28:48 PM 0:37:50 5
16 University of California, Berkeley, University... 37.87396 -122.2592 1:13:39 PM 5:08:17 PM 3:54:38 5
17 University of California, Berkeley, 2301 Bancr... 37.86853 -122.2625 5:39:04 PM 7:18:52 PM 1:39:48 5
18 University of California, Berkeley, University... 37.87398 -122.2593 7:18:52 PM 8:59:06 PM 1:40:14 5
19 2422 Grant Street, Berkeley, CA 94703, USA 37.86401 -122.2747 3:21:15 AM 9:48:29 AM 6:27:14 6
20 University of California, Berkeley, University... 37.87396 -122.2592 10:13:26 AM 11:57:36 AM 1:44:10 6
21 University of California, Berkeley, Gayley Roa... 37.87524 -122.2562 12:08:48 PM 12:43:11 PM 0:34:23 6
22 University of California, Berkeley, South Hall... 37.87204 -122.258 12:54:40 PM 1:12:36 PM 0:17:56 6
23 2020 Oregon Street, Berkeley, CA 94703, USA 37.85698 -122.2673 1:32:03 PM 2:14:43 PM 0:42:40 6
24 2422 Grant Street, Berkeley, CA 94703, USA 37.86401 -122.2747 2:34:55 PM 9:27:49 PM 6:52:54 6
25 University of California, Berkeley, Haviland R... 37.87423 -122.2589 1:05:15 PM 2:05:56 PM 1:00:41 0
26 University of California, Berkeley, University... 37.87396 -122.2593 2:29:21 PM 9:47:34 PM 7:18:13 0
27 2422 Grant Street, Berkeley, CA 94703, USA 37.864 -122.2748 9:53:35 PM 10:24:42 PM 0:31:07 0
28 2420 Grant Street, Berkeley, CA 94703, USA 37.86406 -122.2747 12:16:43 AM 12:55:30 AM 0:38:47 1
29 2422 Grant Street, Berkeley, CA 94703, USA 37.86402 -122.2747 1:18:54 AM 9:13:57 AM 7:55:03 1
... ... ... ... ... ... ... ...
135 2422 Grant Street, Berkeley, CA 94703, USA 37.86399 -122.2747 9:36:06 PM 12:36:44 AM -1 day, 3:00:38 2
136 2425 Roosevelt Avenue, Berkeley, CA 94703, USA 37.86379 -122.2752 12:53:16 AM 1:16:40 AM 0:23:24 3
137 2422 Grant Street, Berkeley, CA 94703, USA 37.86397 -122.2746 1:16:40 AM 1:34:54 AM 0:18:14 3
138 2423 Grant Street, Berkeley, CA 94703, USA 37.86395 -122.2744 1:34:54 AM 1:53:11 AM 0:18:17 3
139 2422 Grant Street, Berkeley, CA 94703, USA 37.86398 -122.2746 1:53:11 AM 3:18:33 AM 1:25:22 3
140 2420-2422 Grant Street, Berkeley, CA 94703, USA 37.86402 -122.2745 3:18:33 AM 3:37:06 AM 0:18:33 3
141 2422 Grant Street, Berkeley, CA 94703, USA 37.86399 -122.2746 3:37:06 AM 4:14:21 AM 0:37:15 3
142 2425 Grant Street, Berkeley, CA 94703, USA 37.86389 -122.2744 4:14:21 AM 4:33:44 AM 0:19:23 3
143 2422 Grant Street, Berkeley, CA 94703, USA 37.864 -122.2747 4:33:44 AM 5:46:39 AM 1:12:55 3
144 2420-2422 Grant Street, Berkeley, CA 94703, USA 37.86401 -122.2745 5:46:39 AM 6:06:08 AM 0:19:29 3
145 2422 Grant Street, Berkeley, CA 94703, USA 37.86399 -122.2747 6:06:08 AM 9:38:12 AM 3:32:04 3
146 University of California, Berkeley, University... 37.87399 -122.2593 9:52:23 AM 11:28:42 AM 1:36:19 3
147 University of California, Berkeley, Gayley Roa... 37.8752 -122.2562 11:42:30 AM 12:03:26 PM 0:20:56 3
148 University of California, Berkeley, University... 37.87391 -122.2593 12:23:58 PM 4:20:30 PM 3:56:32 3
149 University of California, Berkeley, 2301 Bancr... 37.86867 -122.2627 4:34:52 PM 5:30:59 PM 0:56:07 3
150 University of California, Berkeley, University... 37.87395 -122.2592 6:08:32 PM 10:33:41 PM 4:25:09 3
151 2420 Grant Street, Berkeley, CA 94703, USA 37.86407 -122.2748 12:30:12 AM 12:57:55 AM 0:27:43 4
152 2432 Grant Street, Berkeley, CA 94703, USA 37.86359 -122.2746 12:59:33 AM 1:17:49 AM 0:18:16 4
153 2422 Grant Street, Berkeley, CA 94703, USA 37.864 -122.2746 1:17:49 AM 2:33:50 AM 1:16:01 4
154 2420-2422 Grant Street, Berkeley, CA 94703, USA 37.86398 -122.2745 2:33:50 AM 2:53:55 AM 0:20:05 4
155 2422 Grant Street, Berkeley, CA 94703, USA 37.864 -122.2746 2:53:55 AM 6:26:36 AM 3:32:41 4
156 2426 Grant Street, Berkeley, CA 94703, USA 37.86377 -122.2746 6:26:36 AM 6:45:36 AM 0:19:00 4
157 2422 Grant Street, Berkeley, CA 94703, USA 37.864 -122.2746 6:45:36 AM 7:41:50 AM 0:56:14 4
158 2422-2426 Grant Street, Berkeley, CA 94703, USA 37.86397 -122.2745 7:41:50 AM 8:03:20 AM 0:21:30 4
159 2422 Grant Street, Berkeley, CA 94703, USA 37.86399 -122.2747 8:03:20 AM 8:45:03 AM 0:41:43 4
160 University of California, Berkeley, Haviland R... 37.87424 -122.259 9:01:27 AM 10:03:10 AM 1:01:43 4
161 University of California, Berkeley, University... 37.87396 -122.2592 10:03:10 AM 11:07:19 AM 1:04:09 4
162 University of California, Berkeley, Gayley Roa... 37.87522 -122.2562 11:37:54 AM 12:00:37 PM 0:22:43 4
163 University of California, Berkeley, Haviland R... 37.87422 -122.259 12:07:58 PM 2:51:31 PM 2:43:33 4
164 University of California, Berkeley, University... 37.87397 -122.2593 2:51:31 PM 6:18:06 PM 3:26:35 4

165 rows × 7 columns


In [57]:
import random

def r(minimum, maximum):
    return minimum + (maximum - minimum) * random.random()

In [59]:
plt.plot(locationSM[:]['MeanLat'], locationSM[:]['MeanLong'], 'o-', color='r')
plt.xlabel('lat')
plt.ylabel('long')
plt.title('Lat versus Long')


Out[59]:
<matplotlib.text.Text at 0x10af24e10>

Visualizing Addresses on the Plot


In [60]:
plt.figure()
plt.plot(locationSM[:]['MeanLat'], locationSM[:]['MeanLong'], 'o-', color='r')
plt.xlabel('lat')
plt.ylabel('long')
plt.title('Lat versus Long')
meanX = (max(locationSM[:]['MeanLat']) + min(locationSM[:]['MeanLat'])) / 2
meanY = (max(locationSM[:]['MeanLong']) + min(locationSM[:]['MeanLong'])) / 2
for label, x, y in zip(locationSM[:]['Brief Location'], locationSM[:]['MeanLat'], locationSM[:]['MeanLong']):
    sX = 1
    sY = 1
    if (x > meanX):
        sX = 4
    else:
        sX = -1
        
    if (y > meanY):
        sY = 1
    else:
        sY = -1
    
    plt.annotate(
        label, 
        xy = (x, y), xytext = (sX*100, r(-5,5)*sY*100),
        textcoords = 'offset points', ha = 'right', va = 'bottom',
        bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
        arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))

    
    
fig = plt.figure()


<matplotlib.figure.Figure at 0x10cfdb310>

Step 6: Applying DB Scan to Further Cluster Our Data


In [61]:
import numpy as np

from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets.samples_generator import make_blobs
from sklearn.preprocessing import StandardScaler


X = vstack([locationSM[:]['MeanLat'], locationSM[:]['MeanLong']]).T

# Compute DBSCAN
db = DBSCAN(eps=0.001, min_samples=2).fit(X)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

print('Estimated number of clusters: %d' % n_clusters_)

##############################################################################

plt.figure()
plt.plot(data[0:currentIndex]['lat'], data[0:currentIndex]['long'], 'o-', color='r')
plt.xlabel('lat')
plt.ylabel('long')
plt.title('Raw Data')
plt.figure()
plt.plot(locationSM[:]['MeanLat'], locationSM[:]['MeanLong'], 'o-', color='r')
plt.xlabel('lat')
plt.ylabel('long')
plt.title('Reverse Encoding Clustering|  %d clusters' % len(locationSM))
plt.show()
# Plot result
import matplotlib.pyplot as plt

# Black removed and is used for noise instead.
unique_labels = set(labels)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = 'k'

    class_member_mask = (labels == k)

    xy = X[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
             markeredgecolor='k',markersize=6)

    xy = X[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=6)
#plt.plot(locationSM[:]['MeanLat'], locationSM[:]['MeanLong'], 'o-', color='r')
plt.title('DB Scan Clustering |  %d clusters' % n_clusters_)
plt.show()


Estimated number of clusters: 5

Step 7: Merging the Data Based on the obtained clusters


In [62]:
import datetime

locationSM2 = pd.DataFrame({'Brief Location':[],'Cluster Number':[],'TimeIn':[],'TimeOut':[],'deltaT':[],'MeanLat':[],'MeanLong':[],'weekday':[]})
locationSM2.head()

currentLocation = locationSM.iloc[0,:]['Brief Location']
startTime = locationSM.ix[0]['TimeIn']
endTime = locationSM.ix[0]['TimeOut']
ArrayOfLat = []
ArrayOfLong = []
ArrayOfLat.append(locationSM.ix[0]['MeanLat'])
ArrayOfLong.append(locationSM.ix[0]['MeanLong'])
wk = locationSM.ix[0]['week day']
currentLabel = db.labels_[0]
for i in range(len(locationSM)):
    location = locationSM.iloc[i,:]
  
    if(db.labels_[i] == currentLabel):
        #Update time
        endTime = location['TimeOut']
        ArrayOfLat.append(location['MeanLat'])
        ArrayOfLong.append(location['MeanLong'])
    else:
        meanLat = np.mean(ArrayOfLat)
        meanLong = np.mean(ArrayOfLong)
     
        FMT = '%I:%M:%S %p'
        deltaTime = datetime.datetime.strptime(endTime, FMT) - datetime.datetime.strptime(startTime, FMT)
        if (currentLabel != -1):
            locationSM2.ix[len(locationSM2), ['Brief Location','Cluster Number','TimeIn','TimeOut','deltaT','MeanLat','MeanLong','weekday']] = [currentLocation,currentLabel,startTime,endTime,deltaTime,meanLat,meanLong,wk]
        currentLocation = location['Brief Location']
        startTime = location['TimeIn']
        endTime = location['TimeOut']
        ArrayOfLat = []
        ArrayOfLong = []
        wk = location['week day']
        ArrayOfLat.append(location['MeanLat'])
        ArrayOfLong.append(location['MeanLong'])
        currentLabel = db.labels_[i]
        
locationSM2[:]


Out[62]:
Brief Location Cluster Number MeanLat MeanLong TimeIn TimeOut deltaT weekday
0 2422 Grant Street, Berkeley, CA 94703, USA 0 37.864 -122.2747 12:02:21 AM 8:55:41 AM 8:53:20 2
1 University of California, Berkeley, University... 1 37.87406 -122.2591 9:09:33 AM 2:06:39 PM 4:57:06 2
2 2509 Hearst Avenue, Berkeley, CA 94709, USA 4 37.87531 -122.2598 2:14:18 PM 2:31:43 PM 0:17:25 2
3 University of California, Berkeley, University... 1 37.87391 -122.2593 2:34:44 PM 4:19:44 PM 1:45:00 2
4 University of California, Berkeley, 2301 Bancr... 2 37.86865 -122.2625 4:33:23 PM 7:07:12 PM 2:33:49 2
5 University of California, Berkeley, University... 1 37.87394 -122.2592 7:22:06 PM 9:42:40 PM 2:20:34 2
6 2422 Grant Street, Berkeley, CA 94703, USA 0 37.864 -122.2747 10:22:53 PM 9:27:57 AM -1 day, 11:05:04 2
7 University of California, Berkeley, University... 1 37.87393 -122.2592 9:40:53 AM 9:40:42 PM 11:59:49 3
8 2422 Grant Street, Berkeley, CA 94703, USA 0 37.86399 -122.2747 10:20:25 PM 8:36:02 AM -1 day, 10:15:37 3
9 University of California, Berkeley, Haviland R... 1 37.87408 -122.2588 8:58:38 AM 5:29:47 PM 8:31:09 4
10 2422 Grant Street, Berkeley, CA 94703, USA 0 37.864 -122.2747 10:40:09 PM 11:27:04 AM -1 day, 12:46:55 4
11 University of California, Berkeley, University... 1 37.87396 -122.2592 1:13:39 PM 5:08:17 PM 3:54:38 5
12 University of California, Berkeley, 2301 Bancr... 2 37.86853 -122.2625 5:39:04 PM 7:18:52 PM 1:39:48 5
13 University of California, Berkeley, University... 1 37.87398 -122.2593 7:18:52 PM 8:59:06 PM 1:40:14 5
14 2422 Grant Street, Berkeley, CA 94703, USA 0 37.86401 -122.2747 3:21:15 AM 9:48:29 AM 6:27:14 6
15 University of California, Berkeley, University... 1 37.87396 -122.2592 10:13:26 AM 11:57:36 AM 1:44:10 6
16 University of California, Berkeley, Gayley Roa... 3 37.87524 -122.2562 12:08:48 PM 12:43:11 PM 0:34:23 6
17 2422 Grant Street, Berkeley, CA 94703, USA 0 37.86401 -122.2747 2:34:55 PM 9:27:49 PM 6:52:54 6
18 University of California, Berkeley, Haviland R... 1 37.8741 -122.2591 1:05:15 PM 9:47:34 PM 8:42:19 0
19 2422 Grant Street, Berkeley, CA 94703, USA 0 37.86402 -122.2747 9:53:35 PM 9:13:57 AM -1 day, 11:20:22 0
20 University of California, Berkeley, University... 1 37.87392 -122.2593 10:27:30 AM 4:31:13 PM 6:03:43 1
21 University of California, Berkeley, 2301 Bancr... 2 37.86859 -122.2625 4:39:39 PM 6:51:47 PM 2:12:08 1
22 University of California, Berkeley, University... 1 37.87397 -122.2593 6:51:47 PM 10:49:09 PM 3:57:22 1
23 2422 Grant Street, Berkeley, CA 94703, USA 0 37.86401 -122.2747 10:55:10 PM 9:54:48 AM -1 day, 10:59:38 1
24 University of California, Berkeley, University... 1 37.87407 -122.2591 10:08:53 AM 2:05:24 PM 3:56:31 2
25 2509 Hearst Avenue, Berkeley, CA 94709, USA 4 37.87531 -122.2598 2:08:38 PM 2:30:19 PM 0:21:41 2
26 University of California, Berkeley, University... 1 37.87387 -122.2592 2:34:07 PM 6:15:12 PM 3:41:05 2
27 2509 Hearst Avenue, Berkeley, CA 94709, USA 4 37.87536 -122.2598 6:19:01 PM 6:52:22 PM 0:33:21 2
28 University of California, Berkeley, University... 1 37.87398 -122.2593 6:53:18 PM 8:48:52 PM 1:55:34 2
29 2422 Grant Street, Berkeley, CA 94703, USA 0 37.86401 -122.2746 9:40:34 PM 9:23:43 AM -1 day, 11:43:09 2
... ... ... ... ... ... ... ... ...
40 University of California, Berkeley, University... 1 37.87395 -122.2592 10:06:51 AM 8:11:10 PM 10:04:19 5
41 2422 Grant Street, Berkeley, CA 94703, USA 0 37.86405 -122.2747 8:42:25 PM 11:28:52 AM -1 day, 14:46:27 5
42 University of California, Berkeley, University... 1 37.874 -122.2592 11:42:15 AM 9:24:54 PM 9:42:39 6
43 2422 Grant Street, Berkeley, CA 94703, USA 0 37.86402 -122.2747 9:47:50 PM 9:32:28 AM -1 day, 11:44:38 6
44 University of California, Berkeley, University... 1 37.87413 -122.2591 9:47:14 AM 2:06:27 PM 4:19:13 0
45 2509 Hearst Avenue, Berkeley, CA 94709, USA 4 37.87536 -122.2599 2:09:29 PM 2:34:12 PM 0:24:43 0
46 University of California, Berkeley, University... 1 37.874 -122.2593 2:37:27 PM 4:18:15 PM 1:40:48 0
47 University of California, Berkeley, 2301 Bancr... 2 37.86867 -122.2627 4:32:33 PM 5:51:36 PM 1:19:03 0
48 University of California, Berkeley, University... 1 37.874 -122.2593 6:26:10 PM 9:56:45 PM 3:30:35 0
49 2422 Grant Street, Berkeley, CA 94703, USA 0 37.86402 -122.2747 10:10:08 PM 8:57:24 AM -1 day, 10:47:16 0
50 University of California, Berkeley, University... 1 37.87416 -122.259 9:10:00 AM 11:24:32 AM 2:14:32 1
51 University of California, Berkeley, Gayley Roa... 3 37.87524 -122.2562 11:38:28 AM 12:19:40 PM 0:41:12 1
52 University of California, Berkeley, University... 1 37.87397 -122.2592 12:23:41 PM 4:59:04 PM 4:35:23 1
53 University of California, Berkeley, 2301 Bancr... 2 37.86869 -122.2627 5:10:35 PM 5:43:18 PM 0:32:43 1
54 University of California, Berkeley, University... 1 37.87398 -122.2593 6:31:29 PM 9:37:12 PM 3:05:43 1
55 2422 Grant Street, Berkeley, CA 94703, USA 0 37.86398 -122.2747 9:45:11 PM 8:37:48 AM -1 day, 10:52:37 1
56 University of California, Berkeley, University... 1 37.87411 -122.2591 8:52:11 AM 2:08:51 PM 5:16:40 2
57 2509 Hearst Avenue, Berkeley, CA 94709, USA 4 37.8753 -122.2598 2:11:52 PM 2:29:01 PM 0:17:09 2
58 University of California, Berkeley, University... 1 37.87398 -122.2593 2:36:46 PM 4:33:54 PM 1:57:08 2
59 University of California, Berkeley, 2301 Bancr... 2 37.86869 -122.2627 4:44:05 PM 5:44:00 PM 0:59:55 2
60 University of California, Berkeley, University... 1 37.87397 -122.2593 6:28:52 PM 9:27:47 PM 2:58:55 2
61 2422 Grant Street, Berkeley, CA 94703, USA 0 37.86396 -122.2746 9:36:06 PM 9:38:12 AM -1 day, 12:02:06 2
62 University of California, Berkeley, University... 1 37.87399 -122.2593 9:52:23 AM 11:28:42 AM 1:36:19 3
63 University of California, Berkeley, Gayley Roa... 3 37.8752 -122.2562 11:42:30 AM 12:03:26 PM 0:20:56 3
64 University of California, Berkeley, University... 1 37.87391 -122.2593 12:23:58 PM 4:20:30 PM 3:56:32 3
65 University of California, Berkeley, 2301 Bancr... 2 37.86867 -122.2627 4:34:52 PM 5:30:59 PM 0:56:07 3
66 University of California, Berkeley, University... 1 37.87395 -122.2592 6:08:32 PM 10:33:41 PM 4:25:09 3
67 2420 Grant Street, Berkeley, CA 94703, USA 0 37.86393 -122.2746 12:30:12 AM 8:45:03 AM 8:14:51 4
68 University of California, Berkeley, Haviland R... 1 37.8741 -122.2591 9:01:27 AM 11:07:19 AM 2:05:52 4
69 University of California, Berkeley, Gayley Roa... 3 37.87522 -122.2562 11:37:54 AM 12:00:37 PM 0:22:43 4

70 rows × 8 columns

Step 8-1: Fitting the data into a support vector machine

Inputs: Time in Time Out

Output: Label most anticipated location

Break the list into hours


In [63]:
from datetime import date, datetime, timedelta

import datetime
import time
inc = 0
def datespan(startTimestr, endTimestr, delta=timedelta(days=1)):
    
    startTimeD = datetime.datetime.strptime(startTimestr, '%I:%M:%S %p')
    
    endTimeD = datetime.datetime.strptime(endTimestr, '%I:%M:%S %p')
    
    if (startTimeD < endTimeD):
        inc = 0
        startTimeD = datetime.datetime(2007, 3,20, startTimeD.hour, 0)
        endTimeD = datetime.datetime(2007, 3, 20,endTimeD.hour, 0)
    if (startTimeD > endTimeD):
        inc = 1
        startTimeD = datetime.datetime(2007, 3,19, startTimeD.hour, 0)
        endTimeD = datetime.datetime(2007, 3, 20,endTimeD.hour, 0)
        
    currentDate = startTimeD
    while currentDate < endTimeD:
        yield currentDate.hour
        currentDate += delta

In [64]:
locationSplitted_States = pd.DataFrame({'Cluster Number':[],'hour':[],'weekday':[]})
locationSplitted_States.head()

for i in range(len(locationSM2)):
    element = locationSM2.ix[i,:]
    #print element
    for timestamp in datespan(element['TimeIn'],
                           element['TimeOut'],
                         delta=timedelta(hours=1)): 
        locationSplitted_States.ix[len(locationSplitted_States),['Cluster Number','hour','weekday']] = [element['Cluster Number'],timestamp,element['weekday']]

In [20]:
locationSplitted_States[:]


Out[20]:
Cluster Number hour weekday
0 2 0 2
1 2 1 2
2 2 2 2
3 2 3 2
4 2 4 2
5 2 5 2
6 2 6 2
7 2 7 2
8 1 9 2
9 1 10 2
10 1 11 2
11 1 12 2
12 1 13 2
13 1 14 2
14 1 15 2
15 0 16 2
16 0 17 2
17 0 18 2
18 1 19 2
19 1 20 2
20 2 22 2
21 2 23 2
22 2 0 2
23 2 1 2
24 2 2 2
25 2 3 2
26 2 4 2
27 2 5 2
28 2 6 2
29 2 7 2
... ... ... ...
238 2 0 6
239 2 1 6
240 2 2 6
241 2 3 6
242 2 4 6
243 2 5 6
244 2 6 6
245 2 7 6
246 2 8 6
247 1 9 0
248 1 10 0
249 1 11 0
250 1 12 0
251 1 13 0
252 1 14 0
253 1 15 0
254 0 16 0
255 1 18 0
256 1 19 0
257 1 20 0
258 2 22 0
259 2 23 0
260 2 0 0
261 2 1 0
262 2 2 0
263 2 3 0
264 2 4 0
265 2 5 0
266 2 6 0
267 2 7 0

268 rows × 3 columns


In [21]:
def trainSupportVectorMachineGivenXandY(X,Y):
    clf = SVC(probability=True)
    clf.fit(X, Y)
    return clf

In [65]:
import datetime
def trainSupportVectorMachineGivenXandY(X,Y):
    clf = SVC(probability=True)
    clf.fit(X, Y)
    return clf

def getLabelOfAddress(address):
    a =  data.ix[:,4].unique().flatten()
    for i in range(len(a)):
     if (a[i] ==  address):
            return i
        
            
    return "No match found!"

def getAddressFromLabel(label):
    a =  data.ix[:,5].unique().flatten()
    return a[label]

def convertTimeDeltaToSeconds(timeD):
   return timeD.total_seconds()

def convertToHour(timeD):
    t= datetime.datetime.strptime(timeD, '%I:%M:%S %p')
    
    return t.hour

In [66]:
X_SVM_Split_wk = locationSplitted_States[:]['weekday']
X_SVM_Split_hr = locationSplitted_States[:]['hour']
Y_SVM_Split_CN = locationSplitted_States[:]['Cluster Number']

X_SVM_split = vstack([X_SVM_Split_wk,X_SVM_Split_hr]).T

Cross Validation


In [76]:
from sklearn.cross_validation import KFold
kf = KFold(len(X_SVM_split), n_folds=50, shuffle=True)
errorArray=[]
for fold in kf:
    #print fold
    myModel = trainSupportVectorMachineGivenXandY(X_SVM_split[fold[0]],Y_SVM_Split_CN[fold[0]])
    predicted = myModel.predict(X_SVM_split[fold[1]]) 
    actual = Y_SVM_Split_CN[fold[1]]
    errorArray.append((1 * (predicted == actual)).sum() / float(len(actual)))
print 'Mean Percentage Success: '
mean(errorArray)*100


Mean Percentage Success: 
Out[76]:
87.571428571428569

Step 8-2: Constructing The Spatial Calendar

Training the SVM with all the data


In [71]:
i +=1
#We want an arary that is 7 x 24 that contains the most probable stay points a person would be at within a particular hour
# 7 x 24 Xs
myModel = trainSupportVectorMachineGivenXandY(X_SVM_split,Y_SVM_Split_CN)
7*24

X_array = []
for day in range(7):
    for hour in range(24):
        Xi = [day,hour]
        X_array.append(Xi)


spatialCalendar = myModel.predict(X_array)
Calendar = np.split(spatialCalendar,7)
spatialCalendar


Out[71]:
array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  2.,  2.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  2.,
        1.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  0.,  0.])

Identify Bounds for plotting the spatial calander


In [72]:
clr = colors #['p','r','b','w','p']

currentClass = Calendar[0][0]
BigArray = []
for  element in Calendar:
    currentArray = []
    colorArray = []
    currentArray.append(0)
    #colorArray.append(clr[int(currentClass)])
  
   
    for index, cclass in enumerate(element):
        if (cclass != currentClass):
            currentArray.append(index-1)
            colorArray.append(clr[int(currentClass)])
            currentClass = cclass
    currentArray.append(24)
    colorArray.append(clr[int(currentClass)])
    BigArray.append([currentArray,colorArray])

In [73]:
BigArray


Out[73]:
[[[0, 8, 20, 24],
  [array([ 0.61960787,  0.00392157,  0.25882354,  1.        ]),
   array([ 0.95686275,  0.42745098,  0.26274511,  1.        ]),
   array([ 0.61960787,  0.00392157,  0.25882354,  1.        ])]],
 [[0, 8, 15, 17, 20, 24],
  [array([ 0.61960787,  0.00392157,  0.25882354,  1.        ]),
   array([ 0.95686275,  0.42745098,  0.26274511,  1.        ]),
   array([ 0.99607843,  0.87843138,  0.54509807,  1.        ]),
   array([ 0.95686275,  0.42745098,  0.26274511,  1.        ]),
   array([ 0.61960787,  0.00392157,  0.25882354,  1.        ])]],
 [[0, 8, 15, 16, 20, 24],
  [array([ 0.61960787,  0.00392157,  0.25882354,  1.        ]),
   array([ 0.95686275,  0.42745098,  0.26274511,  1.        ]),
   array([ 0.99607843,  0.87843138,  0.54509807,  1.        ]),
   array([ 0.95686275,  0.42745098,  0.26274511,  1.        ]),
   array([ 0.61960787,  0.00392157,  0.25882354,  1.        ])]],
 [[0, 8, 21, 24],
  [array([ 0.61960787,  0.00392157,  0.25882354,  1.        ]),
   array([ 0.95686275,  0.42745098,  0.26274511,  1.        ]),
   array([ 0.61960787,  0.00392157,  0.25882354,  1.        ])]],
 [[0, 8, 20, 24],
  [array([ 0.61960787,  0.00392157,  0.25882354,  1.        ]),
   array([ 0.95686275,  0.42745098,  0.26274511,  1.        ]),
   array([ 0.61960787,  0.00392157,  0.25882354,  1.        ])]],
 [[0, 9, 19, 24],
  [array([ 0.61960787,  0.00392157,  0.25882354,  1.        ]),
   array([ 0.95686275,  0.42745098,  0.26274511,  1.        ]),
   array([ 0.61960787,  0.00392157,  0.25882354,  1.        ])]],
 [[0, 9, 19, 24],
  [array([ 0.61960787,  0.00392157,  0.25882354,  1.        ]),
   array([ 0.95686275,  0.42745098,  0.26274511,  1.        ]),
   array([ 0.61960787,  0.00392157,  0.25882354,  1.        ])]]]

In [85]:
'''
Make a colorbar as a separate figure.
'''

from matplotlib import pyplot
import matplotlib as mpl
daysDictionary = ['Monday','Tuesday','Wednesday','Thursday','Friday','Sunday','Saturday']

cmap = mpl.cm.cool
norm = mpl.colors.Normalize(vmin=5, vmax=10)
cmap = mpl.colors.ListedColormap(['r', 'g', 'b', 'c'])
cmap.set_over('0.25')
cmap.set_under('0.75')

# Make a figure and axes with dimensions as desired.
fig = pyplot.figure(figsize=(8,3))
for i in range(7):
    
    ax = fig.add_axes([0.05, 0.3*i, 0.9, 0.15])

      # Set the colormap and norm to correspond to the data for which
    # the colorbar will be used.
    cmap = mpl.colors.ListedColormap(BigArray[i][1])
 
    # If a ListedColormap is used, the length of the bounds array must be
    # one greater than the length of the color list.  The bounds must be
    # monotonically increasing.
    bounds = BigArray[i][0]
    norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
    cb1 = mpl.colorbar.ColorbarBase(ax, cmap=cmap,
                                       norm=norm,spacing='proportional',
                                       orientation='horizontal')
    cb1.set_label(daysDictionary[i])
    

print 'Aha!!!!'


Aha!!!!