notebook.community

Edit and run



In [1]:

    
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib
import datetime as dt

matplotlib.style.use('ggplot') # Look Pretty

def showandtell(title=None):
  if title != None: plt.savefig(title + ".png", bbox_inches='tight', dpi=300)
  plt.show()
  # exit()



In [2]:

    
#
# INFO: This dataset has call records for 10 users tracked over the course of 3 years.
# Your job is to find out where the users likely live and work at!



In [3]:

    
# TODO: Load up the dataset and take a peek at its head
# Convert the date using pd.to_datetime, and the time using pd.to_timedelta
df = pd.read_csv('Datasets/CDR.csv')
df.CallDate = pd.to_datetime(df.CallDate)
df.Duration = pd.to_timedelta(df.Duration)
df.head(5)









    Out[3]:






  
    
      
      In
      Out
      Direction
      CallDate
      CallTime
      DOW
      Duration
      TowerID
      TowerLat
      TowerLon
    
  
  
    
      0
      4638472273
      2666307251
      Incoming
      2010-12-25
      07:16:24.736813
      Sat
      00:02:41.741499
      0db53dd3-eb9c-4344-abc5-c2d74ebc3eec
      32.731611
      -96.709417
    
    
      1
      4638472273
      1755442610
      Incoming
      2010-12-25
      21:18:30.053710
      Sat
      00:02:47.108750
      aeaf8b43-8034-44fe-833d-31854a75acbf
      32.731722
      -96.709500
    
    
      2
      4638472273
      5481755331
      Incoming
      2010-12-25
      14:52:42.878016
      Sat
      00:04:35.356341
      fadaa83f-6001-45fd-aa4a-17d6c6b7ec00
      32.899944
      -96.910389
    
    
      3
      4638472273
      1755442610
      Incoming
      2010-12-25
      16:02:09.001913
      Sat
      00:02:23.498499
      fadaa83f-6001-45fd-aa4a-17d6c6b7ec00
      32.899944
      -96.910389
    
    
      4
      4638472273
      2145623768
      Incoming
      2010-12-25
      15:28:35.028554
      Sat
      00:03:54.692497
      95d7920d-c3cd-4d20-a568-9a55800dc807
      32.899944
      -96.910389



In [4]:

    
#
# TODO: Get a distinct list of "In" phone numbers (users) and store the values in a
# regular python list.
# Hint: https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.tolist.html
#
phonenumber = list(df.In.unique())
phonenumber









    Out[4]:





[4638472273,
 1559410755,
 4931532174,
 2419930464,
 1884182865,
 3688089071,
 4555003213,
 2068627935,
 2894365987,
 8549533077]



In [5]:

    
# TODO: Create a slice called user1 that filters to only include dataset records where the
# "In" feature (user phone number) is equal to the first number on your unique list above;
# that is, the very first number in the dataset
user = 4638472273
user1 = df[df.In == user]



In [6]:

    
# INFO: Plot all the call locations
user1.plot.scatter(x='TowerLon', y='TowerLat', c='gray', alpha=0.1, title='Call Locations')
#showandtell()  # Comment this line out when you're ready to proceed









    Out[6]:





<matplotlib.axes._subplots.AxesSubplot at 0x1112dc320>



In [7]:

    
# INFO: The locations map above should be too "busy" to really wrap your head around. This
# is where domain expertise comes into play. Your intuition tells you that people are likely
# to behave differently on weekends:
#
# On Weekends:
#   1. People probably don't go into work
#   2. They probably sleep in late on Saturday
#   3. They probably run a bunch of random errands, since they couldn't during the week
#   4. They should be home, at least during the very late hours, e.g. 1-4 AM
#
# On Weekdays:
#   1. People probably are at work during normal working hours
#   2. They probably are at home in the early morning and during the late night
#   3. They probably spend time commuting between work and home everyday



In [8]:

    
# TODO: Add more filters to the user1 slice you created. Add bitwise logic so that you're
# only examining records that came in on weekends (sat/sun).
#
user1 = user1[user1['DOW'].isin(['Sat', 'Sun'])]

# TODO: Further filter it down for calls that came in either before 6AM OR after 10pm (22:00:00).
# You can use < and > to compare the string times, just make sure you code them as military time
# strings, eg: "06:00:00", "22:00:00": https://en.wikipedia.org/wiki/24-hour_clock
#
# You might also want to review the Data Manipulation section for this. Once you have your filtered
# slice, print out its length:
#
user1 = user1[(user1.CallTime < "06:00:00")|(user1.CallTime >  "22:00:00")]
user1









    Out[8]:






  
    
      
      In
      Out
      Direction
      CallDate
      CallTime
      DOW
      Duration
      TowerID
      TowerLat
      TowerLon
    
  
  
    
      940
      4638472273
      6150928001
      Incoming
      2011-01-08
      22:22:33.693878
      Sat
      00:00:35.065912
      26f7441e-e64b-4b9d-8c5a-0eb34f7e919f
      32.731611
      -96.709417
    
    
      1875
      4638472273
      2145623768
      Incoming
      2011-01-22
      22:21:27.466908
      Sat
      00:03:40.018320
      2567c73b-3711-4a31-8683-9d12d56857d8
      32.731611
      -96.709417
    
    
      6949
      4638472273
      5621270003
      Missed
      2011-04-09
      22:59:23.206408
      Sat
      00:03:06.567364
      2567c73b-3711-4a31-8683-9d12d56857d8
      32.731611
      -96.709417
    
    
      7468
      4638472273
      7756914135
      Incoming
      2011-04-17
      05:15:40.205917
      Sun
      00:02:09.984993
      cd9f3b1a-2eb8-4cdb-86d1-5d4c2740b1dc
      32.731722
      -96.709500
    
    
      9753
      4638472273
      7922223441
      Incoming
      2011-05-21
      05:08:20.121145
      Sat
      00:01:30.059591
      0db53dd3-eb9c-4344-abc5-c2d74ebc3eec
      32.731611
      -96.709417
    
    
      13951
      4638472273
      7428841195
      Missed
      2011-07-23
      22:56:02.014834
      Sat
      00:04:48.497077
      26f7441e-e64b-4b9d-8c5a-0eb34f7e919f
      32.731611
      -96.709417
    
    
      17515
      4638472273
      2666307251
      Incoming
      2011-09-11
      05:44:15.432523
      Sun
      00:01:39.500414
      bcec0121-164d-406c-8c04-d12870893992
      32.731611
      -96.709417
    
    
      21638
      4638472273
      6150928001
      Incoming
      2011-11-13
      05:10:41.954161
      Sun
      00:00:44.369613
      f3f289f5-7d29-4d3f-9b9a-a0ceaca10a1d
      32.750556
      -96.694722
    
    
      22045
      4638472273
      6022767590
      Missed
      2011-11-19
      22:59:53.166951
      Sat
      00:00:05.795265
      8ba7e0d7-eb6e-4828-9c70-c469954a627d
      32.731611
      -96.709417
    
    
      22502
      4638472273
      5621270003
      Incoming
      2011-11-26
      22:10:48.879076
      Sat
      00:03:07.592270
      bcec0121-164d-406c-8c04-d12870893992
      32.731611
      -96.709417
    
    
      27866
      4638472273
      2946222380
      Missed
      2012-02-12
      22:48:34.319367
      Sun
      00:03:24.762196
      1bfc786a-d4c2-464c-bc45-b9aad0c81216
      32.731722
      -96.709500
    
    
      31975
      4638472273
      1755442610
      Incoming
      2012-04-14
      05:42:32.004437
      Sat
      00:07:02.657988
      d760c426-4d58-4f9c-9950-b68a3a54612c
      32.731611
      -96.709417
    
    
      32605
      4638472273
      7756914135
      Incoming
      2012-04-22
      22:44:21.992759
      Sun
      00:02:37.995933
      0db53dd3-eb9c-4344-abc5-c2d74ebc3eec
      32.731611
      -96.709417
    
    
      32606
      4638472273
      7922223441
      Incoming
      2012-04-22
      05:02:55.678110
      Sun
      00:03:50.596153
      8ba7e0d7-eb6e-4828-9c70-c469954a627d
      32.731611
      -96.709417
    
    
      34427
      4638472273
      5621270003
      Incoming
      2012-05-19
      05:42:45.405430
      Sat
      00:03:33.104834
      2567c73b-3711-4a31-8683-9d12d56857d8
      32.731611
      -96.709417
    
    
      38110
      4638472273
      2145623768
      Incoming
      2012-07-15
      05:59:38.556670
      Sun
      00:04:29.583137
      1bfc786a-d4c2-464c-bc45-b9aad0c81216
      32.731722
      -96.709500
    
    
      38564
      4638472273
      5481755331
      Incoming
      2012-07-22
      22:14:37.361635
      Sun
      00:01:44.568702
      aeaf8b43-8034-44fe-833d-31854a75acbf
      32.731722
      -96.709500
    
    
      39870
      4638472273
      8180005891
      Incoming
      2012-08-12
      22:31:22.519784
      Sun
      00:00:12.541785
      26f7441e-e64b-4b9d-8c5a-0eb34f7e919f
      32.731611
      -96.709417
    
    
      40275
      4638472273
      7756914135
      Missed
      2012-08-19
      05:33:32.103650
      Sun
      00:01:35.932559
      aeaf8b43-8034-44fe-833d-31854a75acbf
      32.731722
      -96.709500
    
    
      40789
      4638472273
      7428841195
      Incoming
      2012-08-26
      22:23:58.453302
      Sun
      00:06:08.978230
      bcec0121-164d-406c-8c04-d12870893992
      32.731611
      -96.709417
    
    
      41757
      4638472273
      7922223441
      Incoming
      2012-09-08
      22:45:53.008411
      Sat
      00:04:26.260140
      2567c73b-3711-4a31-8683-9d12d56857d8
      32.731611
      -96.709417
    
    
      41758
      4638472273
      7428841195
      Incoming
      2012-09-08
      22:19:08.522062
      Sat
      00:04:18.402811
      bcec0121-164d-406c-8c04-d12870893992
      32.731611
      -96.709417
    
    
      42224
      4638472273
      7922223441
      Incoming
      2012-09-15
      05:14:40.655349
      Sat
      00:04:07.561627
      cd9f3b1a-2eb8-4cdb-86d1-5d4c2740b1dc
      32.731722
      -96.709500
    
    
      44498
      4638472273
      7721472178
      Incoming
      2012-10-21
      22:00:09.753350
      Sun
      00:03:24.773041
      d760c426-4d58-4f9c-9950-b68a3a54612c
      32.731611
      -96.709417
    
    
      44927
      4638472273
      1755442610
      Incoming
      2012-10-27
      05:47:54.723850
      Sat
      00:04:28.876617
      1bfc786a-d4c2-464c-bc45-b9aad0c81216
      32.731722
      -96.709500
    
    
      44994
      4638472273
      7922223441
      Incoming
      2012-10-28
      05:24:17.234557
      Sun
      00:02:00.473836
      bde84490-896f-4060-8bd0-bd7a90cc20d9
      32.750556
      -96.694722
    
    
      49203
      4638472273
      8068555700
      Incoming
      2012-12-29
      05:08:40.433945
      Sat
      00:03:10.280023
      aeaf8b43-8034-44fe-833d-31854a75acbf
      32.731722
      -96.709500
    
    
      51586
      4638472273
      2946222380
      Incoming
      2013-02-03
      22:14:12.703233
      Sun
      00:04:05.550748
      cd9f3b1a-2eb8-4cdb-86d1-5d4c2740b1dc
      32.731722
      -96.709500



In [11]:

    
#
# INFO: Visualize the dataframe with a scatter plot as a sanity check. Since you're familiar with maps, you know well that your
# X-Coordinate should be Longitude, and your Y coordinate should be the tower Latitude. Check the dataset headers for proper column
# feature names.
# https://en.wikipedia.org/wiki/Geographic_coordinate_system#Geographic_latitude_and_longitude
#
# At this point, you don't yet know exactly where the user is located just based off the cell phone tower position data; but
# considering the below are for Calls that arrived in the twilight hours of weekends, it's likely that wherever they are bunched up
# is probably near the caller's residence:
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(user1.TowerLon,user1.TowerLat, c='g', marker='o', alpha=0.2)
ax.set_title('Weekend Calls (<6am or >10p)')
showandtell()  # TODO: Comment this line out when you're ready to proceed



In [14]:

    
from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters = 2)
user1 = pd.concat([user1.TowerLon, user1.TowerLat], axis = 1)
labels = kmeans.fit_predict(user1)

centroids = kmeans.cluster_centers_
ax.scatter(x = centroids[:, 0], y = centroids[:, 1], c = 'r', marker = 'x', s = 100)

plt.show()  # TODO: Comment this line out when you're ready to proceed



In [16]:

    
locations = []
for i in range(10):
	user = df[(df.In == phonenumber[i])]
	user.plot.scatter(x='TowerLon', y='TowerLat', c='purple', alpha=0.12, title='Call Locations', s = 30)
	user = user[(user.DOW == 'Sat') | (user.DOW == 'Sun')]
	user = user[(user.CallTime < "06:00:00") | (user.CallTime > "22:00:00")]
	fig = plt.figure()
	ax = fig.add_subplot(111)
	ax.scatter(user.TowerLon, user.TowerLat, c='g', marker='o', alpha=0.2)
	ax.set_title('Weekend Calls (<6am or >10p)')
	kmeans = KMeans(n_clusters = 2)
	user = pd.concat([user.TowerLon, user.TowerLat], axis = 1)
	labels = kmeans.fit_predict(user)

	centroids = kmeans.cluster_centers_
	ax.scatter(x = centroids[:, 0], y = centroids[:, 1], c = 'r', marker = 'x', s = 100)
	locations.append(centroids)
showandtell()



In [ ]:

	In	Out	Direction	CallDate	CallTime	DOW	Duration	TowerID	TowerLat	TowerLon
0	4638472273	2666307251	Incoming	2010-12-25	07:16:24.736813	Sat	00:02:41.741499	0db53dd3-eb9c-4344-abc5-c2d74ebc3eec	32.731611	-96.709417
1	4638472273	1755442610	Incoming	2010-12-25	21:18:30.053710	Sat	00:02:47.108750	aeaf8b43-8034-44fe-833d-31854a75acbf	32.731722	-96.709500
2	4638472273	5481755331	Incoming	2010-12-25	14:52:42.878016	Sat	00:04:35.356341	fadaa83f-6001-45fd-aa4a-17d6c6b7ec00	32.899944	-96.910389
3	4638472273	1755442610	Incoming	2010-12-25	16:02:09.001913	Sat	00:02:23.498499	fadaa83f-6001-45fd-aa4a-17d6c6b7ec00	32.899944	-96.910389
4	4638472273	2145623768	Incoming	2010-12-25	15:28:35.028554	Sat	00:03:54.692497	95d7920d-c3cd-4d20-a568-9a55800dc807	32.899944	-96.910389

	In	Out	Direction	CallDate	CallTime	DOW	Duration	TowerID	TowerLat	TowerLon
940	4638472273	6150928001	Incoming	2011-01-08	22:22:33.693878	Sat	00:00:35.065912	26f7441e-e64b-4b9d-8c5a-0eb34f7e919f	32.731611	-96.709417
1875	4638472273	2145623768	Incoming	2011-01-22	22:21:27.466908	Sat	00:03:40.018320	2567c73b-3711-4a31-8683-9d12d56857d8	32.731611	-96.709417
6949	4638472273	5621270003	Missed	2011-04-09	22:59:23.206408	Sat	00:03:06.567364	2567c73b-3711-4a31-8683-9d12d56857d8	32.731611	-96.709417
7468	4638472273	7756914135	Incoming	2011-04-17	05:15:40.205917	Sun	00:02:09.984993	cd9f3b1a-2eb8-4cdb-86d1-5d4c2740b1dc	32.731722	-96.709500
9753	4638472273	7922223441	Incoming	2011-05-21	05:08:20.121145	Sat	00:01:30.059591	0db53dd3-eb9c-4344-abc5-c2d74ebc3eec	32.731611	-96.709417
13951	4638472273	7428841195	Missed	2011-07-23	22:56:02.014834	Sat	00:04:48.497077	26f7441e-e64b-4b9d-8c5a-0eb34f7e919f	32.731611	-96.709417
17515	4638472273	2666307251	Incoming	2011-09-11	05:44:15.432523	Sun	00:01:39.500414	bcec0121-164d-406c-8c04-d12870893992	32.731611	-96.709417
21638	4638472273	6150928001	Incoming	2011-11-13	05:10:41.954161	Sun	00:00:44.369613	f3f289f5-7d29-4d3f-9b9a-a0ceaca10a1d	32.750556	-96.694722
22045	4638472273	6022767590	Missed	2011-11-19	22:59:53.166951	Sat	00:00:05.795265	8ba7e0d7-eb6e-4828-9c70-c469954a627d	32.731611	-96.709417
22502	4638472273	5621270003	Incoming	2011-11-26	22:10:48.879076	Sat	00:03:07.592270	bcec0121-164d-406c-8c04-d12870893992	32.731611	-96.709417
27866	4638472273	2946222380	Missed	2012-02-12	22:48:34.319367	Sun	00:03:24.762196	1bfc786a-d4c2-464c-bc45-b9aad0c81216	32.731722	-96.709500
31975	4638472273	1755442610	Incoming	2012-04-14	05:42:32.004437	Sat	00:07:02.657988	d760c426-4d58-4f9c-9950-b68a3a54612c	32.731611	-96.709417
32605	4638472273	7756914135	Incoming	2012-04-22	22:44:21.992759	Sun	00:02:37.995933	0db53dd3-eb9c-4344-abc5-c2d74ebc3eec	32.731611	-96.709417
32606	4638472273	7922223441	Incoming	2012-04-22	05:02:55.678110	Sun	00:03:50.596153	8ba7e0d7-eb6e-4828-9c70-c469954a627d	32.731611	-96.709417
34427	4638472273	5621270003	Incoming	2012-05-19	05:42:45.405430	Sat	00:03:33.104834	2567c73b-3711-4a31-8683-9d12d56857d8	32.731611	-96.709417
38110	4638472273	2145623768	Incoming	2012-07-15	05:59:38.556670	Sun	00:04:29.583137	1bfc786a-d4c2-464c-bc45-b9aad0c81216	32.731722	-96.709500
38564	4638472273	5481755331	Incoming	2012-07-22	22:14:37.361635	Sun	00:01:44.568702	aeaf8b43-8034-44fe-833d-31854a75acbf	32.731722	-96.709500
39870	4638472273	8180005891	Incoming	2012-08-12	22:31:22.519784	Sun	00:00:12.541785	26f7441e-e64b-4b9d-8c5a-0eb34f7e919f	32.731611	-96.709417
40275	4638472273	7756914135	Missed	2012-08-19	05:33:32.103650	Sun	00:01:35.932559	aeaf8b43-8034-44fe-833d-31854a75acbf	32.731722	-96.709500
40789	4638472273	7428841195	Incoming	2012-08-26	22:23:58.453302	Sun	00:06:08.978230	bcec0121-164d-406c-8c04-d12870893992	32.731611	-96.709417
41757	4638472273	7922223441	Incoming	2012-09-08	22:45:53.008411	Sat	00:04:26.260140	2567c73b-3711-4a31-8683-9d12d56857d8	32.731611	-96.709417
41758	4638472273	7428841195	Incoming	2012-09-08	22:19:08.522062	Sat	00:04:18.402811	bcec0121-164d-406c-8c04-d12870893992	32.731611	-96.709417
42224	4638472273	7922223441	Incoming	2012-09-15	05:14:40.655349	Sat	00:04:07.561627	cd9f3b1a-2eb8-4cdb-86d1-5d4c2740b1dc	32.731722	-96.709500
44498	4638472273	7721472178	Incoming	2012-10-21	22:00:09.753350	Sun	00:03:24.773041	d760c426-4d58-4f9c-9950-b68a3a54612c	32.731611	-96.709417
44927	4638472273	1755442610	Incoming	2012-10-27	05:47:54.723850	Sat	00:04:28.876617	1bfc786a-d4c2-464c-bc45-b9aad0c81216	32.731722	-96.709500
44994	4638472273	7922223441	Incoming	2012-10-28	05:24:17.234557	Sun	00:02:00.473836	bde84490-896f-4060-8bd0-bd7a90cc20d9	32.750556	-96.694722
49203	4638472273	8068555700	Incoming	2012-12-29	05:08:40.433945	Sat	00:03:10.280023	aeaf8b43-8034-44fe-833d-31854a75acbf	32.731722	-96.709500
51586	4638472273	2946222380	Incoming	2013-02-03	22:14:12.703233	Sun	00:04:05.550748	cd9f3b1a-2eb8-4cdb-86d1-5d4c2740b1dc	32.731722	-96.709500