In [1]:
import pandas as pd
# Load the call detail records from the Excel workbook next to the notebook.
cdr = pd.read_excel(io="cdr_data.xlsx")
# Preview the first five rows.
cdr.head(n=5)
Out[1]:
Let's look into the dataset
In [2]:
# Print pandas' concise summary of the `cdr` frame (columns, dtypes, non-null counts).
cdr.info()
In [3]:
# Convert the 'Start' column to datetimes so that the `.dt` accessor can be
# used on it in later cells.
cdr['Start'] = cdr['Start'].pipe(pd.to_datetime)
cdr.head(n=5)
Out[3]:
In [4]:
# Keep only the rows whose 'Caller' is the number under investigation and
# whose 'Event' is 'Incoming' (both conditions applied in a single mask).
suspect = cdr[(cdr['Caller'] == 4638472273) & (cdr['Event'] == 'Incoming')]
suspect.head(n=5)
Out[4]:
In [5]:
# Read the tower table; the first CSV column becomes the index, which the
# later join on 'TowerID' is matched against.
towers = pd.read_csv(
    "darknet.io/hacks/infrastructure/mobile_net/texas_towers.csv",
    index_col=0,
)
towers.head(n=5)
Out[5]:
In [6]:
# Attach the tower columns to every suspect record: a left join that matches
# each row's 'TowerID' value against the index of `towers`.
suspect_loc = suspect.join(towers, on='TowerID', how='left')
suspect_loc.head(n=5)
Out[6]:
In [7]:
# Scatter plot of all tower positions: longitude on x, latitude on y.
# The trailing semicolon suppresses the Axes repr in the cell output.
suspect_loc.plot.scatter(x='TowerLon', y='TowerLat');
In [8]:
# Label every record with the English name of the weekday it started on.
# `Series.dt.weekday_name` was deprecated in pandas 0.23 and removed in 1.0;
# `Series.dt.day_name()` is the supported replacement and returns the same
# names ('Monday' ... 'Sunday') that the weekend filter relies on.
suspect_loc['DoW'] = suspect_loc['Start'].dt.day_name()
suspect_loc.head()
Out[8]:
In [9]:
# Keep the Saturday/Sunday records. `.copy()` gives an independent frame so
# that adding columns to it later does not touch `suspect_loc`.
suspect_on_weekend = suspect_loc.loc[
    suspect_loc['DoW'].isin(['Saturday', 'Sunday'])
].copy()
suspect_on_weekend.head(n=5)
Out[9]:
Let's take a look at the weekend's locations
In [10]:
# Scatter plot of the weekend tower positions: longitude on x, latitude on y.
# The trailing semicolon suppresses the Axes repr in the cell output.
suspect_on_weekend.plot.scatter(x='TowerLon', y='TowerLat');
In [11]:
# Extract the hour of day (0-23) from each record's start timestamp; the
# night-time filter in the next step works on this column.
suspect_on_weekend['hour'] = suspect_on_weekend['Start'].dt.hour
suspect_on_weekend.head(n=5)
Out[11]:
Keep only the sleeping hours
In [12]:
# Night-time records only: hour 23, or any hour before 6.
suspect_on_weekend_night = suspect_on_weekend.loc[
    (suspect_on_weekend['hour'] > 22) | (suspect_on_weekend['hour'] < 6)
]
suspect_on_weekend_night.head(n=5)
Out[12]:
Let's see where the suspect sleeps on weekends
In [13]:
# Plot the night-time tower positions and keep the Axes for a later overlay.
# Note the axis order here: latitude on x, longitude on y.
ax = suspect_on_weekend_night.plot.scatter(x='TowerLat', y='TowerLon')
In [14]:
from sklearn.cluster import KMeans
# Coordinates of the night-time weekend records: latitude first, longitude second.
data = suspect_on_weekend_night[['TowerLat', 'TowerLon']]
# Fit a single-cluster k-means model; only the fitted cluster centre is used,
# so plain `fit` (which returns the estimator) is sufficient.
kmeans = KMeans(n_clusters=1).fit(data)
# One row per cluster, columns in the same order as `data` (lat, lon).
centroids = kmeans.cluster_centers_
centroids
Out[14]:
Let's plot the main center of sleeping activity.
In [15]:
# Overlay the cluster centre as a red cross on the existing night-time plot.
# Column 0 of `centroids` goes on x and column 1 on y.
ax.scatter(
    x=centroids[:, 0],
    y=centroids[:, 1],
    c='r',
    marker='x',
)
# Re-display the figure that owns `ax`, now including the overlay.
ax.figure
Out[15]:
In [17]:
# Build a Google Maps search link from the first (and only) centroid; its two
# coordinates are unpacked into the placeholders in column order.
print("https://www.google.com/maps/search/?api=1&query={},{}".format(*centroids[0]))
Any questions?
Contact
Markus Harrer
markus.harrer@innoq.com
@feststelltaste
https://feststelltaste.de
This presentation is based on data and ideas from the online course "Programming with Python for Data Science": https://www.edx.org/course/programming-with-python-for-data-science