# In [5]:
import pandas as pd
from datetime import timedelta
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot') # Look Pretty
#
# INFO: This dataset has call records for 10 users tracked over the course of 3 years.
# Your job is to find out where the users most likely live.
def showandtell(title=None):
    """Show the pending matplotlib figure, optionally saving it to disk first.

    Args:
        title: Base file name for the saved image. When given, the figure is
            written to ``title + ".png"`` with a tight bounding box at 300 dpi
            before it is displayed. When ``None`` (the default) nothing is
            saved.
    """
    # Identity comparison is the idiomatic None test and cannot be hijacked by
    # a custom __ne__ on the argument.
    if title is not None:
        plt.savefig(title + ".png", bbox_inches='tight', dpi=300)
    plt.show()
    # exit()
def clusterInfo(model):
    """Print a summary of a fitted clustering model.

    Writes the model's inertia, then for every cluster its index, its
    centroid and the number of samples whose label equals that index.

    Args:
        model: Object exposing ``inertia_``, ``cluster_centers_`` and
            ``labels_``. ``labels_`` must support element-wise ``==``
            followed by ``.sum()`` (e.g. a NumPy array).
    """
    print ("Cluster Analysis Inertia: ", model.inertia_)
    print ('------------------------------------------')
    for i, centroid in enumerate(model.cluster_centers_):
        print ("\n Cluster ", i)
        print (" Centroid ", centroid)
        # Boolean mask summed -> count of samples assigned to cluster i.
        print (" #Samples ", (model.labels_ == i).sum())
# Find the cluster with the fewest attached samples
def clusterWithFewestSamples(model):
    """Return a boolean mask selecting the samples of the smallest cluster.

    The cluster index is printed before the mask is returned. On ties the
    lowest cluster index wins; if no cluster has fewer samples than the total
    number of labels, cluster 0 is used.

    Args:
        model: Object exposing ``cluster_centers_`` and ``labels_``.
            ``labels_`` must support element-wise ``==`` followed by
            ``.sum()`` (e.g. a NumPy array).

    Returns:
        The result of ``model.labels_ == <smallest cluster index>``.
    """
    # Start from the total sample count so that any strictly smaller cluster
    # replaces the default; ensures there's at least one cluster chosen.
    minSamples = len(model.labels_)
    minCluster = 0
    for i in range(len(model.cluster_centers_)):
        # Count once per cluster instead of re-evaluating the mask twice.
        samples = (model.labels_ == i).sum()
        if minSamples > samples:
            minCluster = i
            minSamples = samples
    print("\n Cluster With Fewest Samples: ", minCluster)
    return (model.labels_ == minCluster)
# In [ ]:
# TODO: Load up the dataset and take a peek at its head
# Convert the date using pd.to_datetime, and the time using pd.to_timedelta
df = pd.read_csv('Datasets/CDR.csv')
df['CallDate'] = pd.to_datetime(df['CallDate'])
# Convert the entire column to timedeltas. Indexing the converted series with
# [8] would pick one scalar and broadcast that single value to every row.
df['CallTime'] = pd.to_timedelta(df['CallTime'])
df.head(5)