In [20]:
import pandas as pd
# Load the call-detail records; the column at position 0 is read as str
# rather than letting pandas infer its type.
cdr = pd.read_csv(
    "./data/cdr_data.csv",
    dtype={0: str},
)
cdr.head()
Out[20]:
In [21]:
# One row per distinct value of the `In` column, stored under `number`.
cdr_name = pd.DataFrame({'number': cdr['In'].unique()})
cdr_name
Out[21]:
In [32]:
from faker import Faker
# Build the phone-owner table: one generated name per distinct number,
# with a fixed, known record planted in the middle row.
fake = Faker()

# Work on a copy so that adding columns here does not mutate `cdr_name`.
data = cdr_name.copy()

# One generated name per row (names differ from run to run unless Faker is seeded).
data['name'] = [fake.name() for _ in range(len(data))]

# Overwrite the middle row with a known owner/number pair.
middle_row = len(data) // 2
data.loc[middle_row, 'name'] = "John Doe"
data.loc[middle_row, 'number'] = "4638472273"

# Every owner must have a distinct name, whatever the number of rows.
assert data['name'].is_unique, "duplicate name"

# Export without the positional index so the sheet holds only number/name.
data.to_excel("data/phoneowners.xlsx", index=False)
In [26]:
# Show the row(s) whose owner name is exactly "John Doe".
data.loc[data['name'].eq("John Doe")]
Out[26]:
In [69]:
# Write the owner table to Excel, leaving out the DataFrame index column.
data.to_excel("data/phoneowners.xlsx", index=False)
In [86]:
# Load the original CDR export; the columns at positions 0 and 1 are
# read as str rather than having their type inferred.
cdr = pd.read_csv(
    "data/CDR_original.csv",
    dtype={0: str, 1: str},
)
cdr.head()
Out[86]:
In [87]:
# Extract the distinct (TowerID, TowerLat, TowerLon) rows from the
# original CDR file and save them as their own CSV.
towers = (
    pd.read_csv("data/CDR_original.csv")
    .loc[:, ['TowerID', 'TowerLat', 'TowerLon']]
    .drop_duplicates()
)
# Floats are written with six decimal places; the index is not written.
towers.to_csv("data/towers.csv", float_format="%6.6f", index=False)
towers.head()
Out[87]:
In [88]:
# Distinct tower rows from the original CDR file.
# drop_duplicates must be *called*: without the parentheses `towers` would be
# bound to the method object instead of the de-duplicated DataFrame.
towers = (
    pd.read_csv("data/CDR_original.csv")[['TowerID', 'TowerLat', 'TowerLon']]
    .drop_duplicates()
)
In [89]:
# Build the trimmed call table: keep the call columns, merge CallDate and
# CallTime into a single "date time" string column, then drop the two parts.
# assign/drop return a new frame, so nothing is written into a slice of `cdr`
# (which would raise SettingWithCopyWarning).
cdr_data = (
    cdr[['In', 'Out', 'Direction', 'CallDate', 'CallTime', 'Duration', 'TowerID']]
    .assign(CallTimestamp=lambda calls: calls['CallDate'] + " " + calls['CallTime'])
    .drop(columns=['CallDate', 'CallTime'])
)
cdr_data.head()
Out[89]:
In [95]:
# Save the trimmed call table with an explicit column order and no index.
cdr_data.to_csv(
    'data/cdr_data.csv',
    columns=['In', 'Out', 'Direction', 'CallTimestamp', 'Duration', 'TowerID'],
    quoting=1,  # numeric value of csv.QUOTE_ALL: every field is quoted
    index=False,
)
cdr_data.head()
Out[95]:
In [92]:
# Print a summary of cdr_data (columns, non-null counts, dtypes).
cdr_data.info()
In [ ]: