In [1]:
# Wifi data amalgamation

In [2]:
import pandas as pd
import glob
import os

In [15]:
# Data flow consists of emails. Each email has 1 zipped folder, containing multiple csv files. Each csv files has one sheet.
# Filenames have same title beginning, with different numbers on end. 
data = pd.read_csv("Occupancy_Monitor_Client_Count_-_Most_Buildings_20171024_000515_566.csv", skiprows = 7, encoding = 'macroman', parse_dates = ['Event Time'])

In [16]:
data.head(5)


Out[16]:
Total Count Event Time Associated Client Count Authenticated Client Count
0 total Mon Oct 23 00:08:35 UTC 2017 8603 8487
1 total Mon Oct 23 00:13:55 UTC 2017 8515 8402
2 total Mon Oct 23 00:19:02 UTC 2017 8475 8360
3 total Mon Oct 23 00:24:10 UTC 2017 8424 8307
4 total Mon Oct 23 00:29:18 UTC 2017 8352 8239

In [29]:
#have to do it one-by-one if the column does not import correctly as datetime
datetimes=[]
for i in data.index:
    try:
        datetimes.append(pd.to_datetime(data.loc[i]['Event Time']))
    except:
        print(i)
#data['datetimes']=datetimes


279

In [30]:
#something fishy going on here
data.loc[275:282]


Out[30]:
Total Count Event Time Associated Client Count Authenticated Client Count
275 total Mon Oct 23 23:50:45 UTC 2017 8717 8590
276 total Mon Oct 23 23:55:56 UTC 2017 8706 8563
277 total Tue Oct 24 00:00:52 UTC 2017 8637 8500
278 Client Count nan NaN NaN
279 Key Event Time Associated Client Count Authenticated Client Count
280 Lancaster University > Pendle > A Floor Mon Oct 23 00:08:35 UTC 2017 73 71
281 Lancaster University > Pendle > A Floor Mon Oct 23 00:13:55 UTC 2017 72 72
282 Lancaster University > Pendle > A Floor Mon Oct 23 00:19:02 UTC 2017 75 72

In [34]:
#we just ignore, but need to maintain column length
#second, we also create a mask of good rows
datetimes=[]
datetimes_mask=[]
for i in data.index:
    try:
        datetimes.append(pd.to_datetime(data.loc[i]['Event Time']))
        datetimes_mask.append(i)
    except:
        datetimes.append(i)
data['datetimes']=datetimes
datam=data.loc[datetimes_mask]

In [36]:
#still inherist object type, so reassing
datam.dtypes


Out[36]:
Total Count                   object
Event Time                    object
Associated Client Count       object
Authenticated Client Count    object
datetimes                     object
dtype: object

In [39]:
datam['datetimes']=pd.DatetimeIndex(datam['datetimes'])

In [40]:
#now AOK
datam.dtypes


Out[40]:
Total Count                           object
Event Time                            object
Associated Client Count               object
Authenticated Client Count            object
datetimes                     datetime64[ns]
dtype: object

In [6]:
#no need for this now
#data['Event Time'] = data['Event Time'].str.replace(':',' ')
#data2 = pd.DataFrame([ x.split() for x in data['Event Time'].tolist()])
#data2.head(5)


Out[6]:
0 1 2 3 4 5 6 7
0 Mon Oct 23 00 08 35 UTC 2017
1 Mon Oct 23 00 13 55 UTC 2017
2 Mon Oct 23 00 19 02 UTC 2017
3 Mon Oct 23 00 24 10 UTC 2017
4 Mon Oct 23 00 29 18 UTC 2017

In [41]:
datam.columns


Out[41]:
Index(['Total Count', 'Event Time', 'Associated Client Count',
       'Authenticated Client Count', 'datetimes'],
      dtype='object')

In [47]:
# Preparation for multi hierarchy
data2=datam[['Total Count', 'Associated Client Count',
       'Authenticated Client Count', 'datetimes']]
dates=pd.DatetimeIndex(datam['datetimes'])
data2['Year']=dates.year
data2['Month']=dates.month
data2['Day']=dates.day
data2['Hour']=dates.hour
data2['Minute']=dates.minute
data2['Second']=dates.second

In [49]:
data2 = data2.set_index(['Year','Month','Day','Hour','Minute','Second'])

In [50]:
data2


Out[50]:
Total Count Associated Client Count Authenticated Client Count datetimes
Year Month Day Hour Minute Second
2017.0 10.0 23.0 0.0 8.0 35.0 total 8603 8487 2017-10-23 00:08:35
13.0 55.0 total 8515 8402 2017-10-23 00:13:55
19.0 2.0 total 8475 8360 2017-10-23 00:19:02
24.0 10.0 total 8424 8307 2017-10-23 00:24:10
29.0 18.0 total 8352 8239 2017-10-23 00:29:18
34.0 37.0 total 8283 8171 2017-10-23 00:34:37
39.0 30.0 total 8171 8071 2017-10-23 00:39:30
44.0 42.0 total 8140 8031 2017-10-23 00:44:42
49.0 49.0 total 8073 7959 2017-10-23 00:49:49
54.0 53.0 total 8003 7907 2017-10-23 00:54:53
1.0 0.0 12.0 total 7982 7880 2017-10-23 01:00:12
5.0 13.0 total 7979 7864 2017-10-23 01:05:13
10.0 22.0 total 7874 7770 2017-10-23 01:10:22
15.0 39.0 total 7868 7751 2017-10-23 01:15:39
20.0 37.0 total 7791 7667 2017-10-23 01:20:37
25.0 46.0 total 7738 7622 2017-10-23 01:25:46
31.0 7.0 total 7702 7589 2017-10-23 01:31:07
35.0 52.0 total 7672 7559 2017-10-23 01:35:52
41.0 1.0 total 7645 7537 2017-10-23 01:41:01
46.0 12.0 total 7590 7482 2017-10-23 01:46:12
51.0 6.0 total 7571 7459 2017-10-23 01:51:06
56.0 12.0 total 7543 7423 2017-10-23 01:56:12
2.0 1.0 18.0 total 7501 7403 2017-10-23 02:01:18
6.0 16.0 total 7488 7366 2017-10-23 02:06:16
11.0 22.0 total 7455 7341 2017-10-23 02:11:22
16.0 23.0 total 7398 7295 2017-10-23 02:16:23
21.0 28.0 total 7395 7280 2017-10-23 02:21:28
26.0 25.0 total 7341 7219 2017-10-23 02:26:25
31.0 30.0 total 7259 7149 2017-10-23 02:31:30
36.0 31.0 total 7255 7113 2017-10-23 02:36:31
... ... ... ... ... ... ...
20.0 29.0 8.0 Lancaster University > SW hse 24-26 > A 38 37 2017-10-23 20:29:08
34.0 1.0 Lancaster University > SW hse 24-26 > A 42 39 2017-10-23 20:34:01
39.0 50.0 Lancaster University > SW hse 24-26 > A 40 37 2017-10-23 20:39:50
44.0 41.0 Lancaster University > SW hse 24-26 > A 40 40 2017-10-23 20:44:41
49.0 53.0 Lancaster University > SW hse 24-26 > A 37 37 2017-10-23 20:49:53
55.0 3.0 Lancaster University > SW hse 24-26 > A 39 39 2017-10-23 20:55:03
21.0 0.0 12.0 Lancaster University > SW hse 24-26 > A 37 36 2017-10-23 21:00:12
5.0 11.0 Lancaster University > SW hse 24-26 > A 36 35 2017-10-23 21:05:11
10.0 50.0 Lancaster University > SW hse 24-26 > A 35 33 2017-10-23 21:10:50
15.0 46.0 Lancaster University > SW hse 24-26 > A 37 34 2017-10-23 21:15:46
21.0 4.0 Lancaster University > SW hse 24-26 > A 33 32 2017-10-23 21:21:04
26.0 14.0 Lancaster University > SW hse 24-26 > A 34 33 2017-10-23 21:26:14
31.0 28.0 Lancaster University > SW hse 24-26 > A 38 37 2017-10-23 21:31:28
36.0 40.0 Lancaster University > SW hse 24-26 > A 39 39 2017-10-23 21:36:40
41.0 39.0 Lancaster University > SW hse 24-26 > A 40 38 2017-10-23 21:41:39
46.0 42.0 Lancaster University > SW hse 24-26 > A 38 38 2017-10-23 21:46:42
51.0 59.0 Lancaster University > SW hse 24-26 > A 42 39 2017-10-23 21:51:59
57.0 16.0 Lancaster University > SW hse 24-26 > A 39 39 2017-10-23 21:57:16
22.0 2.0 44.0 Lancaster University > SW hse 24-26 > A 38 37 2017-10-23 22:02:44
7.0 31.0 Lancaster University > SW hse 24-26 > A 42 41 2017-10-23 22:07:31
13.0 15.0 Lancaster University > SW hse 24-26 > A 43 43 2017-10-23 22:13:15
18.0 31.0 Lancaster University > SW hse 24-26 > A 44 42 2017-10-23 22:18:31
23.0 9.0 Lancaster University > SW hse 24-26 > A 45 45 2017-10-23 22:23:09
28.0 48.0 Lancaster University > SW hse 24-26 > A 44 43 2017-10-23 22:28:48
33.0 31.0 Lancaster University > SW hse 24-26 > A 44 43 2017-10-23 22:33:31
38.0 39.0 Lancaster University > SW hse 24-26 > A 43 42 2017-10-23 22:38:39
43.0 55.0 Lancaster University > SW hse 24-26 > A 44 44 2017-10-23 22:43:55
48.0 48.0 Lancaster University > SW hse 24-26 > A 48 48 2017-10-23 22:48:48
54.0 2.0 Lancaster University > SW hse 24-26 > A 42 42 2017-10-23 22:54:02
59.0 35.0 Lancaster University > SW hse 24-26 > A 43 42 2017-10-23 22:59:35

15001 rows × 4 columns