notebook.community

Edit and run



In [1]:

    
# Wifi data amalgamation



In [2]:

    
import pandas as pd
import glob
import os



In [15]:

    
# Data flow consists of emails. Each email has 1 zipped folder, containing multiple csv files. Each csv files has one sheet.
# Filenames have same title beginning, with different numbers on end. 
data = pd.read_csv("Occupancy_Monitor_Client_Count_-_Most_Buildings_20171024_000515_566.csv", skiprows = 7, encoding = 'macroman', parse_dates = ['Event Time'])



In [16]:

    
data.head(5)









    Out[16]:







  
    
      
      Total Count
      Event Time
      Associated Client Count
      Authenticated Client Count
    
  
  
    
      0
      total
      Mon Oct 23 00:08:35 UTC 2017
      8603
      8487
    
    
      1
      total
      Mon Oct 23 00:13:55 UTC 2017
      8515
      8402
    
    
      2
      total
      Mon Oct 23 00:19:02 UTC 2017
      8475
      8360
    
    
      3
      total
      Mon Oct 23 00:24:10 UTC 2017
      8424
      8307
    
    
      4
      total
      Mon Oct 23 00:29:18 UTC 2017
      8352
      8239



In [29]:

    
#have to do it one-by-one if the column does not import correctly as datetime
datetimes=[]
for i in data.index:
    try:
        datetimes.append(pd.to_datetime(data.loc[i]['Event Time']))
    except:
        print(i)
#data['datetimes']=datetimes



In [30]:

    
#something fishy going on here
data.loc[275:282]









    Out[30]:







  
    
      
      Total Count
      Event Time
      Associated Client Count
      Authenticated Client Count
    
  
  
    
      275
      total
      Mon Oct 23 23:50:45 UTC 2017
      8717
      8590
    
    
      276
      total
      Mon Oct 23 23:55:56 UTC 2017
      8706
      8563
    
    
      277
      total
      Tue Oct 24 00:00:52 UTC 2017
      8637
      8500
    
    
      278
      Client Count
      nan
      NaN
      NaN
    
    
      279
      Key
      Event Time
      Associated Client Count
      Authenticated Client Count
    
    
      280
      Lancaster University > Pendle > A Floor
      Mon Oct 23 00:08:35 UTC 2017
      73
      71
    
    
      281
      Lancaster University > Pendle > A Floor
      Mon Oct 23 00:13:55 UTC 2017
      72
      72
    
    
      282
      Lancaster University > Pendle > A Floor
      Mon Oct 23 00:19:02 UTC 2017
      75
      72



In [34]:

    
#we just ignore, but need to maintain column length
#second, we also create a mask of good rows
datetimes=[]
datetimes_mask=[]
for i in data.index:
    try:
        datetimes.append(pd.to_datetime(data.loc[i]['Event Time']))
        datetimes_mask.append(i)
    except:
        datetimes.append(i)
data['datetimes']=datetimes
datam=data.loc[datetimes_mask]



In [36]:

    
#still inherist object type, so reassing
datam.dtypes









    Out[36]:





Total Count                   object
Event Time                    object
Associated Client Count       object
Authenticated Client Count    object
datetimes                     object
dtype: object



In [39]:

    
datam['datetimes']=pd.DatetimeIndex(datam['datetimes'])



In [40]:

    
#now AOK
datam.dtypes









    Out[40]:





Total Count                           object
Event Time                            object
Associated Client Count               object
Authenticated Client Count            object
datetimes                     datetime64[ns]
dtype: object



In [6]:

    
#no need for this now
#data['Event Time'] = data['Event Time'].str.replace(':',' ')
#data2 = pd.DataFrame([ x.split() for x in data['Event Time'].tolist()])
#data2.head(5)



In [41]:

    
datam.columns









    Out[41]:





Index(['Total Count', 'Event Time', 'Associated Client Count',
       'Authenticated Client Count', 'datetimes'],
      dtype='object')



In [47]:

    
# Preparation for multi hierarchy
data2=datam[['Total Count', 'Associated Client Count',
       'Authenticated Client Count', 'datetimes']]
dates=pd.DatetimeIndex(datam['datetimes'])
data2['Year']=dates.year
data2['Month']=dates.month
data2['Day']=dates.day
data2['Hour']=dates.hour
data2['Minute']=dates.minute
data2['Second']=dates.second



In [49]:

    
data2 = data2.set_index(['Year','Month','Day','Hour','Minute','Second'])



In [50]:

    
data2









    Out[50]:







  
    
      
      
      
      
      
      
      Total Count
      Associated Client Count
      Authenticated Client Count
      datetimes
    
    
      Year
      Month
      Day
      Hour
      Minute
      Second
      
      
      
      
    
  
  
    
      2017.0
      10.0
      23.0
      0.0
      8.0
      35.0
      total
      8603
      8487
      2017-10-23 00:08:35
    
    
      13.0
      55.0
      total
      8515
      8402
      2017-10-23 00:13:55
    
    
      19.0
      2.0
      total
      8475
      8360
      2017-10-23 00:19:02
    
    
      24.0
      10.0
      total
      8424
      8307
      2017-10-23 00:24:10
    
    
      29.0
      18.0
      total
      8352
      8239
      2017-10-23 00:29:18
    
    
      34.0
      37.0
      total
      8283
      8171
      2017-10-23 00:34:37
    
    
      39.0
      30.0
      total
      8171
      8071
      2017-10-23 00:39:30
    
    
      44.0
      42.0
      total
      8140
      8031
      2017-10-23 00:44:42
    
    
      49.0
      49.0
      total
      8073
      7959
      2017-10-23 00:49:49
    
    
      54.0
      53.0
      total
      8003
      7907
      2017-10-23 00:54:53
    
    
      1.0
      0.0
      12.0
      total
      7982
      7880
      2017-10-23 01:00:12
    
    
      5.0
      13.0
      total
      7979
      7864
      2017-10-23 01:05:13
    
    
      10.0
      22.0
      total
      7874
      7770
      2017-10-23 01:10:22
    
    
      15.0
      39.0
      total
      7868
      7751
      2017-10-23 01:15:39
    
    
      20.0
      37.0
      total
      7791
      7667
      2017-10-23 01:20:37
    
    
      25.0
      46.0
      total
      7738
      7622
      2017-10-23 01:25:46
    
    
      31.0
      7.0
      total
      7702
      7589
      2017-10-23 01:31:07
    
    
      35.0
      52.0
      total
      7672
      7559
      2017-10-23 01:35:52
    
    
      41.0
      1.0
      total
      7645
      7537
      2017-10-23 01:41:01
    
    
      46.0
      12.0
      total
      7590
      7482
      2017-10-23 01:46:12
    
    
      51.0
      6.0
      total
      7571
      7459
      2017-10-23 01:51:06
    
    
      56.0
      12.0
      total
      7543
      7423
      2017-10-23 01:56:12
    
    
      2.0
      1.0
      18.0
      total
      7501
      7403
      2017-10-23 02:01:18
    
    
      6.0
      16.0
      total
      7488
      7366
      2017-10-23 02:06:16
    
    
      11.0
      22.0
      total
      7455
      7341
      2017-10-23 02:11:22
    
    
      16.0
      23.0
      total
      7398
      7295
      2017-10-23 02:16:23
    
    
      21.0
      28.0
      total
      7395
      7280
      2017-10-23 02:21:28
    
    
      26.0
      25.0
      total
      7341
      7219
      2017-10-23 02:26:25
    
    
      31.0
      30.0
      total
      7259
      7149
      2017-10-23 02:31:30
    
    
      36.0
      31.0
      total
      7255
      7113
      2017-10-23 02:36:31
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      20.0
      29.0
      8.0
      Lancaster University > SW hse 24-26 > A
      38
      37
      2017-10-23 20:29:08
    
    
      34.0
      1.0
      Lancaster University > SW hse 24-26 > A
      42
      39
      2017-10-23 20:34:01
    
    
      39.0
      50.0
      Lancaster University > SW hse 24-26 > A
      40
      37
      2017-10-23 20:39:50
    
    
      44.0
      41.0
      Lancaster University > SW hse 24-26 > A
      40
      40
      2017-10-23 20:44:41
    
    
      49.0
      53.0
      Lancaster University > SW hse 24-26 > A
      37
      37
      2017-10-23 20:49:53
    
    
      55.0
      3.0
      Lancaster University > SW hse 24-26 > A
      39
      39
      2017-10-23 20:55:03
    
    
      21.0
      0.0
      12.0
      Lancaster University > SW hse 24-26 > A
      37
      36
      2017-10-23 21:00:12
    
    
      5.0
      11.0
      Lancaster University > SW hse 24-26 > A
      36
      35
      2017-10-23 21:05:11
    
    
      10.0
      50.0
      Lancaster University > SW hse 24-26 > A
      35
      33
      2017-10-23 21:10:50
    
    
      15.0
      46.0
      Lancaster University > SW hse 24-26 > A
      37
      34
      2017-10-23 21:15:46
    
    
      21.0
      4.0
      Lancaster University > SW hse 24-26 > A
      33
      32
      2017-10-23 21:21:04
    
    
      26.0
      14.0
      Lancaster University > SW hse 24-26 > A
      34
      33
      2017-10-23 21:26:14
    
    
      31.0
      28.0
      Lancaster University > SW hse 24-26 > A
      38
      37
      2017-10-23 21:31:28
    
    
      36.0
      40.0
      Lancaster University > SW hse 24-26 > A
      39
      39
      2017-10-23 21:36:40
    
    
      41.0
      39.0
      Lancaster University > SW hse 24-26 > A
      40
      38
      2017-10-23 21:41:39
    
    
      46.0
      42.0
      Lancaster University > SW hse 24-26 > A
      38
      38
      2017-10-23 21:46:42
    
    
      51.0
      59.0
      Lancaster University > SW hse 24-26 > A
      42
      39
      2017-10-23 21:51:59
    
    
      57.0
      16.0
      Lancaster University > SW hse 24-26 > A
      39
      39
      2017-10-23 21:57:16
    
    
      22.0
      2.0
      44.0
      Lancaster University > SW hse 24-26 > A
      38
      37
      2017-10-23 22:02:44
    
    
      7.0
      31.0
      Lancaster University > SW hse 24-26 > A
      42
      41
      2017-10-23 22:07:31
    
    
      13.0
      15.0
      Lancaster University > SW hse 24-26 > A
      43
      43
      2017-10-23 22:13:15
    
    
      18.0
      31.0
      Lancaster University > SW hse 24-26 > A
      44
      42
      2017-10-23 22:18:31
    
    
      23.0
      9.0
      Lancaster University > SW hse 24-26 > A
      45
      45
      2017-10-23 22:23:09
    
    
      28.0
      48.0
      Lancaster University > SW hse 24-26 > A
      44
      43
      2017-10-23 22:28:48
    
    
      33.0
      31.0
      Lancaster University > SW hse 24-26 > A
      44
      43
      2017-10-23 22:33:31
    
    
      38.0
      39.0
      Lancaster University > SW hse 24-26 > A
      43
      42
      2017-10-23 22:38:39
    
    
      43.0
      55.0
      Lancaster University > SW hse 24-26 > A
      44
      44
      2017-10-23 22:43:55
    
    
      48.0
      48.0
      Lancaster University > SW hse 24-26 > A
      48
      48
      2017-10-23 22:48:48
    
    
      54.0
      2.0
      Lancaster University > SW hse 24-26 > A
      42
      42
      2017-10-23 22:54:02
    
    
      59.0
      35.0
      Lancaster University > SW hse 24-26 > A
      43
      42
      2017-10-23 22:59:35
    
  

15001 rows × 4 columns

	Total Count	Event Time	Associated Client Count	Authenticated Client Count
0	total	Mon Oct 23 00:08:35 UTC 2017	8603	8487
1	total	Mon Oct 23 00:13:55 UTC 2017	8515	8402
2	total	Mon Oct 23 00:19:02 UTC 2017	8475	8360
3	total	Mon Oct 23 00:24:10 UTC 2017	8424	8307
4	total	Mon Oct 23 00:29:18 UTC 2017	8352	8239

	Total Count	Event Time	Associated Client Count	Authenticated Client Count
275	total	Mon Oct 23 23:50:45 UTC 2017	8717	8590
276	total	Mon Oct 23 23:55:56 UTC 2017	8706	8563
277	total	Tue Oct 24 00:00:52 UTC 2017	8637	8500
278	Client Count	nan	NaN	NaN
279	Key	Event Time	Associated Client Count	Authenticated Client Count
280	Lancaster University > Pendle > A Floor	Mon Oct 23 00:08:35 UTC 2017	73	71
281	Lancaster University > Pendle > A Floor	Mon Oct 23 00:13:55 UTC 2017	72	72
282	Lancaster University > Pendle > A Floor	Mon Oct 23 00:19:02 UTC 2017	75	72

	0	1	2	4	5	6	7
0	Mon	Oct	23	08	35	UTC	2017
1	Mon	Oct	23	13	55	UTC	2017
2	Mon	Oct	23	19	02	UTC	2017
3	Mon	Oct	23	24	10	UTC	2017
4	Mon	Oct	23	29	18	UTC	2017

						Total Count	Associated Client Count	Authenticated Client Count	datetimes
Year	Month	Day	Hour	Minute	Second
2017.0	10.0	23.0	0.0	8.0	35.0	total	8603	8487	2017-10-23 00:08:35
				13.0	55.0	total	8515	8402	2017-10-23 00:13:55
				19.0	2.0	total	8475	8360	2017-10-23 00:19:02
				24.0	10.0	total	8424	8307	2017-10-23 00:24:10
				29.0	18.0	total	8352	8239	2017-10-23 00:29:18
				34.0	37.0	total	8283	8171	2017-10-23 00:34:37
				39.0	30.0	total	8171	8071	2017-10-23 00:39:30
				44.0	42.0	total	8140	8031	2017-10-23 00:44:42
				49.0	49.0	total	8073	7959	2017-10-23 00:49:49
				54.0	53.0	total	8003	7907	2017-10-23 00:54:53
			1.0	0.0	12.0	total	7982	7880	2017-10-23 01:00:12
				5.0	13.0	total	7979	7864	2017-10-23 01:05:13
				10.0	22.0	total	7874	7770	2017-10-23 01:10:22
				15.0	39.0	total	7868	7751	2017-10-23 01:15:39
				20.0	37.0	total	7791	7667	2017-10-23 01:20:37
				25.0	46.0	total	7738	7622	2017-10-23 01:25:46
				31.0	7.0	total	7702	7589	2017-10-23 01:31:07
				35.0	52.0	total	7672	7559	2017-10-23 01:35:52
				41.0	1.0	total	7645	7537	2017-10-23 01:41:01
				46.0	12.0	total	7590	7482	2017-10-23 01:46:12
				51.0	6.0	total	7571	7459	2017-10-23 01:51:06
				56.0	12.0	total	7543	7423	2017-10-23 01:56:12
			2.0	1.0	18.0	total	7501	7403	2017-10-23 02:01:18
				6.0	16.0	total	7488	7366	2017-10-23 02:06:16
				11.0	22.0	total	7455	7341	2017-10-23 02:11:22
				16.0	23.0	total	7398	7295	2017-10-23 02:16:23
				21.0	28.0	total	7395	7280	2017-10-23 02:21:28
				26.0	25.0	total	7341	7219	2017-10-23 02:26:25
				31.0	30.0	total	7259	7149	2017-10-23 02:31:30
				36.0	31.0	total	7255	7113	2017-10-23 02:36:31
			...	...	...	...	...	...	...
			20.0	29.0	8.0	Lancaster University > SW hse 24-26 > A	38	37	2017-10-23 20:29:08
				34.0	1.0	Lancaster University > SW hse 24-26 > A	42	39	2017-10-23 20:34:01
				39.0	50.0	Lancaster University > SW hse 24-26 > A	40	37	2017-10-23 20:39:50
				44.0	41.0	Lancaster University > SW hse 24-26 > A	40	40	2017-10-23 20:44:41
				49.0	53.0	Lancaster University > SW hse 24-26 > A	37	37	2017-10-23 20:49:53
				55.0	3.0	Lancaster University > SW hse 24-26 > A	39	39	2017-10-23 20:55:03
			21.0	0.0	12.0	Lancaster University > SW hse 24-26 > A	37	36	2017-10-23 21:00:12
				5.0	11.0	Lancaster University > SW hse 24-26 > A	36	35	2017-10-23 21:05:11
				10.0	50.0	Lancaster University > SW hse 24-26 > A	35	33	2017-10-23 21:10:50
				15.0	46.0	Lancaster University > SW hse 24-26 > A	37	34	2017-10-23 21:15:46
				21.0	4.0	Lancaster University > SW hse 24-26 > A	33	32	2017-10-23 21:21:04
				26.0	14.0	Lancaster University > SW hse 24-26 > A	34	33	2017-10-23 21:26:14
				31.0	28.0	Lancaster University > SW hse 24-26 > A	38	37	2017-10-23 21:31:28
				36.0	40.0	Lancaster University > SW hse 24-26 > A	39	39	2017-10-23 21:36:40
				41.0	39.0	Lancaster University > SW hse 24-26 > A	40	38	2017-10-23 21:41:39
				46.0	42.0	Lancaster University > SW hse 24-26 > A	38	38	2017-10-23 21:46:42
				51.0	59.0	Lancaster University > SW hse 24-26 > A	42	39	2017-10-23 21:51:59
				57.0	16.0	Lancaster University > SW hse 24-26 > A	39	39	2017-10-23 21:57:16
			22.0	2.0	44.0	Lancaster University > SW hse 24-26 > A	38	37	2017-10-23 22:02:44
				7.0	31.0	Lancaster University > SW hse 24-26 > A	42	41	2017-10-23 22:07:31
				13.0	15.0	Lancaster University > SW hse 24-26 > A	43	43	2017-10-23 22:13:15
				18.0	31.0	Lancaster University > SW hse 24-26 > A	44	42	2017-10-23 22:18:31
				23.0	9.0	Lancaster University > SW hse 24-26 > A	45	45	2017-10-23 22:23:09
				28.0	48.0	Lancaster University > SW hse 24-26 > A	44	43	2017-10-23 22:28:48
				33.0	31.0	Lancaster University > SW hse 24-26 > A	44	43	2017-10-23 22:33:31
				38.0	39.0	Lancaster University > SW hse 24-26 > A	43	42	2017-10-23 22:38:39
				43.0	55.0	Lancaster University > SW hse 24-26 > A	44	44	2017-10-23 22:43:55
				48.0	48.0	Lancaster University > SW hse 24-26 > A	48	48	2017-10-23 22:48:48
				54.0	2.0	Lancaster University > SW hse 24-26 > A	42	42	2017-10-23 22:54:02
				59.0	35.0	Lancaster University > SW hse 24-26 > A	43	42	2017-10-23 22:59:35