In [13]:
import os

os.getcwd()
os.chdir("C:\Vindico\Projects\Data\Course\Python\Udacity\Introduction to Data Science\Lesson 2\Exercise")
os.getcwd()


Out[13]:
'C:\\Vindico\\Projects\\Data\\Course\\Python\\Udacity\\Introduction to Data Science\\Lesson 2\\Exercise'

In [14]:
import pandas

def get_hourly_entries(filename):
    '''
    The data in the MTA Subway Turnstile data reports on the cumulative
    number of entries and exits per row.  Assume that you have a dataframe
    called df that contains only the rows for a particular turnstile machine
    (i.e., unique SCP, C/A, and UNIT).  This function should change
    these cumulative entry numbers to a count of entries since the last reading
    (i.e., entries since the last row in the dataframe).
    
    More specifically, you want to do two things:
       1) Create a new column called ENTRIESn_hourly
       2) Assign to the column the difference between ENTRIESn of the current row 
          and the previous row. If there is any NaN, fill/replace it with 1.
    
    You may find the pandas functions shift() and fillna() to be helpful in this exercise.
    
    Examples of what your dataframe should look like at the end of this exercise:
    
           C/A  UNIT       SCP     DATEn     TIMEn    DESCn  ENTRIESn    EXITSn  ENTRIESn_hourly
    0     A002  R051  02-00-00  05-01-11  00:00:00  REGULAR   3144312   1088151                1
    1     A002  R051  02-00-00  05-01-11  04:00:00  REGULAR   3144335   1088159               23
    2     A002  R051  02-00-00  05-01-11  08:00:00  REGULAR   3144353   1088177               18
    3     A002  R051  02-00-00  05-01-11  12:00:00  REGULAR   3144424   1088231               71
    4     A002  R051  02-00-00  05-01-11  16:00:00  REGULAR   3144594   1088275              170
    5     A002  R051  02-00-00  05-01-11  20:00:00  REGULAR   3144808   1088317              214
    6     A002  R051  02-00-00  05-02-11  00:00:00  REGULAR   3144895   1088328               87
    7     A002  R051  02-00-00  05-02-11  04:00:00  REGULAR   3144905   1088331               10
    8     A002  R051  02-00-00  05-02-11  08:00:00  REGULAR   3144941   1088420               36
    9     A002  R051  02-00-00  05-02-11  12:00:00  REGULAR   3145094   1088753              153
    10    A002  R051  02-00-00  05-02-11  16:00:00  REGULAR   3145337   1088823              243
    ...
    ...

    '''
    #your code here
    df = pandas.read_csv(filename)
    df = df[df['DESCn']=='REGULAR']
    df['ENTRIESn_hourly'] = df['ENTRIESn'] - df['ENTRIESn'].shift(1)
    df['ENTRIESn_hourly'] = df['ENTRIESn_hourly'].fillna(1)
    return df

In [15]:
get_hourly_entries("output.csv")


Out[15]:
C/A UNIT SCP DATEn TIMEn DESCn ENTRIESn EXITSn ENTRIESn_hourly
0 A002 R051 02-00-00 05-21-11 04:00:00 REGULAR 3169415 1097588 1
1 A002 R051 02-00-00 05-21-11 08:00:00 REGULAR 3169431 1097607 16
2 A002 R051 02-00-00 05-21-11 12:00:00 REGULAR 3169506 1097686 75
3 A002 R051 02-00-00 05-21-11 16:00:00 REGULAR 3169693 1097734 187
4 A002 R051 02-00-00 05-21-11 20:00:00 REGULAR 3169998 1097769 305
5 A002 R051 02-00-00 05-22-11 00:00:00 REGULAR 3170119 1097792 121
6 A002 R051 02-00-00 05-22-11 04:00:00 REGULAR 3170146 1097801 27
7 A002 R051 02-00-00 05-22-11 08:00:00 REGULAR 3170164 1097820 18
8 A002 R051 02-00-00 05-22-11 12:00:00 REGULAR 3170240 1097867 76
9 A002 R051 02-00-00 05-22-11 16:00:00 REGULAR 3170388 1097912 148
10 A002 R051 02-00-00 05-22-11 20:00:00 REGULAR 3170611 1097941 223
11 A002 R051 02-00-00 05-23-11 00:00:00 REGULAR 3170695 1097964 84
12 A002 R051 02-00-00 05-23-11 04:00:00 REGULAR 3170701 1097964 6
13 A002 R051 02-00-00 05-23-11 08:00:00 REGULAR 3170746 1098069 45
14 A002 R051 02-00-00 05-23-11 12:00:00 REGULAR 3170897 1098378 151
15 A002 R051 02-00-00 05-23-11 16:00:00 REGULAR 3171194 1098447 297
16 A002 R051 02-00-00 05-23-11 20:00:00 REGULAR 3172059 1098511 865
17 A002 R051 02-00-00 05-24-11 00:00:00 REGULAR 3172200 1098528 141
18 A002 R051 02-00-00 05-24-11 04:00:00 REGULAR 3172214 1098529 14
19 A002 R051 02-00-00 05-24-11 08:00:00 REGULAR 3172266 1098628 52
20 A002 R051 02-00-00 05-24-11 12:00:00 REGULAR 3172407 1098952 141
21 A002 R051 02-00-00 05-24-11 16:00:00 REGULAR 3172689 1099010 282
22 A002 R051 02-00-00 05-24-11 20:00:00 REGULAR 3173590 1099055 901
23 A002 R051 02-00-00 05-25-11 00:00:00 REGULAR 3173803 1099079 213
24 A002 R051 02-00-00 05-25-11 04:00:00 REGULAR 3173826 1099081 23
25 A002 R051 02-00-00 05-25-11 08:00:00 REGULAR 3173873 1099178 47
26 A002 R051 02-00-00 05-25-11 12:00:00 REGULAR 3174022 1099496 149
27 A002 R051 02-00-00 05-25-11 16:00:00 REGULAR 3174309 1099572 287
28 A002 R051 02-00-00 05-25-11 20:00:00 REGULAR 3175172 1099651 863
29 A002 R051 02-00-00 05-26-11 00:00:00 REGULAR 3175360 1099675 188
... ... ... ... ... ... ... ... ... ...
7310 A042 R086 01-00-04 05-22-11 20:00:00 REGULAR 912511 4167549 362
7311 A042 R086 01-00-04 05-23-11 00:00:00 REGULAR 912606 4167828 95
7312 A042 R086 01-00-04 05-23-11 04:00:00 REGULAR 912616 4167855 10
7313 A042 R086 01-00-04 05-23-11 08:00:00 REGULAR 912622 4168025 6
7314 A042 R086 01-00-04 05-23-11 12:00:00 REGULAR 912679 4168923 57
7315 A042 R086 01-00-04 05-23-11 16:00:00 REGULAR 912843 4169719 164
7316 A042 R086 01-00-04 05-23-11 20:00:00 REGULAR 913194 4170521 351
7317 A042 R086 01-00-04 05-24-11 00:00:00 REGULAR 913291 4170682 97
7318 A042 R086 01-00-04 05-24-11 04:00:00 REGULAR 913300 4170711 9
7319 A042 R086 01-00-04 05-24-11 08:00:00 REGULAR 913309 4170890 9
7320 A042 R086 01-00-04 05-24-11 12:00:00 REGULAR 913360 4171856 51
7321 A042 R086 01-00-04 05-24-11 16:00:00 REGULAR 913536 4172582 176
7322 A042 R086 01-00-04 05-24-11 20:00:00 REGULAR 913892 4173464 356
7323 A042 R086 01-00-04 05-25-11 00:00:00 REGULAR 914006 4173624 114
7326 A042 R086 01-00-04 05-25-11 08:00:00 REGULAR 914027 4173813 21
7327 A042 R086 01-00-04 05-25-11 12:00:00 REGULAR 914087 4174759 60
7328 A042 R086 01-00-04 05-25-11 16:00:00 REGULAR 914262 4175544 175
7329 A042 R086 01-00-04 05-25-11 20:00:00 REGULAR 914686 4176445 424
7330 A042 R086 01-00-04 05-26-11 00:00:00 REGULAR 914808 4176633 122
7331 A042 R086 01-00-04 05-26-11 04:00:00 REGULAR 914822 4176664 14
7332 A042 R086 01-00-04 05-26-11 08:00:00 REGULAR 914830 4176849 8
7333 A042 R086 01-00-04 05-26-11 12:00:00 REGULAR 914902 4177778 72
7334 A042 R086 01-00-04 05-26-11 16:00:00 REGULAR 915105 4178603 203
7335 A042 R086 01-00-04 05-26-11 20:00:00 REGULAR 915583 4179421 478
7336 A042 R086 01-00-04 05-27-11 00:00:00 REGULAR 915724 4179647 141
7337 A042 R086 01-00-04 05-27-11 04:00:00 REGULAR 915737 4179675 13
7338 A042 R086 01-00-04 05-27-11 08:00:00 REGULAR 915743 4179820 6
7339 A042 R086 01-00-04 05-27-11 12:00:00 REGULAR 915806 4180746 63
7340 A042 R086 01-00-04 05-27-11 16:00:00 REGULAR 916090 4181746 284
7341 A042 R086 01-00-04 05-27-11 20:00:00 REGULAR 916517 4182631 427

6182 rows × 9 columns