In [1]:
import os
# Folder holding the exercise's turnstile files; the cells below open them by
# bare filename, so they are resolved relative to this directory.
# Raw string on purpose: in a normal literal "\U" (from "\Udacity") starts a
# \UXXXXXXXX escape and is a SyntaxError on Python 3.
# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
EXERCISE_DIR = r"C:\Vindico\Projects\Data\Course\Python\Udacity\Introduction to Data Science\Lesson 2\Exercise"

os.chdir(EXERCISE_DIR)
# Last expression of the cell: shows the directory now in effect.
os.getcwd()
Out[1]:
In [2]:
import csv
def fix_turnstile_data(filenames):
    '''
    Rewrite MTA Subway turnstile text files so each row holds a single entry.

    An example input file can be seen at:
    http://web.mta.info/developers/data/nyct/turnstile/turnstile_110507.txt
    Each input row starts with three identifying fields and is followed by
    any number of five-field entries. Every entry is written out as its own
    row, prefixed with the three identifying fields of the row it came from,
    for example:

        A002,R051,02-00-00,05-28-11,00:00:00,REGULAR,003178521,001100739
        A002,R051,02-00-00,05-28-11,04:00:00,REGULAR,003178541,001100746
        A002,R051,02-00-00,05-28-11,08:00:00,REGULAR,003178559,001100775

    The order of the fields is preserved. Trailing fields that do not make up
    a complete five-field entry are dropped.

    Sample input file:
    https://www.dropbox.com/s/mpin5zv4hgrx244/turnstile_110528.txt
    Sample updated file:
    https://www.dropbox.com/s/074xbgio4c39b7h/solution_turnstile_110528.txt

    Parameters:
        filenames: iterable of paths to turnstile text files.

    Side effects:
        For every input path, writes "updated_" + <file name> next to the
        input file, e.g. "turnstile_110521.txt" produces
        "updated_turnstile_110521.txt" in the same directory.

    Returns:
        None.
    '''
    import os
    import sys

    header_fields = 3   # identifying fields repeated on every output row
    entry_fields = 5    # fields that make up one entry

    def open_for_csv(path, mode):
        # The csv module wants binary handles on Python 2, but text handles
        # opened with newline='' on Python 3.
        if sys.version_info[0] < 3:
            return open(path, mode + 'b')
        return open(path, mode, newline='')

    for name in filenames:
        # Prefix only the file name so inputs given with a directory part
        # still produce a valid output path.
        folder, base = os.path.split(name)
        target = os.path.join(folder, 'updated_' + base)
        with open_for_csv(name, 'r') as source, open_for_csv(target, 'w') as sink:
            writer = csv.writer(sink)
            for row in csv.reader(source):
                prefix = row[:header_fields]
                # Stop at the last position where a complete entry still fits.
                last_start = len(row) - entry_fields
                for start in range(header_fields, last_start + 1, entry_fields):
                    writer.writerow(prefix + row[start:start + entry_fields])
In [3]:
# Run the converter on one file; the name is relative, so it is looked up in
# the current working directory. This version of the function takes a list.
filename = ["turnstile_110528.csv"]
fix_turnstile_data(filename)
In [4]:
import csv
def fix_turnstile_data(name):
    '''
    Rewrite one MTA Subway turnstile text file so each row holds a single entry.

    An example input file can be seen at:
    http://web.mta.info/developers/data/nyct/turnstile/turnstile_110507.txt
    Each input row starts with three identifying fields and is followed by
    any number of five-field entries. Every entry is written out as its own
    row, prefixed with the three identifying fields of the row it came from,
    for example:

        A002,R051,02-00-00,05-28-11,00:00:00,REGULAR,003178521,001100739
        A002,R051,02-00-00,05-28-11,04:00:00,REGULAR,003178541,001100746
        A002,R051,02-00-00,05-28-11,08:00:00,REGULAR,003178559,001100775

    The order of the fields is preserved. Trailing fields that do not make up
    a complete five-field entry are dropped.

    Sample input file:
    https://www.dropbox.com/s/mpin5zv4hgrx244/turnstile_110528.txt
    Sample updated file:
    https://www.dropbox.com/s/074xbgio4c39b7h/solution_turnstile_110528.txt

    Parameters:
        name: path to a single turnstile text file (a string, not a list).

    Side effects:
        Writes "updated_" + <file name> next to the input file, e.g.
        "turnstile_110521.txt" produces "updated_turnstile_110521.txt" in the
        same directory.

    Returns:
        None.
    '''
    import os
    import sys

    header_fields = 3   # identifying fields repeated on every output row
    entry_fields = 5    # fields that make up one entry

    def open_for_csv(path, mode):
        # The csv module wants binary handles on Python 2, but text handles
        # opened with newline='' on Python 3.
        if sys.version_info[0] < 3:
            return open(path, mode + 'b')
        return open(path, mode, newline='')

    # Prefix only the file name so an input given with a directory part still
    # produces a valid output path.
    folder, base = os.path.split(name)
    target = os.path.join(folder, 'updated_' + base)
    with open_for_csv(name, 'r') as source, open_for_csv(target, 'w') as sink:
        writer = csv.writer(sink)
        for row in csv.reader(source):
            prefix = row[:header_fields]
            # Stop at the last position where a complete entry still fits.
            last_start = len(row) - entry_fields
            for start in range(header_fields, last_start + 1, entry_fields):
                writer.writerow(prefix + row[start:start + entry_fields])
In [5]:
# Run the converter on one file; the name is relative, so it is looked up in
# the current working directory. This version of the function takes a string.
filename = "turnstile_110528.csv"
fix_turnstile_data(filename)