In [ ]:
# Copy the file
!curl https://raw.githubusercontent.com/TeachingDataScience/datasets/master/nyt1.csv > nyt1.csv
In [ ]:
# %load nytimes_counter.py
# Count how many rows of nyt1.csv fall into each gender and each age.
# Written to run unchanged on Python 2.7 and Python 3.

# Import required libraries
import csv
from collections import Counter


def count_gender_and_age(csv_path):
    """Tally the gender and age values found in a CSV file.

    The first row is treated as a header and skipped. Every other non-empty
    row must have exactly five fields, in the order
    age, gender, impressions, clicks, signed_in.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A ``(gender_counter, age_counter)`` pair of ``collections.Counter``
        objects keyed by the raw string values read from the file. Looking up
        a value that never occurred yields 0 instead of raising ``KeyError``.

    Raises:
        ValueError: If a non-empty data row does not have exactly five fields.
    """
    gender_counter = Counter()
    age_counter = Counter()
    # The context manager guarantees the file is closed, even on error.
    with open(csv_path) as csv_file:
        lines = csv.reader(csv_file)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(lines, None)
        # For each line, fill in the counters
        for line in lines:
            if not line:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue
            age, gender, _impressions, _clicks, _signed_in = line
            gender_counter[gender] += 1
            age_counter[age] += 1
    return gender_counter, age_counter


if __name__ == '__main__':
    gender_counter, age_counter = count_gender_and_age('nyt1.csv')
    # The two spaces after the colon reproduce the output of the original
    # Python 2 statement `print "Gender 0: ", value`.
    print("Gender 0:  {0}".format(gender_counter['0']))
    print("Gender 1:  {0}".format(gender_counter['1']))
    print("Ages: ")
    # Convert to a plain dict so the output keeps the original {...} form.
    print(dict(age_counter))
In [4]:
# Run the file: the leading "!" hands the command to the system shell, so the
# script executes in a separate Python process rather than in the notebook
# kernel.
# NOTE(review): presumably nytimes_counter.py and nyt1.csv must both be in the
# current working directory for this to succeed — confirm.
!python nytimes_counter.py
Note: The `%load` magic copies a file's contents into a notebook cell, so the code can be edited and run directly in the notebook.
In [5]:
import pandas as pd


def summarize_gender_and_age(nyt):
    """Count rows per gender code and per age in a DataFrame.

    Args:
        nyt: DataFrame with a ``Gender`` column and an ``Age`` column.

    Returns:
        A ``(gender_counts, age_counts)`` pair. ``gender_counts`` is a dict
        mapping each of the gender codes 0 and 1 to its number of rows;
        ``age_counts`` is the Series produced by
        ``nyt.groupby('Age').Age.count()``.
    """
    # Summing the boolean mask counts matching rows without building a
    # filtered copy of the whole frame.
    gender_counts = dict(
        (code, int((nyt.Gender == code).sum())) for code in (0, 1)
    )
    age_counts = nyt.groupby('Age').Age.count()
    return gender_counts, age_counts


if __name__ == '__main__':
    fileurl = "https://raw.githubusercontent.com/TeachingDataScience/datasets/master/nyt1.csv"
    nyt = pd.read_csv(fileurl)
    gender_counts, age_counts = summarize_gender_and_age(nyt)
    # The two spaces after the colon reproduce the output of the original
    # Python 2 statement `print "Gender 0: ", value`.
    print("Gender 0:  {0}".format(gender_counts[0]))
    print("Gender 1:  {0}".format(gender_counts[1]))
    print(age_counts)
Note: How would we make this analysis reusable?