All API's: http://developer.nytimes.com/ Article search API: http://developer.nytimes.com/article_search_v2.json Best-seller API: http://developer.nytimes.com/books_api.json#/Documentation Test/build queries: http://developer.nytimes.com/
Tip: Remember to include your API key in all requests! And their interactive web thing is pretty bad. You'll need to register for the API key.
In [1]:
#API Key: 0c3ba2a8848c44eea6a3443a17e57448
In [2]:
import requests
bestseller_response = requests.get('http://api.nytimes.com/svc/books/v2/lists/2009-05-10/hardcover-fiction?api-key=0c3ba2a8848c44eea6a3443a17e57448')
bestseller_data = bestseller_response.json()
print("The type of bestseller_data is:", type(bestseller_data))
print("The keys of bestseller_data are:", bestseller_data.keys())
In [3]:
# Exploring the data structure further
bestseller_books = bestseller_data['results']
print(type(bestseller_books))
print(bestseller_books[0])
In [4]:
for book in bestseller_books:
#print("NEW BOOK!!!")
#print(book['book_details'])
#print(book['rank'])
if book['rank'] == 1:
for element in book['book_details']:
print("The book that topped the hardcover fiction NYT Beststeller list on Mothers Day in 2009 was", element['title'], "written by", element['author'])
In [5]:
def bestseller(x, y):
bestsellerA_response = requests.get('http://api.nytimes.com/svc/books/v2/lists/'+ x +'/hardcover-fiction?api-key=0c3ba2a8848c44eea6a3443a17e57448')
bestsellerA_data = bestsellerA_response.json()
bestsellerA_books = bestsellerA_data['results']
for book in bestsellerA_books:
if book['rank'] == 1:
for element in book['book_details']:
print("The book that topped the hardcover fiction NYT Beststeller list on", y, "was",
element['title'], "written by", element['author'])
bestseller('2009-05-10', "Mothers Day 2009")
bestseller('2010-05-09', "Mothers Day 2010")
bestseller('2009-06-21', "Fathers Day 2009")
bestseller('2010-06-20', "Fathers Day 2010")
#Alternative solution would be, instead of putting this code into a function to loop it:
#1) to create a dictionary called dates containing y as keys and x as values to these keys
#2) to take the above code and nest it into a for loop that loops through the dates, each time using the next key:value pair
# for date in dates:
# replace value in URL and run the above code used inside the function
# replace key in print statement
In [6]:
# STEP 1: Exploring the data structure using just one of the dates from the question
bookcat_response = requests.get('http://api.nytimes.com/svc/books/v2/lists/names.json?published-date=2009-06-06&api-key=0c3ba2a8848c44eea6a3443a17e57448')
bookcat_data = bookcat_response.json()
print(type(bookcat_data))
print(bookcat_data.keys())
bookcat = bookcat_data['results']
print(type(bookcat))
print(bookcat[0])
In [7]:
# STEP 2: Writing a loop that runs the same code for both dates (no function, as only one variable)
dates = ['2009-06-06', '2015-06-15']
for date in dates:
bookcatN_response = requests.get('http://api.nytimes.com/svc/books/v2/lists/names.json?published-date=' + date + '&api-key=0c3ba2a8848c44eea6a3443a17e57448')
bookcatN_data = bookcatN_response.json()
bookcatN = bookcatN_data['results']
category_listN = []
for category in bookcatN:
category_listN.append(category['display_name'])
print(" ")
print("THESE WERE THE DIFFERENT BOOK CATEGORIES THE NYT RANKED ON", date)
for cat in category_listN:
print(cat)
Tip: Add "Libya" to your search to make sure (-ish) you're talking about the right guy.
In [8]:
# STEP 1a: EXPLORING THE DATA
test_response = requests.get('http://api.nytimes.com/svc/search/v2/articlesearch.json?q=Gaddafi+Libya&api-key=0c3ba2a8848c44eea6a3443a17e57448')
test_data = test_response.json()
print(type(test_data))
print(test_data.keys())
test_hits = test_data['response']
print(type(test_hits))
print(test_hits.keys())
In [9]:
# STEP 1b: EXPLORING THE META DATA
test_hits_meta = test_data['response']['meta']
print("The meta data of the search request is a", type(test_hits_meta))
print("The dictionary despot_hits_meta has the following keys:", test_hits_meta.keys())
print("The search requests with the TEST URL yields total:")
test_hit_count = test_hits_meta['hits']
print(test_hit_count)
In [10]:
# STEP 2: BUILDING THE CODE TO LOOP THROUGH DIFFERENT SPELLINGS
despot_names = ['Gadafi', 'Gaddafi', 'Kadafi', 'Qaddafi']
for name in despot_names:
despot_response = requests.get('http://api.nytimes.com/svc/search/v2/articlesearch.json?q=' + name +'+Libya&api-key=0c3ba2a8848c44eea6a3443a17e57448')
despot_data = despot_response.json()
despot_hits_meta = despot_data['response']['meta']
despot_hit_count = despot_hits_meta['hits']
print("The NYT has referred to the Libyan despot", despot_hit_count, "times using the spelling", name)
In [11]:
hip_response = requests.get('http://api.nytimes.com/svc/search/v2/articlesearch.json?q=hipster&fq=pub_year:1995&api-key=0c3ba2a8848c44eea6a3443a17e57448')
hip_data = hip_response.json()
print(type(hip_data))
print(hip_data.keys())
# STEP 1: EXPLORING THE DATA STRUCTURE:
hipsters = hip_data['response']
#print(hipsters)
#hipsters_meta = hipsters['meta']
#print(type(hipsters_meta))
hipsters_results = hipsters['docs']
print(hipsters_results[0].keys())
#print(type(hipsters_results))
In [12]:
#STEP 2: LOOPING FOR THE ANSWER:
earliest_date = '1996-01-01'
for mention in hipsters_results:
if mention['pub_date'] < earliest_date:
earliest_date = mention['pub_date']
print("This is the headline of the first text to mention 'hipster' in 1995:", mention['headline']['main'])
print("It was published on:", mention['pub_date'])
print("This is its lead paragraph:")
print(mention['lead_paragraph'])
Tip: You'll want to put quotes around the search term so it isn't just looking for "gay" and "marriage" in the same article.
Tip: Write code to find the number of mentions between Jan 1, 1950 and Dec 31, 1959.
In [13]:
# data structure requested same as in task 3, just this time loop though different date ranges
def countmention(a, b, c):
if b == ' ':
marry_response = requests.get('http://api.nytimes.com/svc/search/v2/articlesearch.json?q="gay marriage"&begin_date='+ a +'&api-key=0c3ba2a8848c44eea6a3443a17e57448')
else:
marry_response = requests.get('http://api.nytimes.com/svc/search/v2/articlesearch.json?q="gay marriage"&begin_date='+ a +'&end_date='+ b +'&api-key=0c3ba2a8848c44eea6a3443a17e57448')
marry_data = marry_response.json()
marry_hits_meta = marry_data['response']['meta']
marry_hit_count = marry_hits_meta['hits']
print("The count for NYT articles mentioning 'gay marriage' between", c, "is", marry_hit_count)
#supposedly, there's a way to solve the following part in a more efficient way, but those I tried did not work,
#so it ended up being more time-efficient just to type it:
countmention('19500101', '19591231', '1950 and 1959')
countmention('19600101', '19691231', '1960 and 1969')
countmention('19700101', '19791231', '1970 and 1979')
countmention('19800101', '19891231', '1980 and 1989')
countmention('19900101', '19991231', '1990 and 1999')
countmention('20000101', '20091231', '2000 and 2009')
countmention('20100101', ' ', '2010 and present')
In [14]:
moto_response = requests.get('http://api.nytimes.com/svc/search/v2/articlesearch.json?q=motorcycle&facet_field=section_name&facet_filter=true&api-key=0c3ba2a8848c44eea6a3443a17e57448')
moto_data = moto_response.json()
#STEP 1: EXPLORING DATA STRUCTURE
#print(type(moto_data))
#print(moto_data.keys())
#print(moto_data['response'])
#print(moto_data['response'].keys())
#print(moto_data['response']['facets'])
#STEP 2: Code to get to the answer
moto_facets = moto_data['response']['facets']
#print(moto_facets)
#print(moto_facets.keys())
moto_sections = moto_facets['section_name']['terms']
#print(moto_sections)
#this for loop is not necessary, but it's nice to know the counts
#(also to check whether the next loop identifies the right section)
for section in moto_sections:
print("The section", section['term'], "mentions motorcycles", section['count'], "times.")
most_motorcycles = 0
for section in moto_sections:
if section['count'] > most_motorcycles:
most_motorcycles = section['count']
print(" ")
print("That means the section", section['term'], "mentions motorcycles the most, namely", section['count'], "times.")
Tip: You really don't want to do this 3 separate times (1-20, 21-40 and 41-60) and add them together. What if, perhaps, you were able to figure out how to combine two lists? Then you could have a 1-20 list, a 1-40 list, and a 1-60 list, and then just run similar code for each of them.
In [35]:
picks_offset_values = [0, 20, 40]
picks_review_list = []
for value in picks_offset_values:
picks_response = requests.get ('http://api.nytimes.com/svc/movies/v2/reviews/search.json?&offset=' + str(value) + '&api-key=0c3ba2a8848c44eea6a3443a17e57448')
picks_data = picks_response.json()
#STEP 1: EXPLORING THE DATA STRUCTURE (without the loop)
#print(picks_data.keys())
#print(picks_data['num_results'])
#print(picks_data['results'])
#print(type(picks_data['results']))
#print(picks_data['results'][0].keys())
#STEP 2: After writing a test code (not shown) without the loop, now CODING THE LOOP
last_reviews = picks_data['num_results']
picks_results = picks_data['results']
critics_pick_count = 0
for review in picks_results:
if review['critics_pick'] == 1:
critics_pick_count = critics_pick_count + 1
picks_new_count = critics_pick_count
picks_review_list.append(picks_new_count)
print("Out of the last", last_reviews + value, "movie reviews,", sum(picks_review_list), "were Critics' picks.")
In [280]:
#STEP 1: EXPLORING THE DATA STRUCTURE (without the loop)
#critics_response = requests.get('http://api.nytimes.com/svc/movies/v2/reviews/search.json?&offset=0&api-key=0c3ba2a8848c44eea6a3443a17e57448')
#critics_data = critics_response.json()
#print(critics_data.keys())
#print(critics_data['num_results'])
#print(critics_data['results'])
#print(type(critics_data['results']))
#print(critics_data['results'][0].keys())
#STEP 2: CREATE A LOOP, THAT GOES THROUGH THE SEARCH RESULTS FOR EACH OFFSET VALUE AND STORES THE RESULTS IN THE SAME LIST
#(That list is then passed on to step 3)
critics_offset_value = [0, 20]
critics_list = [ ]
for value in critics_offset_value:
critics_response = requests.get('http://api.nytimes.com/svc/movies/v2/reviews/search.json?&offset=' + str(value) + '&api-key=0c3ba2a8848c44eea6a3443a17e57448')
critics_data = critics_response.json()
critics = critics_data['results']
for review in critics:
critics_list.append(review['byline'])
#print(critics_list)
unique_critics = set(critics_list)
#print(unique_critics)
#STEP 3: FOR EVERY NAME IN THE UNIQUE CRITICS LIST, LOOP THROUGH NON-UNIQUE LIST TO COUNT HOW OFTEN THEY OCCUR
#STEP 4: SELECT THE ONE THAT HAS WRITTEN THE MOST (from the #print statement below, I know it's two people with same score)
max_count = 0
for name in unique_critics:
name_count = 0
for critic in critics_list:
if critic == name:
name_count = name_count + 1
if name_count > max_count:
max_count = name_count
max_name = name
if name_count == max_count:
same_count = name_count
same_name = name
#print(name, "has written", name_count, "reviews out of the last 40 reviews.")
print(max_name, "has written the most of the last 40 reviews:", max_count)
print(same_name, "has written the most of the last 40 reviews:", same_count)