Graded = 8/8
In [1]:
# All APIs: http://developer.nytimes.com/
#Article search API: http://developer.nytimes.com/article_search_v2.json
#Best-seller API: http://developer.nytimes.com/books_api.json#/Documentation
#Test/build queries: http://developer.nytimes.com/
# 1) What books topped the Hardcover Fiction NYT best-sellers list on Mother's Day in 2009 and 2010?
# How about Father's Day?
from collections import Counter
from datetime import datetime

import requests
def printBestSellers(date):
    """Print the NYT Hardcover Fiction best-sellers returned for a date.

    Sends one GET request to the Books API and prints a
    "<title> by <author>" line for every entry in the response's
    ``results`` list.

    Args:
        date: Date string forwarded as the ``date`` query parameter
            (the calls in this notebook use the ``YYYY-MM-DD`` form).

    Returns:
        None. The lines are written to stdout.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        KeyError, IndexError: If the payload lacks ``results`` or an entry
            has no ``book_details``.
    """
    # NOTE(review): the empty path segments in "lists//.json" are kept exactly
    # as the request was originally written -- confirm against the API docs.
    url = "https://api.nytimes.com/svc/books/v3/lists//.json"
    # Let requests build and percent-encode the query string instead of
    # concatenating raw text onto the URL.
    # NOTE(review): the API key is hard-coded; consider reading it from the
    # environment before sharing this notebook.
    params = {
        "api-key": "71621eb479f045bf8bee783b6943fdd4",
        "date": date,
        "list-name": "hardcover-fiction",
    }
    response = requests.get(url, params=params)
    # Fail loudly on an HTTP error rather than with a KeyError further down.
    response.raise_for_status()

    for book in response.json()['results']:
        details = book['book_details'][0]
        print(details['title'], "by", details['author'])
# Mother's Day (May) and Father's Day (June) dates for 2009 and 2010, paired
# with the date string that printBestSellers sends to the API.
# Every heading now ends with a colon; the last one used to lack it.
for heading, list_date in (
    ("May 10, 2009", "2009-05-10"),
    ("May 9, 2010", "2010-05-09"),
    ("June 21, 2009", "2009-06-21"),
    ("June 20, 2010", "2010-06-20"),
):
    print("\n" + heading + ":")
    printBestSellers(list_date)
In [2]:
# 2) What are all the different book categories the NYT ranked in June 6, 2009? How about June 6, 2015?
def printBestSellerRanks(date):
    """Print the best-seller list names the Books API returns for a date.

    Prints a header line with the number of categories (``num_results``)
    followed by a comma-separated line of every ``list_name`` in
    ``results``.

    Args:
        date: Date string forwarded as the ``date`` query parameter. When it
            has the ``YYYY-MM-DD`` form it is shown in the header as
            "<Month> <day>, <year>"; any other string is shown unchanged.

    Returns:
        None. Output is written to stdout.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        KeyError: If the payload lacks ``num_results`` or ``results``.
    """
    url = "https://api.nytimes.com/svc/books/v3/lists/names.json"
    # NOTE(review): the API key is hard-coded; consider reading it from the
    # environment before sharing this notebook.
    params = {
        "api-key": "71621eb479f045bf8bee783b6943fdd4",
        "date": date,
    }
    response = requests.get(url, params=params)
    # Fail loudly on an HTTP error rather than with a KeyError further down.
    response.raise_for_status()
    data = response.json()

    # Derive the header from the requested date. The header used to hard-code
    # "June 6", which was only right for the two dates this notebook queries.
    try:
        parsed = datetime.strptime(date, "%Y-%m-%d")
        date_label = parsed.strftime("%B") + " " + str(parsed.day) + ", " + str(parsed.year)
    except ValueError:
        date_label = date

    print("The NYT ranked", data['num_results'], "different book categories in " + date_label + ":")
    print(', '.join(category['list_name'] for category in data['results']) + ".\n")
# Report the ranked categories for the same calendar day in both years.
for ranking_date in ("2009-06-06", "2015-06-06"):
    printBestSellerRanks(ranking_date)
In [3]:
# 3) Muammar Gaddafi's name can be transliterated many many ways. His last name is often a source of a million and one versions - Gadafi, Gaddafi, Kadafi, and Qaddafi to name a few. How many times has the New York Times referred to him by each of those names?
# Tip: Add "Libya" to your search to make sure (-ish) you're talking about the right guy.
def searchFor(query):
    """Print how many Article Search hits a spelling gets alongside "libya".

    Prints a "Searching for ..." line, queries the Article Search API for
    ``query`` followed by the word "libya", and prints the ``hits`` count
    from the response metadata.

    Args:
        query: Search term (here, one transliteration of the surname).

    Returns:
        None. Output is written to stdout.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        KeyError: If the payload lacks ``response.meta.hits``.
    """
    print("Searching for “" + query + "”…")
    url = "https://api.nytimes.com/svc/search/v2/articlesearch.json"
    # Pass the term through ``params`` so spaces and reserved characters
    # (&, #, +, ...) in ``query`` are percent-encoded instead of corrupting
    # the query string.
    # NOTE(review): the API key is hard-coded; consider reading it from the
    # environment before sharing this notebook.
    params = {
        "api-key": "71621eb479f045bf8bee783b6943fdd4",
        "q": query + " libya",
    }
    response = requests.get(url, params=params)
    # Fail loudly on an HTTP error rather than with a KeyError further down.
    response.raise_for_status()
    data = response.json()
    print(data['response']['meta']['hits'], "hits found.")
# Run the same search once per transliteration of the surname.
for spelling in ("Gadafi", "Gaddafi", "Kadafi", "Qaddafi"):
    searchFor(spelling)
In [4]:
# 4) What's the title of the first story to mention the word 'hipster' in 1995? What's the first paragraph?
# Ask for the oldest article matching "hipster" inside calendar year 1995 and
# print its headline and lead paragraph.
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json"
# NOTE(review): the API key is hard-coded; consider reading it from the
# environment before sharing this notebook.
params = {
    "api-key": "71621eb479f045bf8bee783b6943fdd4",
    "q": "hipster",
    "begin_date": "19950101",
    # Without an upper bound, the "oldest" hit could come from a later year
    # if 1995 had no matches, which would answer a different question.
    "end_date": "19951231",
    "sort": "oldest",
}
response = requests.get(url, params=params)
# Fail loudly on an HTTP error rather than with a KeyError further down.
response.raise_for_status()
data = response.json()

docs = data['response']['docs']
if docs:
    first_story = docs[0]
    print(first_story['headline']['main'])
    print(first_story['lead_paragraph'])
else:
    # An empty result list used to raise IndexError on docs[0].
    print("No stories mentioning 'hipster' were found in 1995.")
In [5]:
# 5) How many times was gay marriage mentioned in the NYT between
# 1950-1959, 1960-1969, 1970-1979, 1980-1989, 1990-1999, 2000-2009, and 2010-present?
# Tip: You'll want to put quotes around the search term so it isn't just looking for "gay" and "marriage" in the same article.
# Tip: Write code to find the number of mentions between Jan 1, 1950 and Dec 31, 1959.
def marriageHits(beginDate, endDate = 0):
    """Print how many articles match the phrase "gay marriage" in a year range.

    The range runs from January 1 of ``beginDate`` to December 31 of the
    last year, and the ``hits`` count from the Article Search response
    metadata is printed.

    Args:
        beginDate: First year of the range, as an int (e.g. ``1950``).
        endDate: Last year of the range, as an int. The default ``0`` means
            "a full decade", i.e. ``beginDate + 9``.

    Returns:
        None. Output is written to stdout.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        KeyError: If the payload lacks ``response.meta.hits``.
    """
    # Resolve the last year once so the request and the printed label agree.
    # The custom-end branch used to add the int 1231 to a str (TypeError),
    # and the label always showed beginDate + 9.
    lastYear = beginDate + 9 if endDate == 0 else endDate
    beginString = str(beginDate) + "0101"
    endString = str(lastYear) + "1231"

    url = "https://api.nytimes.com/svc/search/v2/articlesearch.json"
    # NOTE(review): the API key is hard-coded; consider reading it from the
    # environment before sharing this notebook.
    params = {
        "api-key": "71621eb479f045bf8bee783b6943fdd4",
        # The double quotes request the exact phrase, not both words anywhere.
        "q": '"gay marriage"',
        "begin_date": beginString,
        "end_date": endString,
    }
    response = requests.get(url, params=params)
    # Fail loudly on an HTTP error rather than with a KeyError further down.
    response.raise_for_status()
    data = response.json()
    print(beginDate, "-", lastYear, ":", data['response']['meta']['hits'], "hits found.")
# One query per decade. The range starts at 1950 so the first decade named in
# the question is included; the last start year, 2010, covers 2010-2019.
for decade_start in range(1950, 2020, 10):
    marriageHits(decade_start)
In [6]:
# 6) What section talks about motorcycles the most?
# Tip: You'll be using facets
# "facet_field" - see http://developer.nytimes.com/article_search_v2.json#/Documentation/GET/articlesearch.json
# Request a facet breakdown by section for the "motorcycles" query and report
# the section with the largest count.
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json"
# NOTE(review): the API key is hard-coded; consider reading it from the
# environment before sharing this notebook.
params = {
    "api-key": "71621eb479f045bf8bee783b6943fdd4",
    "q": "motorcycles",
    "facet_field": "section_name",
}
response = requests.get(url, params=params)
# Fail loudly on an HTTP error rather than with a KeyError further down.
response.raise_for_status()
data = response.json()
sections = data['response']['facets']['section_name']['terms']

if sections:
    # Pick the maximum explicitly instead of trusting the API to return the
    # terms sorted by count. On a tie the earliest entry wins, which matches
    # taking the first item of a list sorted in descending order.
    top_section = max(sections, key=lambda section: section['count'])
    print("The section which talks about motorcycles the most is", "“" + top_section['term'] + "”", "with", top_section['count'], "hits.")
else:
    # An empty facet list used to raise IndexError on sections[0].
    print("No section facets were returned for “motorcycles”.")
In [7]:
# 7) How many of the last 20 movies reviewed by the NYT were Critics' Picks? How about the last 40? The last 60?
# Movie reviews: http://developer.nytimes.com/movie_reviews_v2.json -> /reviews/search.json -> "critics-pick"
# Offsets handed to getReviews, one per request.
# NOTE(review): stepping by 20 assumes the endpoint returns 20 reviews per
# request -- confirm against the Movie Reviews API docs.
offset = list(range(0, 60, 20))
# Label for the cumulative window reached after each request.
list_names = ["last " + str(start + 20) for start in offset]
# Bylines collected by the loop below; starts out empty.
author_list = list()
def getReviews(offset):
    """Fetch one page of movie reviews from the NYT Movie Reviews API.

    Args:
        offset: Value forwarded as the ``offset`` query parameter (the
            notebook passes 0, 20 and 40).

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    url = "https://api.nytimes.com/svc/movies/v2/reviews/search.json"
    # NOTE(review): the API key is hard-coded; consider reading it from the
    # environment before sharing this notebook.
    params = {
        "api-key": "71621eb479f045bf8bee783b6943fdd4",
        "offset": str(offset),
    }
    response = requests.get(url, params=params)
    # Surface HTTP errors here instead of letting the caller hit a KeyError
    # on 'results' when the API returns an error payload.
    response.raise_for_status()
    return response.json()
# Running total of Critics' Picks across the batches fetched so far, so each
# printed figure is cumulative (last 20, then last 40, then last 60).
total_picks = 0
for batch_index, (batch_offset, window_label) in enumerate(zip(offset, list_names)):
    batch = getReviews(batch_offset)
    for review in batch['results']:
        total_picks += review['critics_pick']
        # Question 8 only looks at the last 40 reviews, so bylines are
        # collected from the first two batches only.
        if batch_index < 2:
            author_list.append(review['byline'])
    # Output wording fixed: was "contains ... critick picks".
    print("The", window_label, "movies reviewed by the NYT contain", total_picks, "Critics' Picks.")
# 8) Out of the last 40 movie reviews from the NYT, which critic has written the most reviews?
# collections.Counter: https://docs.python.org/3/library/collections.html#collections.Counter
# Counter tallies every byline in one pass. most_common(1) returns the
# highest count and breaks ties by first appearance in author_list, so the
# answer is deterministic. Taking max() over a set depended on set iteration
# order for ties, and raised ValueError on an empty list.
byline_counts = Counter(author_list)
if byline_counts:
    top_critic = byline_counts.most_common(1)[0][0]
    print(top_critic, "has written the most reviews.")
else:
    print("No reviews were collected, so no critic can be named.")