In [1]:
import requests
Graded = 7/8
1) What books topped the Hardcover Fiction NYT best-sellers list on Mother's Day in 2009 and 2010? How about Father's Day?
In [2]:
# Mother's Day in 2009 was May 10, 2009.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell iterates mom_09_data['results'].
mom_09_data = response.json()
In [3]:
# Print the title of every book in the Mother's Day 2009 list response.
# Each entry in 'results' carries its own 'book_details' list, hence the
# nested loop.
# Q: Is a second for loop the only way into a dictionary in a list in a
#    dictionary in a list?
# A: No — when the inner list is known to hold a single element it can be
#    indexed directly, e.g. item['book_details'][0].
# NOTE(review): assumes each book_details entry has a 'title' key — confirm
# against the API response.
for item in mom_09_data['results']:
    for book in item['book_details']:
        print(book['title'])
In [4]:
# Mother's Day in 2010 was May 9, 2010.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell iterates mom_10_data['results'].
mom_10_data = response.json()
In [5]:
# Print the title of every book in the Mother's Day 2010 list response.
# Each entry in 'results' carries its own 'book_details' list, hence the
# nested loop.
# NOTE(review): assumes each book_details entry has a 'title' key — confirm
# against the API response.
for item in mom_10_data['results']:
    for book in item['book_details']:
        print(book['title'])
In [6]:
# Father's Day in 2009 was June 21, 2009.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell iterates dad_09_data['results'].
dad_09_data = response.json()
In [7]:
# Print the title of every book in the Father's Day 2009 list response.
# Each entry in 'results' carries its own 'book_details' list, hence the
# nested loop.
# NOTE(review): assumes each book_details entry has a 'title' key — confirm
# against the API response.
for item in dad_09_data['results']:
    for book in item['book_details']:
        print(book['title'])
In [8]:
# Father's Day in 2010 was June 20, 2010.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell iterates dad_10_data['results'].
dad_10_data = response.json()
In [9]:
# Print the title of every book in the Father's Day 2010 list response.
# Each entry in 'results' carries its own 'book_details' list, hence the
# nested loop.
# NOTE(review): assumes each book_details entry has a 'title' key — confirm
# against the API response.
for item in dad_10_data['results']:
    for book in item['book_details']:
        print(book['title'])
2) What are all the different book categories the NYT ranked in June 6, 2009? How about June 6, 2015?
Question: To specify a date, include it in the URI path. To specify a response-format, add it as an extension. The other parameters in this table are specified as name-value pairs in a query string. (What is the difference between putting it in the URI path and putting it as a query?)
In [10]:
# Fetch the list categories for June 6, 2009.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell iterates june6_09_data['results'].
june6_09_data = response.json()
In [11]:
# The categories appear under both the display name and the list name;
# the list name is the one printed here.
# TODO: double-check that this response is the right list for the question.
# NOTE(review): assumes each entry in 'results' has a 'list_name' key —
# confirm against the API response.
for category in june6_09_data['results']:
    print(category['list_name'])
In [12]:
# Fetch the list categories for June 6, 2015.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; a later cell iterates june6_15_data['results'].
june6_15_data = response.json()
In [13]:
In [14]:
# Print the list name of every category returned for June 6, 2015.
# NOTE(review): assumes each entry in 'results' has a 'list_name' key —
# confirm against the API response.
for category in june6_15_data['results']:
    print(category['list_name'])
3) Muammar Gaddafi's name can be transliterated many many ways. His last name is often a source of a million and one versions - Gadafi, Gaddafi, Kadafi, and Qaddafi to name a few. How many times has the New York Times referred to him by each of those names?
Tip: Add "Libya" to your search to make sure (-ish) you're talking about the right guy.
In [15]:
# Search for the spelling "Gadafi".
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell reads
# gadafi_data['response']['meta']['hits'].
gadafi_data = response.json()
In [16]:
# Pull the total hit count out of the search metadata, then report it.
gadafi_hits = gadafi_data['response']['meta']['hits']
print("The NYT has referred to him by 'Gadafi' a total of", gadafi_hits, "times")
In [17]:
# Search for the spelling "Gaddafi".
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell reads
# gaddafi_data['response']['meta']['hits'].
gaddafi_data = response.json()
In [18]:
# Pull the total hit count out of the search metadata, then report it.
gaddafi_hits = gaddafi_data['response']['meta']['hits']
print("The NYT has referred to him by 'Gaddafi' a total of", gaddafi_hits, "times")
In [19]:
# Search for the spelling "Kadafi".
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell reads
# kadafi_data['response']['meta']['hits'].
kadafi_data = response.json()
In [20]:
# Pull the total hit count out of the search metadata, then report it.
kadafi_hits = kadafi_data['response']['meta']['hits']
print("The NYT has referred to him by 'Kadafi' a total of", kadafi_hits, "times")
In [21]:
# Search for the spelling "Qaddafi".
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell reads
# qaddafi_data['response']['meta']['hits'].
qaddafi_data = response.json()
In [22]:
# Pull the total hit count out of the search metadata, then report it.
qaddafi_hits = qaddafi_data['response']['meta']['hits']
print("The NYT has referred to him by 'Qaddafi' a total of", qaddafi_hits, "times")
4) What's the title of the first story to mention the word 'hipster' in 1995? What's the first paragraph?
Question: What is the difference between query and filter query?
In [23]:
# Search 1995 articles for the word 'hipster'.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell reads
# hipster_data['response']['docs'][0].
hipster_data = response.json()
In [24]:
# Report the headline and the opening paragraph of the first document in
# the search results.
# NOTE(review): docs[0] is "the first story" only if the request asked for
# oldest-first ordering — confirm the query parameters.
first_story = hipster_data['response']['docs'][0]
print("The title of the first story to mention the word 'hipster' is", first_story['headline']['main'])
# The question also asks for the first paragraph, which was never printed.
# NOTE(review): assumes the document exposes it under 'lead_paragraph' —
# confirm against the API response.
print("The first paragraph is:", first_story['lead_paragraph'])
TA-Stephan: Didn't print out first paragraph.
5) How many times was gay marriage mentioned in the NYT between 1950-1959, 1960-1969, 1970-1979, 1980-1989, 1990-1999, 2000-2009, and 2010-present?
Tip: You'll want to put quotes around the search term so it isn't just looking for "gay" and "marriage" in the same article.
Tip: Write code to find the number of mentions between Jan 1, 1950 and Dec 31, 1959.
In [25]:
# Search for "gay marriage" between 1950 and 1959.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell reads
# gay_50_data['response']['meta']['hits'].
gay_50_data = response.json()
In [26]:
# Pull the decade's hit count out of the search metadata, then report it.
gay_50_hits = gay_50_data['response']['meta']['hits']
print("The number of times gay marriage is mentioned between 1950 and 1959 is", gay_50_hits, "times")
In [27]:
# Search for "gay marriage" between 1960 and 1969.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell reads
# gay_60_data['response']['meta']['hits'].
gay_60_data = response.json()
In [28]:
# Pull the decade's hit count out of the search metadata, then report it.
gay_60_hits = gay_60_data['response']['meta']['hits']
print("The number of times gay marriage is mentioned between 1960 and 1969 is", gay_60_hits, "times")
In [29]:
# Search for "gay marriage" between 1970 and 1979.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell reads
# gay_70_data['response']['meta']['hits'].
gay_70_data = response.json()
In [30]:
# Pull the decade's hit count out of the search metadata, then report it.
gay_70_hits = gay_70_data['response']['meta']['hits']
print("The number of times gay marriage is mentioned between 1970 and 1979 is", gay_70_hits, "times")
In [50]:
# Search for "gay marriage" between 1980 and 1989.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell reads
# gay_80_data['response']['meta']['hits'].
gay_80_data = response.json()
In [51]:
# Pull the decade's hit count out of the search metadata, then report it.
gay_80_hits = gay_80_data['response']['meta']['hits']
print("The number of times gay marriage is mentioned between 1980 and 1989 is", gay_80_hits, "times")
In [33]:
# Search for "gay marriage" between 1990 and 1999.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell reads
# gay_90_data['response']['meta']['hits'].
gay_90_data = response.json()
In [34]:
# Pull the decade's hit count out of the search metadata, then report it.
gay_90_hits = gay_90_data['response']['meta']['hits']
print("The number of times gay marriage is mentioned between 1990 and 1999 is", gay_90_hits, "times")
In [35]:
# Search for "gay marriage" between 2000 and 2009.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell reads
# gay_00_data['response']['meta']['hits'].
gay_00_data = response.json()
In [36]:
# Report the hit count for the 2000-2009 decade. The message previously
# said "2000 and 2001", which mislabelled the range the question asks for.
print("The number of times gay marriage is mentioned between 2000 and 2009 is", gay_00_data['response']['meta']['hits'], "times")
In [37]:
# Search for "gay marriage" from 2010 to the present.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell reads
# gay_10_data['response']['meta']['hits'].
gay_10_data = response.json()
In [38]:
# Pull the period's hit count out of the search metadata, then report it.
gay_10_hits = gay_10_data['response']['meta']['hits']
print("The number of times gay marriage is mentioned between 2010 and now is", gay_10_hits, "times")
6) What section talks about motorcycles the most?
Tip: You'll be using facets
In [39]:
# Search for motorcycles with section facets requested.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell reads
# motor_data['response']['facets']['section_name']['terms'].
motor_data = response.json()
In [40]:
# Each facet entry pairs a section name ('term') with its article count
# ('count'); list them all first.
top_motor = motor_data['response']['facets']['section_name']['terms']
for item in top_motor:
    print(item['term'], item['count'])

# Q: Is there a way to automatically tell you the highest value?
# A: Yes — max() with a key function picks the entry with the largest count,
#    so the answer no longer has to be typed in by hand.
# Q: Why can't I take terms[0] and loop over it?
# A: terms[0] is a single dict; iterating a dict yields its keys (strings),
#    so item['term'] fails with a TypeError. Index the dict directly instead.
# NOTE(review): assumes 'count' values are numbers, not strings — confirm.
if top_motor:
    busiest = max(top_motor, key=lambda entry: entry['count'])
    print("The section that mentions motorcycles the most is the", busiest['term'], "section with", busiest['count'], "mentions")
else:
    # An empty facet list would make max() raise ValueError.
    print("No section facets were returned for the motorcycle search")
7) How many of the last 20 movies reviewed by the NYT were Critics' Picks? How about the last 40? The last 60?
Tip: You really don't want to do this 3 separate times (1-20, 21-40 and 41-60) and add them together. What if, perhaps, you were able to figure out how to combine two lists? Then you could have a 1-20 list, a 1-40 list, and a 1-60 list, and then just run similar code for each of them.
In [41]:
# Fetch the first page of movie reviews (the latest 20, per the message
# printed by the following cell).
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; later cells read movie20_data['results'].
movie20_data = response.json()
In [42]:
# Assumption: critics_pick is 0 for "no" and 1 for "yes".
# Tally the reviews on this page whose critics_pick flag is set.
movie_count = sum(1 for review in movie20_data['results'] if review['critics_pick'] > 0)
print("There are", movie_count, "Critics' Picks movies in the last 20 movies reviewed by the NYT")
In [43]:
# Fetch a second page of movie reviews; a later cell concatenates its
# results with movie20_data['results'] to cover the last 40 reviews.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; later cells read movie40_data['results'].
movie40_data = response.json()
In [44]:
# Join the two result pages into one list covering the last 40 reviews,
# then tally the entries whose critics_pick flag is set.
movie40 = movie40_data['results'] + movie20_data['results']
movie_count = sum(1 for review in movie40 if review['critics_pick'] > 0)
print("There are", movie_count, "Critics' Picks movies in the last 40 movies reviewed by the NYT")
In [45]:
# Fetch a third page of movie reviews; the following cell concatenates its
# results with the two earlier pages to cover the last 60 reviews.
# NOTE(review): the URL argument is an empty string, so requests.get raises
# MissingSchema and nothing is fetched — supply the endpoint URL.
response = requests.get("")
# Decode the JSON body; the following cell reads movie60_data['results'].
movie60_data = response.json()
In [46]:
# Join all three result pages into one list covering the last 60 reviews,
# then tally the entries whose critics_pick flag is set.
movie60 = movie60_data['results'] + movie40_data['results'] + movie20_data['results']
movie_count = sum(1 for review in movie60 if review['critics_pick'] > 0)
print("There are", movie_count, "Critics' Picks movies in the last 60 movies reviewed by the NYT")
8) Out of the last 40 movie reviews from the NYT, which critic has written the most reviews?
In [52]:
from collections import Counter

# Find the critic with the most reviews among the last 40.
# movie40_data alone is a single page of results, so both pages are joined
# here to cover all 40 reviews.
last_40_reviews = movie20_data['results'] + movie40_data['results']

# Collect one byline per review; previously the list was never filled, so
# the Counter was always empty and nothing was reported.
byline_count = [review['byline'] for review in last_40_reviews]
counts = Counter(byline_count)

if counts:
    # most_common(1) returns a one-element list of (byline, count) pairs.
    top_critic, review_total = counts.most_common(1)[0]
    print("The critic who wrote the most of the last 40 reviews is", top_critic, "with", review_total, "reviews")
else:
    print("No reviews were returned, so no critic can be named")
In [ ]:
In [ ]: