In [1]:
import requests
Graded = 7/8
1) What books topped the Hardcover Fiction NYT best-sellers list on Mother's Day in 2009 and 2010? How about Father's Day?
In [2]:
# Hardcover Fiction best-seller list for Mother's Day 2009 (May 10, 2009).
# NOTE(review): the API key is hard-coded in the URL; consider loading it from the environment before sharing.
mom_09_url = "http://api.nytimes.com/svc/books/v2/lists/2009-05-10/hardcover-fiction.json?api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(mom_09_url)
mom_09_data = response.json()
In [3]:
# Each entry in 'results' holds a 'book_details' list; walk both levels lazily
# and print every title found.
# Q: Is this the only way to get into a dictionary in a list in a dictionary in a list? To do another for loop?
mom_09_titles = (
    book['title']
    for entry in mom_09_data['results']
    for book in entry['book_details']
)
for book_title in mom_09_titles:
    print(book_title)
In [4]:
# Hardcover Fiction best-seller list for Mother's Day 2010 (May 9, 2010).
mom_10_url = "http://api.nytimes.com/svc/books/v2/lists/2010-05-09/hardcover-fiction.json?api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(mom_10_url)
mom_10_data = response.json()
In [5]:
# Print the title of every book on the Mother's Day 2010 list.
mom_10_titles = (
    book['title']
    for entry in mom_10_data['results']
    for book in entry['book_details']
)
for book_title in mom_10_titles:
    print(book_title)
In [6]:
# Hardcover Fiction best-seller list for Father's Day 2009 (June 21, 2009).
dad_09_url = "http://api.nytimes.com/svc/books/v2/lists/2009-06-21/hardcover-fiction.json?api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(dad_09_url)
dad_09_data = response.json()
In [7]:
# Print the title of every book on the Father's Day 2009 list.
dad_09_titles = (
    book['title']
    for entry in dad_09_data['results']
    for book in entry['book_details']
)
for book_title in dad_09_titles:
    print(book_title)
In [8]:
# Hardcover Fiction best-seller list for Father's Day 2010 (June 20, 2010).
dad_10_url = "http://api.nytimes.com/svc/books/v2/lists/2010-06-20/hardcover-fiction.json?api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(dad_10_url)
dad_10_data = response.json()
In [9]:
# Print the title of every book on the Father's Day 2010 list.
dad_10_titles = (
    book['title']
    for entry in dad_10_data['results']
    for book in entry['book_details']
)
for book_title in dad_10_titles:
    print(book_title)
2) What are all the different book categories the NYT ranked on June 6, 2009? How about June 6, 2015?
Question: To specify a date, include it in the URI path. To specify a response-format, add it as an extension. The other parameters in this table are specified as name-value pairs in a query string. (What is the difference between putting it in the URI path and putting it as a query?)
In [10]:
# Ask the Books API for its list names, passing June 6, 2009 as the date.
june6_09_url = "http://api.nytimes.com/svc/books/v2/lists/names.json?date=2009-06-06&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(june6_09_url)
june6_09_data = response.json()
In [11]:
# Each result exposes both a display name and a list name; the list name is
# used as the category label here.
# NOTE(review): the author was unsure this is the right list — confirm that the
# names endpoint actually honours the date in the request.
june6_09_categories = (entry['list_name'] for entry in june6_09_data['results'])
for category_name in june6_09_categories:
    print(category_name)
In [12]:
# Ask the Books API for its list names, passing June 6, 2015 as the date.
june6_15_url = "http://api.nytimes.com/svc/books/v2/lists/names.json?date=2015-06-06&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(june6_15_url)
june6_15_data = response.json()
In [13]:
#print(june6_15_data)
In [14]:
# Print the list name of every category returned for June 6, 2015.
june6_15_categories = (entry['list_name'] for entry in june6_15_data['results'])
for category_name in june6_15_categories:
    print(category_name)
3) Muammar Gaddafi's name can be transliterated many many ways. His last name is often a source of a million and one versions - Gadafi, Gaddafi, Kadafi, and Qaddafi to name a few. How many times has the New York Times referred to him by each of those names?
Tip: Add "Libya" to your search to make sure (-ish) you're talking about the right guy.
In [15]:
# Spelling: Gadafi
# `glocations` is a field of the filter query (fq), not a top-level parameter;
# sent as `glocations=Libya` it is not applied, so the search was never
# restricted to Libya. Search the name with `q` and filter with
# fq=glocations:("LIBYA").
gadafi_url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=Gadafi&fq=glocations:(%22LIBYA%22)&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(gadafi_url)
gadafi_data = response.json()
In [16]:
# The total match count is reported under response -> meta -> hits.
gadafi_hits = gadafi_data['response']['meta']['hits']
print("The NYT has referred to him by 'Gadafi' a total of", gadafi_hits, "times")
In [17]:
# Spelling: Gaddafi
# `glocations` is a field of the filter query (fq), not a top-level parameter;
# sent as `glocations=Libya` it is not applied, so the search was never
# restricted to Libya. Search the name with `q` and filter with
# fq=glocations:("LIBYA").
gaddafi_url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=Gaddafi&fq=glocations:(%22LIBYA%22)&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(gaddafi_url)
gaddafi_data = response.json()
In [18]:
# The total match count is reported under response -> meta -> hits.
gaddafi_hits = gaddafi_data['response']['meta']['hits']
print("The NYT has referred to him by 'Gaddafi' a total of", gaddafi_hits, "times")
In [19]:
# Spelling: Kadafi
# `glocations` is a field of the filter query (fq), not a top-level parameter;
# sent as `glocations=Libya` it is not applied, so the search was never
# restricted to Libya. Search the name with `q` and filter with
# fq=glocations:("LIBYA").
kadafi_url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=Kadafi&fq=glocations:(%22LIBYA%22)&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(kadafi_url)
kadafi_data = response.json()
In [20]:
# The total match count is reported under response -> meta -> hits.
kadafi_hits = kadafi_data['response']['meta']['hits']
print("The NYT has referred to him by 'Kadafi' a total of", kadafi_hits, "times")
In [21]:
# Spelling: Qaddafi
# `glocations` is a field of the filter query (fq), not a top-level parameter;
# sent as `glocations=Libya` it is not applied, so the search was never
# restricted to Libya. Search the name with `q` and filter with
# fq=glocations:("LIBYA").
qaddafi_url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=Qaddafi&fq=glocations:(%22LIBYA%22)&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(qaddafi_url)
qaddafi_data = response.json()
In [22]:
# The total match count is reported under response -> meta -> hits.
qaddafi_hits = qaddafi_data['response']['meta']['hits']
print("The NYT has referred to him by 'Qaddafi' a total of", qaddafi_hits, "times")
4) What's the title of the first story to mention the word 'hipster' in 1995? What's the first paragraph?
Question: What is the difference between query and filter query?
In [23]:
# Articles matching 'hipster' published during 1995, oldest first.
hipster_url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?fq=hipster&begin_date=19950101&end_date=19951231&sort=oldest&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(hipster_url)
hipster_data = response.json()
In [24]:
# The first entry in 'docs' is taken as the first 1995 story mentioning 'hipster'.
first_hipster_story = hipster_data['response']['docs'][0]
print("The title of the first story to mention the word 'hipster' is", first_hipster_story['headline']['main'])
# The question also asks for the story's first paragraph, which the original
# cell never printed. `.get` avoids a KeyError if the field is absent.
print("The first paragraph is:", first_hipster_story.get('lead_paragraph'))
TA-Stephan: Didn't print out first paragraph.
5) How many times was gay marriage mentioned in the NYT between 1950-1959, 1960-1969, 1970-1979, 1980-1989, 1990-1999, 2000-2009, and 2010-present?
Tip: You'll want to put quotes around the search term so it isn't just looking for "gay" and "marriage" in the same article.
Tip: Write code to find the number of mentions between Jan 1, 1950 and Dec 31, 1959.
In [25]:
# Phrase search for "gay marriage" (quoted via %22, space as %20), 1950-1959.
gay_50_url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=%22gay%20marriage%22&begin_date=19500101&end_date=19591231&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(gay_50_url)
gay_50_data = response.json()
In [26]:
# Report the hit count for the 1950s query.
gay_50_hits = gay_50_data['response']['meta']['hits']
print("The number of times gay marriage is mentioned between 1950 and 1959 is", gay_50_hits, "times")
In [27]:
# Phrase search for "gay marriage", 1960-1969.
gay_60_url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=%22gay%20marriage%22&begin_date=19600101&end_date=19691231&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(gay_60_url)
gay_60_data = response.json()
In [28]:
# Report the hit count for the 1960s query.
gay_60_hits = gay_60_data['response']['meta']['hits']
print("The number of times gay marriage is mentioned between 1960 and 1969 is", gay_60_hits, "times")
In [29]:
# Phrase search for "gay marriage", 1970-1979.
gay_70_url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=%22gay%20marriage%22&begin_date=19700101&end_date=19791231&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(gay_70_url)
gay_70_data = response.json()
In [30]:
# Report the hit count for the 1970s query.
gay_70_hits = gay_70_data['response']['meta']['hits']
print("The number of times gay marriage is mentioned between 1970 and 1979 is", gay_70_hits, "times")
In [50]:
# Phrase search for "gay marriage", 1980-1989.
# The original query was `q=gaymarriage%22`: it lacked the opening %22 quote and
# the %20 space, so it did not search for the phrase the other decades use.
gay_80_url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=%22gay%20marriage%22&begin_date=19800101&end_date=19891231&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(gay_80_url)
gay_80_data = response.json()
In [51]:
# Report the hit count for the 1980s query.
gay_80_hits = gay_80_data['response']['meta']['hits']
print("The number of times gay marriage is mentioned between 1980 and 1989 is", gay_80_hits, "times")
In [33]:
# Phrase search for "gay marriage", 1990-1999.
gay_90_url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=%22gay%20marriage%22&begin_date=19900101&end_date=19991231&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(gay_90_url)
gay_90_data = response.json()
In [34]:
# Report the hit count for the 1990s query.
gay_90_hits = gay_90_data['response']['meta']['hits']
print("The number of times gay marriage is mentioned between 1990 and 1999 is", gay_90_hits, "times")
In [35]:
# Phrase search for "gay marriage", 2000-2009.
gay_00_url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=%22gay%20marriage%22&begin_date=20000101&end_date=20091231&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(gay_00_url)
gay_00_data = response.json()
In [36]:
# The request behind gay_00_data spans 2000-01-01 through 2009-12-31, so the
# message must say 2009 (the original text said 2001).
gay_00_hits = gay_00_data['response']['meta']['hits']
print("The number of times gay marriage is mentioned between 2000 and 2009 is", gay_00_hits, "times")
In [37]:
# Phrase search for "gay marriage" from 2010 onward (no end date supplied).
gay_10_url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=%22gay%20marriage%22&begin_date=20100101&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(gay_10_url)
gay_10_data = response.json()
In [38]:
# Report the hit count for the 2010-to-present query.
gay_10_hits = gay_10_data['response']['meta']['hits']
print("The number of times gay marriage is mentioned between 2010 and now is", gay_10_hits, "times")
6) What section talks about motorcycles the most?
Tip: You'll be using facets
In [39]:
# Search for 'motorcycle' and request facet counts broken down by section name.
motor_url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=motorcycle&facet_field=section_name&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(motor_url)
motor_data = response.json()
In [40]:
# Facet terms: a list of dicts, each read below via its 'term' and 'count' keys.
top_motor = motor_data['response']['facets']['section_name']['terms']
for item in top_motor:
    print(item['term'], item['count'])

# Compute the winner instead of hard-coding it, so the answer stays correct
# when the counts change. max() with a key function returns the dict with the
# highest 'count'.
# (Answer to the earlier question: indexing with [0] yields a single dict, and
# looping over a dict iterates its keys — plain strings — so item['term']
# fails. Use the dict directly instead of looping over it.)
if top_motor:
    top_section = max(top_motor, key=lambda facet: facet['count'])
    print("The section that mentions motorcycles the most is the", top_section['term'], "section with", top_section['count'], "mentions")
else:
    print("No section facets were returned for this search")
7) How many of the last 20 movies reviewed by the NYT were Critics' Picks? How about the last 40? The last 60?
Tip: You really don't want to do this 3 separate times (1-20, 21-40 and 41-60) and add them together. What if, perhaps, you were able to figure out how to combine two lists? Then you could have a 1-20 list, a 1-40 list, and a 1-60 list, and then just run similar code for each of them.
In [41]:
# First page of movie reviews (no offset).
movie20_url = "http://api.nytimes.com/svc/movies/v2/reviews/search.json?resource-type=all&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(movie20_url)
movie20_data = response.json()
In [42]:
# Assumption carried over from the author: critics_pick is 0 for "no" and 1 for "yes".
# Count the reviews on the first page whose critics_pick flag is positive.
movie_count = sum(1 for review in movie20_data['results'] if review['critics_pick'] > 0)
print("There are", movie_count, "Critics' Picks movies in the last 20 movies reviewed by the NYT")
In [43]:
# Second page of movie reviews (offset=20).
movie40_url = "http://api.nytimes.com/svc/movies/v2/reviews/search.json?resource-type=all&offset=20&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(movie40_url)
movie40_data = response.json()
In [44]:
# Join the second and first pages into one list, then count the entries
# flagged as Critics' Picks.
movie40 = movie40_data['results'] + movie20_data['results']
movie_count = sum(1 for review in movie40 if review['critics_pick'] > 0)
print("There are", movie_count, "Critics' Picks movies in the last 40 movies reviewed by the NYT")
In [45]:
# Third page of movie reviews (offset=40).
movie60_url = "http://api.nytimes.com/svc/movies/v2/reviews/search.json?resource-type=all&offset=40&api-key=2ca9e983dcfd4b1ba330521af1c9c2b2"
response = requests.get(movie60_url)
movie60_data = response.json()
In [46]:
# Join all three pages into one list, then count the entries flagged as
# Critics' Picks.
movie60 = movie60_data['results'] + movie40_data['results'] + movie20_data['results']
movie_count = sum(1 for review in movie60 if review['critics_pick'] > 0)
print("There are", movie_count, "Critics' Picks movies in the last 60 movies reviewed by the NYT")
8) Out of the last 40 movie reviews from the NYT, which critic has written the most reviews?
In [52]:
from collections import Counter

# The question covers the last 40 reviews, which span two pages of results:
# movie20_data (no offset) and movie40_data (offset=20). The original cell only
# tallied movie40_data, i.e. reviews 21-40.
last_40_reviews = movie20_data['results'] + movie40_data['results']

# Collect each review's byline and tally how often each critic appears.
byline_count = [review['byline'] for review in last_40_reviews]
counts = Counter(byline_count)

# Final expression of the cell: the most frequent byline with its review count.
counts.most_common(1)
Out[52]:
In [ ]:
In [ ]: