Use the requests module to make an HTTP request to http://www.github.com/ibm
Get status code for the request
In [1]:
import requests
# Request the IBM page on GitHub and show the HTTP status code of the reply.
url = 'http://www.github.com/ibm'
# Without a timeout a stalled connection would block this cell indefinitely.
response = requests.get(url, timeout=10)
print(response.status_code)
Get header information
In [ ]:
import requests
# Request the IBM page on GitHub and, on success, show the response headers.
url = 'http://www.github.com/ibm'
# Without a timeout a stalled connection would block this cell indefinitely.
response = requests.get(url, timeout=10)
print(response.status_code)
if response.status_code == 200:
    print('Response status - OK ')
    # response.headers holds the header fields sent back by the server.
    print(response.headers)
else:
    print('Error making the HTTP request ',response.status_code )
Get the body Information
In [ ]:
import requests
# Request the IBM page on GitHub and, on success, show the response body.
url = 'http://www.github.com/ibm'
# Without a timeout a stalled connection would block this cell indefinitely.
response = requests.get(url, timeout=10)
print(response.status_code)
if response.status_code == 200:
    print('Response status - OK ')
    # response.text is the body of the reply decoded to a string.
    print(response.text)
else:
    print('Error making the HTTP request ',response.status_code )
Web APIs work much like viewing a web page. When you point your browser to a website, you do it with a URL (http://www.github.com/ibm for instance). GitHub sends you back data containing HTML, CSS, and JavaScript. Your browser uses this data to construct the page that you see. The API works similarly: you request data with a URL (https://api.github.com/orgs/ibm), but instead of getting HTML and such, you get data formatted as JSON.
In [9]:
import requests
# Query the GitHub API for the IBM organisation and show the value of the
# X-RateLimit-Remaining response header.
url = "https://api.github.com/orgs/ibm"
# Without a timeout a stalled connection would block this cell indefinitely.
response = requests.get(url, timeout=10)
if response.status_code == 200:
    print('Response status - OK ')
    print(response.headers['X-RateLimit-Remaining'])
else:
    print('Error making the HTTP request ',response.status_code )
Authenticate requests to increase the API request limit. Access data that requires authentication.
Unfortunately, different websites have different ways of generating and using tokens and consumer keys. Hence we need to write the authorization code for each website separately. However, every website provides detailed information on how you can generate and send the token and keys.
In [ ]:
import requests
def GithubAPI(url):
    """Fetch *url* with HTTP basic authentication and return the body text.

    Prints whether the request succeeded and, on success, the value of the
    ``X-RateLimit-Remaining`` response header.

    Args:
        url: Address to request.

    Returns:
        The response body as a string when the status code is 200,
        otherwise ``None``.
    """
    # Replace the placeholders with real credentials before running.
    # NOTE(review): GitHub's REST API documentation says account passwords
    # are not accepted; a personal access token is expected in the password
    # slot -- confirm against the current GitHub docs.
    credentials = ("ENTER USER ID", "ENTER PASSWORD")
    # Without a timeout a stalled connection would block this call forever.
    response = requests.get(url, auth=credentials, timeout=10)
    if response.status_code != 200:
        print('Error making the HTTP request ',response.status_code )
        return None
    print('Response status - OK ')
    print(response.headers['X-RateLimit-Remaining'])
    return response.text
def main():
    """Request the IBM organisation record and print the body if one came back."""
    body = GithubAPI("https://api.github.com/orgs/ibm")
    if not body:
        # GithubAPI already reported the failure; nothing to print.
        return
    print(body)


main()
In [13]:
import requests
import json
def GithubAPI(url):
    """Fetch *url* without authentication and return the decoded JSON body.

    Args:
        url: Address to request.

    Returns:
        The parsed JSON payload when the status code is 200, otherwise
        ``None``.
    """
    # Without a timeout a stalled connection would block this call forever.
    response = requests.get(url, timeout=10)
    if response.status_code != 200:
        print('Error making the HTTP request ',response.status_code )
        return None
    print('Response status - OK ')
    return response.json()
def main():
    """Look up the IBM organisation and print its public repository count."""
    org = GithubAPI("https://api.github.com/orgs/ibm")
    if not org:
        # Request failed (already reported by GithubAPI) or payload was empty.
        return
    print('The number of public repos are : ',org['public_repos'])


main()
In [ ]:
import requests
import json
def GithubAPI(url):
    """Fetch *url* with HTTP basic authentication and return the JSON body.

    Args:
        url: Address to request.

    Returns:
        The parsed JSON payload when the status code is 200, otherwise
        ``None``.
    """
    # Replace the placeholders with real credentials before running.
    credentials = ("ENTER USER ID", "ENTER PASSWORD")
    # The credentials must be passed through the ``auth=`` keyword; writing
    # ``auth(...)`` calls an undefined function and raises NameError.
    # The timeout stops a stalled connection from blocking forever.
    response = requests.get(url, auth=credentials, timeout=10)
    if response.status_code != 200:
        print('Error making the HTTP request ',response.status_code )
        return None
    print('Response status - OK ')
    return response.json()
def main():
    """Print IBM's public repo count, then the id and name of each listed repo.

    Only the repositories returned by a single request to the organisation's
    ``repos_url`` are listed; no further pages are fetched here.
    """
    url = "https://api.github.com/orgs/ibm"
    response_json = GithubAPI(url)
    if not response_json:
        # The organisation lookup failed; GithubAPI already printed why.
        return
    print('The number of public repos are : ',response_json['public_repos'])
    repo_url = response_json['repos_url']
    repo_response = GithubAPI(repo_url)
    # GithubAPI returns None on a failed request; fall back to an empty list
    # so the loop is skipped instead of raising TypeError.
    for repo in repo_response or []:
        print([repo['id'],repo['name']])


main()
In [ ]:
import requests
import json
def GithubAPI(url):
    """Fetch *url* with HTTP basic authentication and return the JSON body.

    Args:
        url: Address to request.

    Returns:
        The parsed JSON payload when the status code is 200, otherwise
        ``None``.
    """
    # Replace the placeholders with real credentials before running.
    credentials = ("ENTER USER ID", "ENTER PASSWORD")
    # Without a timeout a stalled connection would block this call forever.
    response = requests.get(url, auth=credentials, timeout=10)
    if response.status_code != 200:
        print('Error making the HTTP request ',response.status_code )
        return None
    print('Response status - OK ')
    return response.json()
def main():
    """Print the id and name of every public IBM repository, page by page.

    Reads the organisation record to learn the repository count and the
    repository listing URL, then requests the listing 100 entries at a time
    and prints ``[id, name]`` for each repository of every page.
    """
    url = "https://api.github.com/orgs/ibm"
    response_json = GithubAPI(url)
    if not response_json:
        # The organisation lookup failed; GithubAPI already printed why.
        return
    print('The number of public repos are : ',response_json['public_repos'])
    repo_url = response_json['repos_url']
    total_no = response_json['public_repos']
    per_page = 100
    # Ceiling division: a final, partially filled page must still be fetched
    # (e.g. 250 repositories need pages 1, 2 and 3).
    last_page = -(-total_no // per_page)
    for page_count in range(1, last_page + 1):
        # GitHub's pagination query parameters are "per_page" and "page".
        page_url = repo_url+"?per_page="+str(per_page)+"&page="+str(page_count)
        print(page_url)
        repo_response = GithubAPI(page_url)
        if not repo_response:
            # Failed request (None) or an empty page: stop paging.
            break
        # Print each page as soon as it arrives so no page is overwritten.
        for repo in repo_response:
            print([repo['id'],repo['name']])


main()
Write code to append data row-wise to a CSV file
In [ ]:
import csv
# Destination file for the repository table.
WRITE_CSV = "C:/Users/kmpoo/Dropbox/HEC/Teaching/Python for PhD Mar 2018/python4phd/Session 2/ipython/Repo_csv.csv"
# 'at' opens the file for appending in text mode; newline='' leaves line
# ending handling to the csv module.
with open(WRITE_CSV, mode='at', newline='', encoding='utf-8') as out_file:
    header_writer = csv.writer(out_file)  # a csv *writer*, not a reader
    header_writer.writerow(['REPO ID', 'REPO NAME'])
What do you think will happen if we use 'wt' as mode instead of 'at' ?
Write a program that saves the IBM repositories into the CSV file, so that each row is a new repository, column 1 is the ID, and column 2 is the name.
In [ ]:
#Enter code here
import requests
import json
import csv
WRITE_CSV = "C:/Users/kmpoo/Dropbox/HEC/Teaching/Python for PhD Mar 2018/python4phd/Session 2/ipython/Repo_csv.csv"
def appendcsv(data_list):
    """Append *data_list* as one row at the end of the file named by WRITE_CSV."""
    # The file is opened in append/text mode for each call and closed again
    # when the ``with`` block exits.
    with open(WRITE_CSV, mode='at', newline='', encoding='utf-8') as out_file:
        csv.writer(out_file).writerow(data_list)
def GithubAPI(url):
    """Fetch *url* with HTTP basic authentication and return the JSON body.

    Args:
        url: Address to request.

    Returns:
        The parsed JSON payload when the status code is 200, otherwise
        ``None``.
    """
    # Replace the placeholders with real credentials before running.
    credentials = ("ENTER USER ID", "ENTER PASSWORD")
    # Without a timeout a stalled connection would block this call forever.
    response = requests.get(url, auth=credentials, timeout=10)
    if response.status_code != 200:
        print('Error making the HTTP request ',response.status_code )
        return None
    print('Response status - OK ')
    return response.json()
def main():
    """Save the id and name of every public IBM repository to the CSV file.

    Reads the organisation record to learn the repository count and the
    repository listing URL, then requests the listing 100 entries at a time.
    Each repository is printed and handed to ``appendcsv`` as ``[id, name]``.
    """
    url = "https://api.github.com/orgs/ibm"
    response_json = GithubAPI(url)
    if not response_json:
        # The organisation lookup failed; GithubAPI already printed why.
        return
    print('The number of public repos are : ',response_json['public_repos'])
    repo_url = response_json['repos_url']
    total_no = response_json['public_repos']
    per_page = 100
    # Ceiling division: a final, partially filled page must still be fetched
    # (e.g. 250 repositories need pages 1, 2 and 3).
    last_page = -(-total_no // per_page)
    for page_count in range(1, last_page + 1):
        # GitHub's pagination query parameters are "per_page" and "page".
        page_url = repo_url+"?per_page="+str(per_page)+"&page="+str(page_count)
        print(page_url)
        repo_response = GithubAPI(page_url)
        if not repo_response:
            # Failed request (None) or an empty page: stop paging.
            break
        # Handle each page as soon as it arrives so no page is overwritten.
        for repo in repo_response:
            row = [repo['id'],repo['name']]
            print(row)
            appendcsv(row)


main()