Part One - APIs - Retrieve Data From LinkedIn

Install the following libraries:

  • python-linkedin: pip install python-linkedin
  • prettytable: pip install prettytable

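For more on these libraries, see their project pages: https://pypi.org/project/python-linkedin/ and https://pypi.org/project/prettytable/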


In [ ]:
# Import everything we're going to need

import csv
import json
import os
import sys

from linkedin import linkedin
from prettytable import PrettyTable

In [ ]:
# Authenticate with the OAuth credentials of the LinkedIn app you created; this
# gives you an access token for reading your own data.

# Access credentials
# Read from environment variables so real secrets are never saved in the notebook.
# Each value falls back to '' (the original placeholder) when its variable is unset.
CONSUMER_KEY = os.environ.get('LINKEDIN_CONSUMER_KEY', '')
CONSUMER_SECRET = os.environ.get('LINKEDIN_CONSUMER_SECRET', '')
USER_TOKEN = os.environ.get('LINKEDIN_USER_TOKEN', '')
USER_SECRET = os.environ.get('LINKEDIN_USER_SECRET', '')

# Return url: not needed for development but we'll keep it here as a placeholder
RETURN_URL = ''

# Create the authorization, passing every value of linkedin.PERMISSIONS.enums
# as the requested permissions
authentication = linkedin.LinkedInDeveloperAuthentication(
    CONSUMER_KEY,
    CONSUMER_SECRET,
    USER_TOKEN,
    USER_SECRET,
    RETURN_URL,
    permissions=linkedin.PERMISSIONS.enums.values(),
)
# Create a LinkedIn app to use
li_app = linkedin.LinkedInApplication(authentication)

# Retrieve your profile (as the last expression, it is shown as the cell's output)
li_app.get_profile()

In [ ]:
# Fetch the profile again, keep it in `profile`, and display a single field of it
profile = li_app.get_profile()
profile["firstName"]

In [ ]:
# Retrieve your LinkedIn connections and save them to disk
# The data retrieved is limited in scope
# json is part of the standard library, so there is nothing new to install

my_connections = li_app.get_connections()

# A relative path: the file is created in the current working directory, which
# for a notebook is normally the folder the notebook itself lives in
my_connections_file = 'my_linkedin_connections.json'

# The with-block closes the file even if serialising or writing fails part-way
with open(my_connections_file, 'w') as f:
    json.dump(my_connections, f, indent=1)

print("JSON file creation complete")

In [ ]:
# Reload the saved connection data from disk, so you don't have to
# retrieve it again until you get more connections
# The with-block makes sure the file handle is closed once the data is parsed
with open(my_connections_file) as f:
    connections = json.load(f)

print(connections)

In [ ]:
# Use prettytable to nicely print your LinkedIn connections' data

# For this example we'll look at only name and location. You can easily
# add more fields if you like
pt = PrettyTable(field_names=['Name', 'Location'])
# Left justify everything
pt.align = 'l'

# If the person has a location, add a row for them
# If the person is keeping their information private, their name
# will show up as "private private" with no location
# The code below will exclude those entries
for c in connections['values']:
    # `in` replaces dict.has_key(), which no longer exists on Python 3
    if 'location' in c:
        pt.add_row([c['firstName'] + ' ' + c['lastName'], c['location']['name']])

print(pt)

In [ ]:
# See http://developer.linkedin.com/documents/profile-fields#fullprofile
# for details on additional field selectors that can be passed in for retrieving additional profile information.

# Display the positions of a person in your network.

# Use an id for a connection; we'll pull the first one
first_connection = my_connections['values'][0]
connection_id = first_connection['id']
connection_positions = li_app.get_profile(member_id=connection_id,
                                          selectors=['positions'])

# To inspect the raw response instead of the table below, run:
#   print(json.dumps(connection_positions, indent=1))

# Show some of the data
print("Connection: {}".format(first_connection['firstName'] + " " + first_connection['lastName']))
print("Total Positions: {} \n".format(connection_positions['positions']['_total']))

ct = PrettyTable(field_names=['Company', 'Title', 'Start Date'])
ct.align = 'l'

# 'values' is read with a default so that a response without it (presumably the
# case when the connection lists no positions - confirm against the API) gives
# an empty table instead of a KeyError
for c in connection_positions['positions'].get('values', []):
    start = c.get('startDate', {})
    # Build "month/year" from whichever parts are present, so a start date
    # without a month still produces a row
    start_date = '/'.join(str(start[part]) for part in ('month', 'year') if part in start)
    ct.add_row([c.get('company', {}).get('name', ''), c.get('title', ''), start_date])

print(ct)

In [ ]:
# The table above keeps the rows in the order the positions were returned.
# Let's see a prettytable sorted by company name instead

# print() as a function call: the bare `print x` statement is a SyntaxError on Python 3
print(ct.get_string(sortby="Company"))

In [ ]:
# Another way to get the positions of a single contact - using field selectors
# The selector limits each position to its company's name, industry and id

connection_positions = li_app.get_profile(
    member_id=connection_id,
    selectors=['positions:(company:(name,industry,id))'],
)

conn_pos = PrettyTable(field_names=['Company'])
conn_pos.align = 'l'

# Read 'values' and the company name with defaults so a response missing either
# gives an empty table / blank cell instead of a KeyError
for c in connection_positions['positions'].get('values', []):
    conn_pos.add_row([c.get('company', {}).get('name', '')])

print(conn_pos)

In [ ]:
# Convert the connection data to a CSV file and save it
# A CSV is more easily imported into a database than JSON, depending on the database

# Column names, shared by the table and the CSV header so the two cannot drift apart
csv_fields = ['first_name', 'last_name', 'headline', 'industry',
              'picture_url', 'location', 'country', 'profile_request_url']


def _csv_cell(value):
    """Return `value` in the form csv.writer needs on the running Python version.

    On Python 3 the csv module writes text, so the value is converted with str().
    On Python 2 it writes bytes, so the value is encoded as UTF-8.
    """
    if sys.version_info[0] >= 3:
        return str(value)
    return unicode(value).encode('utf-8')  # noqa: F821 - `unicode` only exists on Python 2


# Create a new prettytable with all of the available data
pt = PrettyTable(field_names=csv_fields)
pt.align = 'l'

# The rows are also kept in a plain list, so the CSV step does not have to
# reach into PrettyTable's private `_rows` attribute
csv_rows = []

for c in connections['values']:
    # Only connections that have a location are exported
    # (`in` replaces dict.has_key(), which no longer exists on Python 3)
    if 'location' in c:
        # Every field falls back to '' when it is missing from the record
        first_name = c.get('firstName', '')
        last_name = c.get('lastName', '')
        headline = c.get('headline', '')
        industry = c.get('industry', '')
        picture_url = c.get('pictureUrl', '')
        location = c['location'].get('name', '')
        country = c['location'].get('country', {}).get('code', '')
        profile_request_url = c.get('apiStandardProfileRequest', {}).get('url', '')

        row = [first_name,
               last_name,
               headline,
               industry,
               picture_url,
               location,
               country,
               profile_request_url]
        pt.add_row(row)
        csv_rows.append(row)

csv_path = 'linkedin_connection_data.csv'
if sys.version_info[0] >= 3:
    # Python 3: text mode; newline='' leaves line endings to the csv module
    csvfile = open(csv_path, 'w', newline='', encoding='utf-8')
else:
    # Python 2: the csv module expects a file opened in binary mode
    csvfile = open(csv_path, 'wb')

with csvfile:
    writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
    writer.writerow(csv_fields)
    for r in csv_rows:
        writer.writerow([_csv_cell(s) for s in r])

# So we know that the CSV file was indeed created
print("CSV creation complete")

In [ ]: