Part One - APIs - Create a Combined Profile

  • This notebook assumes that you have already retrieved the data from your LinkedIn and Twitter networks and saved it as CSV files (by default `linkedin_connection_data.csv` and `twitter_user_data.csv`)

In [ ]:
# Import everything we need
import pandas as pd
from titlecase import titlecase

In [ ]:
# Load the LinkedIn and Twitter exports, each into its own pandas dataframe

# Locations of the two CSV files
# Edit these if your files have different names
linkedin_data_file = 'linkedin_connection_data.csv'
twitter_data_file = 'twitter_user_data.csv'

# Read each CSV file into a dataframe
ldf = pd.read_csv(filepath_or_buffer=linkedin_data_file)
tdf = pd.read_csv(filepath_or_buffer=twitter_data_file)

In [ ]:
# Show the LinkedIn dataframe as it was read from the CSV file
print(ldf)

In [ ]:
# Show the Twitter dataframe as it was read from the CSV file
print(tdf)

In [ ]:
# Do some minor formatting of the data so we can join the dataframes together

# Titlecase anything
def titlecase_anything(thing):
    """Return ``thing`` title-cased if it is a string, otherwise unchanged.

    Non-string values (for example the NaN that pandas uses for a missing
    cell) are passed through untouched, so the function is safe to use with
    ``Series.apply`` on columns that contain gaps.

    Args:
        thing: Any value; only ``str`` instances are transformed.

    Returns:
        The result of ``titlecase(thing)`` for strings, else ``thing`` itself.
    """
    # An explicit type check replaces the former bare ``except: pass``, which
    # silently hid every error (including KeyboardInterrupt or a missing
    # import) instead of only skipping values that cannot be title-cased.
    if not isinstance(thing, str):
        return thing
    return titlecase(thing)

# Create a full name from the first and last
def create_full_name(first_name, last_name):
    """Join a first and last name with a single space in between.

    The pieces are combined with ``+``, so any operands that support
    ``first_name + " " + last_name`` are accepted.
    """
    separator = " "
    full_name = first_name + separator + last_name
    return full_name

# This is where pandas pays off - one call runs our function over a whole column

# Step 1: titlecase the first_name and last_name columns from the LinkedIn data
# apply() hands each cell value to the function for us, so we never pass the argument ourselves
ldf['first_name'] = ldf['first_name'].apply(titlecase_anything)
ldf['last_name'] = ldf['last_name'].apply(titlecase_anything)

# Step 2: Twitter gives us one full name (when the user supplied it), so build a matching
# 'name' column in the LinkedIn data from the first and last names
ldf['name'] = create_full_name(ldf['first_name'], ldf['last_name'])

In [ ]:
# Show the Twitter dataframe again for reference (the formatting cell above only modified ldf)
print(tdf)

In [ ]:
# Show the LinkedIn dataframe after title-casing the names and adding the 'name' column
print(ldf)

In [ ]:
# Combine the two dataframes with a database-style left join, using pandas' merge function
# Left join: every Twitter row is kept. LinkedIn columns are filled in where 'name' matches
# and are left as NaN where it does not
mdf = pd.merge(left=tdf, right=ldf, how='left', on='name')
mdf # Fooled you here. In iPython Notebook we don't have to call the print function to see our variables

In [ ]:
# Save the merged dataset as a CSV file. Still a single call!
# The "unmatched" rows are written too, so that we can perhaps fill them in later.

mdf.to_csv(
    path_or_buf='twitter_and_linkedin_merged_data.csv',
    sep=',',
    encoding='utf-8',
)
print("CSV creation complete")

In [ ]: