In [1]:
# imports libraries
import pickle										# import/export lists
import datetime										# dates
import re 											# regular expression
import pandas as pd									# dataframes

In [2]:
# loads the pickled object stored at data/clean_data/df_profile into `df`
# (the cells below use it as a pandas dataframe)
# NOTE(review): pickle.load can run arbitrary code while unpickling -- only
# point this at a file produced by a trusted step of this project
with open('../../data/clean_data/df_profile', mode='rb') as fp:
    df = pickle.load(fp)

In [3]:
# stores the current calendar year (local time) as an int
cyear = datetime.date.today().year

In [4]:
# creates tenure variable: cyear minus the integer found at position 2 of each
# `join` entry (presumably the join year -- confirm against the scraper output);
# entries with fewer than three elements, or with 'NA' at position 2, get 'NA'
hasyear = [len(row) > 2 and row[2] != 'NA' for row in df['join']]

# builds the whole column in one pass and assigns it as an explicit object
# column: seeding the column with the string 'NA' and then writing ints into it
# through .loc fails on pandas versions that infer a dedicated string dtype
# for the seeded column
df['tenure'] = pd.Series(
    [cyear - int(row[2]) if known else 'NA'
     for row, known in zip(df['join'], hasyear)],
    index=df.index,
    dtype=object,
)

# creates isauthor variable: 1 when the `st` value is greater than zero, else 0
df['isauthor'] = [1 if value > 0 else 0 for value in df['st']]

# creates hasprofile variable: 1 when the `profile` entry is non-empty, else 0
df['hasprofile'] = [1 if len(entry) else 0 for entry in df['profile']]

In [5]:
# exports the dataframe, including the new variables, as csv next to the pickle
df.to_csv(path_or_buf='../../data/clean_data/df_profile.csv')