notebook.community

Edit and run



In [1]:

    
# imports libraries
import pickle										# import/export lists
import datetime										# dates
import re 											# regular expression
import pandas as pd									# dataframes



In [2]:

    
# loads the pickled profile dataframe from disk
# NOTE: pickle.load can execute code embedded in the file; only use trusted data
profile_pickle = '../../data/clean_data/df_profile'
with open(profile_pickle, 'rb') as fp:
    df = pickle.load(fp)



In [3]:

    
# current calendar year (local time), used as the reference for tenure
cyear = datetime.date.today().year



In [4]:

    
# tenure: current year minus the third element of each `join` entry;
# rows whose entry is too short or holds 'NA' there keep the 'NA' placeholder
df['tenure'] = 'NA'
hasyear = [
    not (len(joined) <= 2 or joined[2] == 'NA')
    for joined in df['join']
]
joined_with_year = df.loc[hasyear, 'join']
df.loc[hasyear, 'tenure'] = [cyear - int(joined[2]) for joined in joined_with_year]

# isauthor: 1 when `st` is positive, otherwise 0
df['isauthor'] = [1 if count > 0 else 0 for count in df['st']]

# hasprofile: 1 when `profile` is non-empty, otherwise 0
df['hasprofile'] = [1 if len(text) > 0 else 0 for text in df['profile']]



In [5]:

    
# exports the dataframe, including the derived columns, as csv
df.to_csv(path_or_buf='../../data/clean_data/df_profile.csv')