In [1]:
import pandas as pd

In [2]:
# Set up the working directory and import path
import os
import sys

# Remember the directory the notebook was started from, move into the sibling
# data directory so later relative file names resolve there, and keep the
# starting directory first on the import path.
# NOTE(review): run this cell once per kernel -- re-running it after the
# directory change would record the data directory as `this_path`.
this_path = os.getcwd()
os.chdir("../data")
sys.path.insert(0, this_path)

In [3]:
# Load the raw posts; the first CSV column is used as the DataFrame index.
infile = "MedHelp-posts.csv"
df = pd.read_csv(infile, index_col=0)
df.head(2)


Out[3]:
title text href user id mother post id
post id
1 Inappropriate Masterbation Down Syndrome \n It is common for children and adoles... http://www.medhelp.org//posts/Autism--Asperger... user_340688 1
2 Inappropriate Masterbation Down Syndrome \n A related discussion, self injusry i... http://www.medhelp.org//posts/Autism--Asperger... user_1566928 1

In [4]:
# Trim leading/trailing whitespace from every post body.
# The vectorised `.str` accessor is used instead of `.map(lambda x: x.strip())`
# so that missing values (NaN from empty CSV fields) pass through as NaN
# rather than raising AttributeError.
df['text'] = df['text'].str.strip()
df.head(1)


Out[4]:
title text href user id mother post id
post id
1 Inappropriate Masterbation Down Syndrome It is common for children and adolescents with... http://www.medhelp.org//posts/Autism--Asperger... user_340688 1

In [5]:
# Drop posts whose text is 3 characters or shorter, printing the row count
# before and after the filter.
print(len(df))
long_enough = df['text'].str.len() > 3
df = df[long_enough]
print(len(df))


1813
1811

In [6]:
# Write the cleaned posts to a new CSV in the current working directory.
outfile = "MedHelp-posts-clean.csv"
df.to_csv(outfile)