In [1]:
import pandas as pd
In [2]:
# Set up paths / os.
# The notebook works from ../data so the CSV files can be referenced by bare
# file name, while the directory the notebook was launched from is placed at
# the front of the module search path.
import os
import sys
if "this_path" not in globals():
    # First run in this kernel: remember the launch directory and register it
    # on sys.path exactly once.
    this_path = os.getcwd()
    sys.path.insert(0, this_path)
# Resolve the data directory from the remembered launch directory instead of
# the current one, so re-running this cell does not attempt a second
# "../data" hop from inside the data directory.
os.chdir(os.path.join(this_path, "..", "data"))
In [3]:
# Read the posts CSV from the current working directory; the first CSV column
# is used as the DataFrame index.
infile = "MedHelp-posts.csv"
df = pd.read_csv(
    infile,
    index_col=0,
)
# Preview the first two rows.
df.head(n=2)
Out[3]:
In [4]:
# Trim leading/trailing whitespace from every post.
# The vectorised .str accessor is used instead of map(lambda x: x.strip())
# because it passes missing values (NaN) through unchanged, whereas calling
# .strip() on a NaN float raises AttributeError.
df['text'] = df['text'].str.strip()
# Preview the first row after trimming.
df.head(1)
Out[4]:
In [5]:
# Remove (near-)empty posts: keep only rows whose text is longer than
# 3 characters, printing the row count before and after the filter.
print(len(df))
has_content = df['text'].str.len().gt(3)
df = df.loc[has_content]
print(len(df))
In [6]:
# Write the cleaned posts (index included) to a new CSV in the current
# working directory.
outfile = "MedHelp-posts-clean.csv"
df.to_csv(path_or_buf=outfile)