In [1]:
%pylab inline
import seaborn as sns
In [2]:
import warnings
# Blanket filter: from this point on every warning category is suppressed.
# NOTE(review): this also hides deprecation notices from the libraries used
# below; consider narrowing it with the `category=` / `module=` arguments.
warnings.filterwarnings("ignore")
In [3]:
import pandas as pd
In [8]:
# Column names to apply to the table. The "... (error)" columns arrive as a
# single "value (error)" string and are split apart further down.
names = [
    "ID",
    "Spectral_Type",
    "[3.6] (error)",
    "[4.5] (error)",
    "[5.8] (error)",
    "[8.0] (error)",
    "OTHER DESIGNATION",
]

# header=0 together with `names` discards the file's own header row in favour
# of the names above.
# The missing-value marker is written as a raw string: "\l" is not a valid
# escape sequence, so the non-raw spelling triggers an invalid-escape warning
# on Python 3 even though it yields the same characters.
tbl1 = pd.read_csv(
    "http://iopscience.iop.org/1538-4357/634/1/L113/fulltext/19839.tb1.txt",
    sep='\t',
    header=0,
    names=names,
    na_values=r"\ldots",
)
tbl1.head(1)
Out[8]:
Ugh, it's one of those tables that puts the uncertainty in parentheses right next to the value. That looks nice in a printed table, but it is awkward to parse.
Luckily we have computers, functions, and for loops.
In [5]:
def strip_parentheses(col, df):
    '''
    Split a column of "value (error)" strings into separate value and error columns.

    The new columns are named after the part of `col` before its last space:
    "<base>" receives the value and "<base>_e" the error. Both are kept as
    strings with surrounding whitespace removed; no numeric conversion is done.
    Rows without a parenthesised part get a missing error, and missing rows
    stay missing in both new columns.

    input:
    -col: string name of the column to split in two, e.g. "[3.6] (error)"
    -df: dataframe to apply to (modified in place: `col` is removed and the
     two new columns are appended)

    returns the same dataframe object

    raises ValueError if `col` has no space-separated suffix to drop, since the
    value column would then collide with `col` itself
    '''
    # Split on the LAST space so base names that themselves contain spaces work.
    name_parts = col.rsplit(" ", 1)
    if len(name_parts) != 2 or not name_parts[0]:
        raise ValueError(
            "column name %r must look like '<base> <suffix>'" % (col,))
    base = name_parts[0]

    # Drop the closing parenthesis, then split on the opening one.
    # regex=False makes the literal match explicit: ")" alone is not a valid
    # regular expression.
    pieces = (df[col]
              .str.replace(")", "", regex=False)
              .str.split(pat="(", expand=True))
    # When no row contains "(", the split yields a single column; reindex so
    # the error column always exists (filled with missing values).
    pieces = pieces.reindex(columns=[0, 1])

    def _strip(cell):
        # Leave NaN/None untouched; only real strings have .strip().
        return cell.strip() if hasattr(cell, "strip") else cell

    df[base] = pieces[0].map(_strip)
    df[base + "_e"] = pieces[1].map(_strip)
    del df[col]
    return df
In [6]:
# Every column whose name carries the "(error)" marker packs the value and its
# uncertainty into one string, so each of them is split in turn.
cols_to_fix = [col for col in tbl1.columns if "(error)" in col]
for col in cols_to_fix:
    # print(...) with a single argument behaves the same on Python 2 and 3;
    # the bare `print col` statement is a SyntaxError on Python 3.
    print(col)
    tbl1 = strip_parentheses(col, tbl1)
In [7]:
# Preview the first rows to check that each "(error)" column was replaced by
# a value column and a matching "_e" column.
tbl1.head()
Out[7]:
The end.