In [37]:
import mwtab
import pandas as pd

In [10]:
local_path = '/home/irockafe/Dropbox (MIT)/Alm_Lab/projects/'
project_path = ('/revo_healthcare/data/processed/ST000396/'+
                'from_authors/ST000396_AN000633.txt')

In [22]:
mwtfile_gen = mwtab.read_files(local_path+project_path)

mwtfile = next(mwtfile_gen)

In [29]:
print mwtfile.keys()
mwtab_data = mwtfile['MS_METABOLITE_DATA']


['METABOLOMICS WORKBENCH', 'PROJECT', 'STUDY', 'SUBJECT', 'SUBJECT_SAMPLE_FACTORS', 'COLLECTION', 'TREATMENT', 'SAMPLEPREP', 'CHROMATOGRAPHY', 'ANALYSIS', 'MS', 'MS_METABOLITE_DATA', 'METABOLITES']

In [103]:
mwtab_data_table = data['MS_METABOLITE_DATA_START']['DATA']

class_info = dict(zip(data['MS_METABOLITE_DATA_START']['Samples'],
                 data['MS_METABOLITE_DATA_START']['Factors'], 
                ))

# convert to dataframe of info
sample_names = data['MS_METABOLITE_DATA_START']['Samples']
metadata = data['MS_METABOLITE_DATA_START']['Factors']

print metadata[0:2]

metadata[0].split('|')


metadata_nested_dict = {}
for i in range(0, len(metadata)):
    lst = metadata[i].split('|')
    dct = {}
    for j in lst:
        dct[j.split(':')[0]] = j.split(':')[1]
    metadata_nested_dict[sample_names[i]] = dct

# convert to dataframe
metadata_df = pd.DataFrame(metadata_nested_dict)

print metadata_df.index
print metadata_df.columns

# TODO tidy this up..? Or maybe say fuck it and move on


['Age Group:55-59 | Sex:Male | Smoking Status:Current | Diagnosis:Squamous cell', 'Age Group:65-69 | Sex:Male | Smoking Status:Current | Diagnosis:Squamous cell']
Index([u' Diagnosis', u' Sex', u' Smoking Status', u'Age Group'], dtype='object')
Index([u'110629bwasa32_2', u'110629bwasa33_2', u'110629bwasa34_2',
       u'110629bwasa35_2', u'110629bwasa36_2', u'110629bwasa37_2',
       u'110629bwasa38_2', u'110629bwasa39_2', u'110629bwasa40_2',
       u'110629bwasa41_2',
       ...
       u'110705bwasa21_1', u'110705bwasa22_1', u'110705bwasa23_1',
       u'110705bwasa24_1', u'110705bwasa25_1', u'110705bwasa26_1',
       u'110705bwasa27_1', u'110705bwasa28_1', u'110705bwasa29_1',
       u'110705bwasa30_1'],
      dtype='object', length=299)

In [104]:
df = pd.DataFrame(mwtab_data_table)
df = df.set_index(df['metabolite_name']).drop('metabolite_name', axis=1)
df.head()


# get response values
data['MS_METABOLITE_DATA_START']


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-104-c7f4f29ecf36> in <module>()
      5 
      6 # get response values
----> 7 data['MS_METABOLITE_DATA_START']

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/core/displayhook.pyc in __call__(self, result)
    244             self.start_displayhook()
    245             self.write_output_prompt()
--> 246             format_dict, md_dict = self.compute_format_data(result)
    247             self.update_user_ns(result)
    248             self.fill_exec_result(result)

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/core/displayhook.pyc in compute_format_data(self, result)
    148 
    149         """
--> 150         return self.shell.display_formatter.format(result)
    151 
    152     # This can be set to True by the write_output_prompt method in a subclass

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/core/formatters.pyc in format(self, obj, include, exclude)
    143             md = None
    144             try:
--> 145                 data = formatter(obj)
    146             except:
    147                 # FIXME: log the exception

<decorator-gen-10> in __call__(self, obj)

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/core/formatters.pyc in catch_format_error(method, self, *args, **kwargs)
    188     """show traceback on failed format call"""
    189     try:
--> 190         r = method(self, *args, **kwargs)
    191     except NotImplementedError:
    192         # don't warn on NotImplementedErrors

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/core/formatters.pyc in __call__(self, obj)
    670                 type_pprinters=self.type_printers,
    671                 deferred_pprinters=self.deferred_printers)
--> 672             printer.pretty(obj)
    673             printer.flush()
    674             return stream.getvalue()

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
    366                 if cls in self.type_pprinters:
    367                     # printer registered in self.type_pprinters
--> 368                     return self.type_pprinters[cls](obj, self, cycle)
    369                 else:
    370                     # deferred printer

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in _ordereddict_pprint(obj, p, cycle)
    829             p.text('...')
    830         elif len(obj):
--> 831             p.pretty(list(obj.items()))
    832 
    833 def _deque_pprint(obj, p, cycle):

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
    366                 if cls in self.type_pprinters:
    367                     # printer registered in self.type_pprinters
--> 368                     return self.type_pprinters[cls](obj, self, cycle)
    369                 else:
    370                     # deferred printer

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in inner(obj, p, cycle)
    550                 p.text(',')
    551                 p.breakable()
--> 552             p.pretty(x)
    553         if len(obj) == 1 and type(obj) is tuple:
    554             # Special case for 1-item tuples.

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
    366                 if cls in self.type_pprinters:
    367                     # printer registered in self.type_pprinters
--> 368                     return self.type_pprinters[cls](obj, self, cycle)
    369                 else:
    370                     # deferred printer

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in inner(obj, p, cycle)
    550                 p.text(',')
    551                 p.breakable()
--> 552             p.pretty(x)
    553         if len(obj) == 1 and type(obj) is tuple:
    554             # Special case for 1-item tuples.

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
    366                 if cls in self.type_pprinters:
    367                     # printer registered in self.type_pprinters
--> 368                     return self.type_pprinters[cls](obj, self, cycle)
    369                 else:
    370                     # deferred printer

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in inner(obj, p, cycle)
    550                 p.text(',')
    551                 p.breakable()
--> 552             p.pretty(x)
    553         if len(obj) == 1 and type(obj) is tuple:
    554             # Special case for 1-item tuples.

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
    366                 if cls in self.type_pprinters:
    367                     # printer registered in self.type_pprinters
--> 368                     return self.type_pprinters[cls](obj, self, cycle)
    369                 else:
    370                     # deferred printer

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in _ordereddict_pprint(obj, p, cycle)
    829             p.text('...')
    830         elif len(obj):
--> 831             p.pretty(list(obj.items()))
    832 
    833 def _deque_pprint(obj, p, cycle):

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
    366                 if cls in self.type_pprinters:
    367                     # printer registered in self.type_pprinters
--> 368                     return self.type_pprinters[cls](obj, self, cycle)
    369                 else:
    370                     # deferred printer

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in inner(obj, p, cycle)
    550                 p.text(',')
    551                 p.breakable()
--> 552             p.pretty(x)
    553         if len(obj) == 1 and type(obj) is tuple:
    554             # Special case for 1-item tuples.

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
    366                 if cls in self.type_pprinters:
    367                     # printer registered in self.type_pprinters
--> 368                     return self.type_pprinters[cls](obj, self, cycle)
    369                 else:
    370                     # deferred printer

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in inner(obj, p, cycle)
    545             return p.text(start + '...' + end)
    546         step = len(start)
--> 547         p.begin_group(step, start)
    548         for idx, x in p._enumerate(obj):
    549             if idx:

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in begin_group(self, indent, open)
    261         """
    262         if open:
--> 263             self.text(open)
    264         group = Group(self.group_stack[-1].depth + 1)
    265         self.group_stack.append(group)

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in text(self, obj)
    209             self._break_outer_groups()
    210         else:
--> 211             self.output.write(obj)
    212             self.output_width += width
    213 

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in write(self, text)
    117         def write(self, text):
    118             return super(CUnicodeIO, self).write(
--> 119                 cast_unicode(text, encoding=get_stream_enc(sys.stdout)))
    120 
    121 

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/utils/py3compat.pyc in cast_unicode(s, encoding)
     25 def cast_unicode(s, encoding=None):
     26     if isinstance(s, bytes):
---> 27         return decode(s, encoding)
     28     return s
     29 

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/utils/py3compat.pyc in decode(s, encoding)
     16 def decode(s, encoding=None):
     17     encoding = encoding or DEFAULT_ENCODING
---> 18     return s.decode(encoding, "replace")
     19 
     20 def encode(u, encoding=None):

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/encodings/utf_8.pyc in decode(input, errors)
     13 encode = codecs.utf_8_encode
     14 
---> 15 def decode(input, errors='strict'):
     16     return codecs.utf_8_decode(input, errors, True)
     17 

KeyboardInterrupt: 

In [ ]:
data['M']