In [37]:
import mwtab
import pandas as pd
In [10]:
local_path = '/home/irockafe/Dropbox (MIT)/Alm_Lab/projects/'
project_path = ('/revo_healthcare/data/processed/ST000396/'+
'from_authors/ST000396_AN000633.txt')
In [22]:
mwtfile_gen = mwtab.read_files(local_path+project_path)
mwtfile = next(mwtfile_gen)
In [29]:
print mwtfile.keys()
mwtab_data = mwtfile['MS_METABOLITE_DATA']
['METABOLOMICS WORKBENCH', 'PROJECT', 'STUDY', 'SUBJECT', 'SUBJECT_SAMPLE_FACTORS', 'COLLECTION', 'TREATMENT', 'SAMPLEPREP', 'CHROMATOGRAPHY', 'ANALYSIS', 'MS', 'MS_METABOLITE_DATA', 'METABOLITES']
In [103]:
mwtab_data_table = data['MS_METABOLITE_DATA_START']['DATA']
class_info = dict(zip(data['MS_METABOLITE_DATA_START']['Samples'],
data['MS_METABOLITE_DATA_START']['Factors'],
))
# convert to dataframe of info
sample_names = data['MS_METABOLITE_DATA_START']['Samples']
metadata = data['MS_METABOLITE_DATA_START']['Factors']
print metadata[0:2]
metadata[0].split('|')
metadata_nested_dict = {}
for i in range(0, len(metadata)):
lst = metadata[i].split('|')
dct = {}
for j in lst:
dct[j.split(':')[0]] = j.split(':')[1]
metadata_nested_dict[sample_names[i]] = dct
# convert to dataframe
metadata_df = pd.DataFrame(metadata_nested_dict)
print metadata_df.index
print metadata_df.columns
# TODO tidy this up..? Or maybe say fuck it and move on
['Age Group:55-59 | Sex:Male | Smoking Status:Current | Diagnosis:Squamous cell', 'Age Group:65-69 | Sex:Male | Smoking Status:Current | Diagnosis:Squamous cell']
Index([u' Diagnosis', u' Sex', u' Smoking Status', u'Age Group'], dtype='object')
Index([u'110629bwasa32_2', u'110629bwasa33_2', u'110629bwasa34_2',
u'110629bwasa35_2', u'110629bwasa36_2', u'110629bwasa37_2',
u'110629bwasa38_2', u'110629bwasa39_2', u'110629bwasa40_2',
u'110629bwasa41_2',
...
u'110705bwasa21_1', u'110705bwasa22_1', u'110705bwasa23_1',
u'110705bwasa24_1', u'110705bwasa25_1', u'110705bwasa26_1',
u'110705bwasa27_1', u'110705bwasa28_1', u'110705bwasa29_1',
u'110705bwasa30_1'],
dtype='object', length=299)
In [104]:
df = pd.DataFrame(mwtab_data_table)
df = df.set_index(df['metabolite_name']).drop('metabolite_name', axis=1)
df.head()
# get response values
data['MS_METABOLITE_DATA_START']
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-104-c7f4f29ecf36> in <module>()
5
6 # get response values
----> 7 data['MS_METABOLITE_DATA_START']
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/core/displayhook.pyc in __call__(self, result)
244 self.start_displayhook()
245 self.write_output_prompt()
--> 246 format_dict, md_dict = self.compute_format_data(result)
247 self.update_user_ns(result)
248 self.fill_exec_result(result)
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/core/displayhook.pyc in compute_format_data(self, result)
148
149 """
--> 150 return self.shell.display_formatter.format(result)
151
152 # This can be set to True by the write_output_prompt method in a subclass
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/core/formatters.pyc in format(self, obj, include, exclude)
143 md = None
144 try:
--> 145 data = formatter(obj)
146 except:
147 # FIXME: log the exception
<decorator-gen-10> in __call__(self, obj)
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/core/formatters.pyc in catch_format_error(method, self, *args, **kwargs)
188 """show traceback on failed format call"""
189 try:
--> 190 r = method(self, *args, **kwargs)
191 except NotImplementedError:
192 # don't warn on NotImplementedErrors
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/core/formatters.pyc in __call__(self, obj)
670 type_pprinters=self.type_printers,
671 deferred_pprinters=self.deferred_printers)
--> 672 printer.pretty(obj)
673 printer.flush()
674 return stream.getvalue()
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
366 if cls in self.type_pprinters:
367 # printer registered in self.type_pprinters
--> 368 return self.type_pprinters[cls](obj, self, cycle)
369 else:
370 # deferred printer
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in _ordereddict_pprint(obj, p, cycle)
829 p.text('...')
830 elif len(obj):
--> 831 p.pretty(list(obj.items()))
832
833 def _deque_pprint(obj, p, cycle):
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
366 if cls in self.type_pprinters:
367 # printer registered in self.type_pprinters
--> 368 return self.type_pprinters[cls](obj, self, cycle)
369 else:
370 # deferred printer
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in inner(obj, p, cycle)
550 p.text(',')
551 p.breakable()
--> 552 p.pretty(x)
553 if len(obj) == 1 and type(obj) is tuple:
554 # Special case for 1-item tuples.
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
366 if cls in self.type_pprinters:
367 # printer registered in self.type_pprinters
--> 368 return self.type_pprinters[cls](obj, self, cycle)
369 else:
370 # deferred printer
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in inner(obj, p, cycle)
550 p.text(',')
551 p.breakable()
--> 552 p.pretty(x)
553 if len(obj) == 1 and type(obj) is tuple:
554 # Special case for 1-item tuples.
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
366 if cls in self.type_pprinters:
367 # printer registered in self.type_pprinters
--> 368 return self.type_pprinters[cls](obj, self, cycle)
369 else:
370 # deferred printer
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in inner(obj, p, cycle)
550 p.text(',')
551 p.breakable()
--> 552 p.pretty(x)
553 if len(obj) == 1 and type(obj) is tuple:
554 # Special case for 1-item tuples.
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
366 if cls in self.type_pprinters:
367 # printer registered in self.type_pprinters
--> 368 return self.type_pprinters[cls](obj, self, cycle)
369 else:
370 # deferred printer
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in _ordereddict_pprint(obj, p, cycle)
829 p.text('...')
830 elif len(obj):
--> 831 p.pretty(list(obj.items()))
832
833 def _deque_pprint(obj, p, cycle):
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
366 if cls in self.type_pprinters:
367 # printer registered in self.type_pprinters
--> 368 return self.type_pprinters[cls](obj, self, cycle)
369 else:
370 # deferred printer
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in inner(obj, p, cycle)
550 p.text(',')
551 p.breakable()
--> 552 p.pretty(x)
553 if len(obj) == 1 and type(obj) is tuple:
554 # Special case for 1-item tuples.
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
366 if cls in self.type_pprinters:
367 # printer registered in self.type_pprinters
--> 368 return self.type_pprinters[cls](obj, self, cycle)
369 else:
370 # deferred printer
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in inner(obj, p, cycle)
545 return p.text(start + '...' + end)
546 step = len(start)
--> 547 p.begin_group(step, start)
548 for idx, x in p._enumerate(obj):
549 if idx:
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in begin_group(self, indent, open)
261 """
262 if open:
--> 263 self.text(open)
264 group = Group(self.group_stack[-1].depth + 1)
265 self.group_stack.append(group)
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in text(self, obj)
209 self._break_outer_groups()
210 else:
--> 211 self.output.write(obj)
212 self.output_width += width
213
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/lib/pretty.pyc in write(self, text)
117 def write(self, text):
118 return super(CUnicodeIO, self).write(
--> 119 cast_unicode(text, encoding=get_stream_enc(sys.stdout)))
120
121
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/utils/py3compat.pyc in cast_unicode(s, encoding)
25 def cast_unicode(s, encoding=None):
26 if isinstance(s, bytes):
---> 27 return decode(s, encoding)
28 return s
29
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/IPython/utils/py3compat.pyc in decode(s, encoding)
16 def decode(s, encoding=None):
17 encoding = encoding or DEFAULT_ENCODING
---> 18 return s.decode(encoding, "replace")
19
20 def encode(u, encoding=None):
/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/encodings/utf_8.pyc in decode(input, errors)
13 encode = codecs.utf_8_encode
14
---> 15 def decode(input, errors='strict'):
16 return codecs.utf_8_decode(input, errors, True)
17
KeyboardInterrupt:
In [ ]:
data['M']
Content source: irockafe/revo_healthcare
Similar notebooks: