In [1]:
from __future__ import division
import numpy as np
from functools import partial
from scipy.stats import linregress
import matplotlib.pyplot as plt
from matplotlib import dates
import pandas as pd
import gspread
from StringIO import StringIO
import csv
import sys
sys.path.append('/home/will/PatientPicker/')
sys.path.append('/home/will/PySeqUtils/')
import LoadingTools
from GeneralSeqTools import fasta_reader, fasta_writer, seq_align_to_ref


ERROR: Line magic function `%matplotlib` not found.

In [2]:
pat_data = LoadingTools.load_redcap_data().groupby(['Patient ID', 'VisitNum']).first()

In [21]:
mask = pat_data['Current Tobacco Use'] == 'No'
pat_data['Tobacco Use (packs/year)'][mask] = pat_data['Tobacco Use (packs/year)'][mask].fillna(0)
ewms = partial(pd.ewma, span = 2)
pat_data['Smoothed-Tobacco-Use'] = pat_data['Tobacco Use (packs/year)'].groupby(level = 'Patient ID').transform(ewms)

In [22]:
cols = ['Tobacco Use (packs/year)', 'Smoothed-Tobacco-Use','Date Of Visit']
grouper = pat_data[cols].dropna().groupby(level = 'Patient ID')
fig, axs = plt.subplots(2, 1, figsize = (10, 10))
for pat, group in grouper: 
    if len(group) > 3:
        axs[0].plot_date(group['Date Of Visit'], group['Tobacco Use (packs/year)'], '-')
        axs[1].plot_date(group['Date Of Visit'], group['Smoothed-Tobacco-Use'], '-')
axs[0].set_ylabel('packs/year')
axs[1].set_ylabel('Smoothed packs/year')


Out[22]:
<matplotlib.text.Text at 0x85f4510>

In [23]:
from sklearn.covariance import EllipticEnvelope
cytos = sorted(['IL.8','VEGF','IL.1beta',
        'G.CSF','EGF','IL.10','HGF',
        'FGF.basic','IFN.alpha','IL.6',
        'IL.12','Rantes','Eotaxin',
        'GM.CSF','MIP.1beta',
        'MCP.1','IL.5','IL.13', 'IFN.gamma','TNF.alpha',
        'IL.RA','IL.2','IL.7','IP.10',
        'IL.2R','MIG','IL.4','IL.15',
        'IL.17','MIP.1alpha']) + ['Th1', 'Th2']

cyto_data_raw = pd.read_csv('/home/will/HIVSystemsBio/NewCytokineAnalysis/CytoRawData.csv', sep = '\t')
cyto_data_raw['Th1'] = cyto_data_raw['IFN.gamma'] + \
                            cyto_data_raw['IL.2']+cyto_data_raw['TNF.alpha']
cyto_data_raw['Th2'] = cyto_data_raw['IL.4'] + \
                            cyto_data_raw['IL.5']+cyto_data_raw['IL.10']

In [25]:
cyto_data = cyto_data_raw.groupby(['Patient ID', 'VisitNum']).mean()
tranfer_cols = ['Log-Latest-VL', 
                'Keep',
                'IsMale',
                'Race-Black',
                'Age',
                'HAART-Naive',
                'HAART-Non-Adherent',
                'HAART-Off',
                'HAART-On',
                'Hepatitis C status (HCV)']
for col in tranfer_cols:
    _, cyto_data[col] = cyto_data.align(pat_data[col], join='left', axis = 0)
cyto_data['HCV'] = cyto_data['Hepatitis C status (HCV)']


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-25-f1f5b637f7f6> in <module>()
     11                 'Hepatitis C status (HCV)']
     12 for col in tranfer_cols:
---> 13     _, cyto_data[col] = cyto_data.align(pat_data[col], join='left', axis = 0)
     14 cyto_data['HCV'] = cyto_data['Hepatitis C status (HCV)']

/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in __getitem__(self, key)
   2001             # get column
   2002             if self.columns.is_unique:
-> 2003                 return self._get_item_cache(key)
   2004 
   2005             # duplicate columns

/usr/local/lib/python2.7/dist-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
    665             return cache[item]
    666         except Exception:
--> 667             values = self._data.get(item)
    668             res = self._box_item_values(item, values)
    669             cache[item] = res

/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in get(self, item)
   1653     def get(self, item):
   1654         if self.items.is_unique:
-> 1655             _, block = self._find_block(item)
   1656             return block.get(item)
   1657         else:

/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in _find_block(self, item)
   1933 
   1934     def _find_block(self, item):
-> 1935         self._check_have(item)
   1936         for i, block in enumerate(self.blocks):
   1937             if item in block:

/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in _check_have(self, item)
   1940     def _check_have(self, item):
   1941         if item not in self.items:
-> 1942             raise KeyError('no item named %s' % com.pprint_thing(item))
   1943 
   1944     def reindex_axis(self, new_axis, method=None, axis=0, copy=True):

KeyError: u'no item named Log-Latest-VL'

In [ ]: