In [6]:
from __future__ import division
import numpy as np
from scipy.stats import linregress
import matplotlib.pyplot as plt
from matplotlib import dates
import pandas as pd
import gspread
from StringIO import StringIO
import csv
import sys
# Extend the module search path with two hard-coded local directories before
# importing the project modules below.
# NOTE(review): these absolute paths are machine-specific; presumably
# LoadingTools lives in PatientPicker and GeneralSeqTools in PySeqUtils —
# confirm, and consider making the locations configurable.
sys.path.append('/home/will/PatientPicker/')
sys.path.append('/home/will/PySeqUtils/')
import LoadingTools
from GeneralSeqTools import fasta_reader, fasta_writer, seq_align_to_ref
%matplotlib inline


ERROR: Line magic function `%matplotlib` not found.

In [2]:
# Load the REDCap export and aggregate it with groupby(...).first(), giving
# one row per (Patient ID, VisitNum) pair; the result is indexed by those
# two keys.
pat_data = (
    LoadingTools.load_redcap_data()
    .groupby(by=['Patient ID', 'VisitNum'])
    .first()
)

In [3]:
# For rows whose 'Current Tobacco Use' is 'No', replace a missing packs/year
# value with 0; missing values on all other rows are left untouched.
tobacco_col = 'Tobacco Use (packs/year)'
mask = pat_data['Current Tobacco Use'] == 'No'

# Assign through a single .loc[row_mask, column] call. The chained form
# frame[col][mask] = ... may write to a temporary copy instead of pat_data
# (SettingWithCopy) and has no effect at all under pandas Copy-on-Write.
pat_data.loc[mask, tobacco_col] = pat_data.loc[mask, tobacco_col].fillna(0)

# Summary statistics of the column after filling, shown as the cell output.
pat_data[tobacco_col].describe()


Out[3]:
count    1215.000000
mean      168.701599
std       151.579513
min         0.000000
25%        54.786330
50%       127.834770
75%       182.621099
max      1095.726596
dtype: float64

In [9]:
# Plot tobacco use against visit date, one line per patient, restricted to
# patients that have more than MIN_VISITS rows in pat_data.
MIN_VISITS = 3  # a patient's group must have strictly more rows than this
tobacco_col = 'Tobacco Use (packs/year)'
date_col = 'Date Of Visit'

grouper = pat_data[[tobacco_col, date_col]].groupby(level='Patient ID')
fig, ax = plt.subplots(figsize=(10, 5))
for pat, group in grouper:
    if len(group) > MIN_VISITS:
        # Draw on the explicit Axes rather than relying on pyplot's
        # "current axes" state.
        ax.plot_date(group[date_col], group[tobacco_col], '-')

# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set_xlabel(date_col)
ax.set_ylabel(tobacco_col)
ax.set_title('Tobacco use over time (patients with more than %d visits)' % MIN_VISITS)


Out[9]:
[<matplotlib.lines.Line2D at 0x5439b90>]
<matplotlib.figure.Figure at 0x5799f50>

In [14]:
# Display the figure object currently bound to `fig` as this cell's output.
fig


Out[14]:
<matplotlib.figure.Figure at 0x6556650>

In [ ]: