In [6]:
from __future__ import division
import numpy as np
from scipy.stats import linregress
import matplotlib.pyplot as plt
from matplotlib import dates
import pandas as pd
import gspread
from StringIO import StringIO
import csv
import sys
sys.path.append('/home/will/PatientPicker/')
sys.path.append('/home/will/PySeqUtils/')
import LoadingTools
from GeneralSeqTools import fasta_reader, fasta_writer, seq_align_to_ref
%matplotlib inline
In [2]:
# Index the REDCap export by (patient, visit); .first() keeps one row per
# key pair, so repeated records for the same visit collapse to a single row.
visit_keys = ['Patient ID', 'VisitNum']
pat_data = LoadingTools.load_redcap_data().groupby(visit_keys).first()
In [3]:
# Rows reporting no current tobacco use but lacking a packs/year value are
# filled with 0; missing values on every other row are left as missing.
mask = pat_data['Current Tobacco Use'] == 'No'
tobacco_col = 'Tobacco Use (packs/year)'

# Write through one .loc call. Chained indexing (frame[col][mask] = ...) may
# assign into a temporary copy, leaving pat_data itself unchanged.
pat_data.loc[mask, tobacco_col] = pat_data.loc[mask, tobacco_col].fillna(0)

# Summary statistics of the column after the fill (cell output).
pat_data[tobacco_col].describe()
Out[3]:
In [9]:
# Plot each patient's tobacco use against visit date, one line per patient.
# Patients with few visit rows are skipped so that only real trajectories show.
MIN_VISITS = 3  # a patient is drawn only when they have MORE than this many rows

grouper = pat_data[['Tobacco Use (packs/year)', 'Date Of Visit']].groupby(level='Patient ID')

# plt.subplots already makes `ax` the current axes; drawing through `ax`
# directly avoids relying on pyplot's implicit state.
fig, ax = plt.subplots(figsize=(10, 5))
for pat, group in grouper:
    if len(group) > MIN_VISITS:
        ax.plot_date(group['Date Of Visit'], group['Tobacco Use (packs/year)'], '-')

# Label the figure so it can be read on its own.
ax.set_xlabel('Date Of Visit')
ax.set_ylabel('Tobacco Use (packs/year)')
ax.set_title('Tobacco use over time (patients with more than %d visits)' % MIN_VISITS)
Out[9]:
In [14]:
# Re-display the figure object created by the plotting cell (bare expression = cell output).
fig
Out[14]:
In [ ]: