First, please specify a local directory where we can save the data to.
In [2]:
%cd /Users/elizabethasmith/Desktop/job_stuff/data_science/Mango_Health
The script 'getSQLdata.py' will pull data from the MySQL database using the following syntax:
If you successfully pull data, you will get a notification summary.
In [144]:
%run getSQLdata.py Mango_Health data_table2
If you have data stored in a csv file then you can pull it directly into the IPython environment with a pandas function:
In [17]:
global df
Check that this is the data you are intending to pull by displaying the data as a table.
First let's look at basic things, such as the rate of compliance.
In [10]:
from datetime import datetime, timedelta
TookMed = df.actual != ''
TrueIsOne = 1*TookMed
df['TookMed'] = TookMed
NumTaken = sum(TrueIsOne)
NumSched = len(TrueIsOne)
NumNotTaken = NumSched-NumTaken
PercTaken = 100*float(NumTaken)/float(NumSched)
DeltaTime = df.date.max() - df.date.min()
print "The user took their medications %d of %d, or %.1f%%, of the time they were scheduled to over the course of %d days." % (NumTaken, NumSched, PercTaken, DeltaTime.days)
In [405]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
mpl.style.use('ggplot')
data = (NumNotTaken, NumTaken)
#print data
sizes = pd.Series(data, index = ('Missed','Taken'))
sizes.plot(kind='pie', autopct='%.1f%%', fontsize=14, figsize=(7, 7), colors=('dimgray','#F2F2F2'))
plt.title("How many of the user's scheduled medication doses were eventually taken?", fontsize=16)
plt.show()
First let's format our pandas dataframe to make it easier to perform facile exploratory data analysis.
In [148]:
from datetime import datetime
import time
import datetime as dt
a = np.zeros((len(TookMed),1,))
#date stored in datetime.date
df['DayofWeek'] = NaN
df['Day'] = NaN
df['Month'] = NaN
df['Year'] = NaN
df['timeshift_hrs'] = 0
for index, row in df.iterrows():
dateraw = row['date']
df.ix[index,'DayofWeek'] = dateraw.strftime("%A")
df.ix[index,'Day'] = dateraw.day
df.ix[index,'Month'] = dateraw.month
df.ix[index,'Year'] = dateraw.year
Took = row['TookMed']
if Took:
timeStr1 = row['sched']
t1 = time.strptime(timeStr1, '%H:%M')
t1h = float(t1[3]) + float(t1[4])/60
timeStr2 = row['actual']
t2 = time.strptime(timeStr2, '%H:%M')
t2h = float(t2[3]) + float(t2[4])/60
tdiff = t2h - t1h
df.ix[index, 'timeshift_hrs'] = tdiff
df
What does compliance look like over that timescale?
In [149]:
import numpy as np
import matplotlib.pyplot as plt
#import matplotlib.collections as collections
dfdatecout = df[['date']]
dfdatecout['Doses Scheduled'] = transpose([np.ones((len(dfdatecout)))])
dfdatecout['Doses Taken'] = TrueIsOne
dfdatecout['Doses Missed'] = dfdatecout.loc[:,('Doses Scheduled')]-dfdatecout.loc[:,('Doses Taken')]
dfdatecout['Cumulative Doses Missed'] = dfdatecout.loc[:,('Doses Missed')].cumsum()
dfdatecout['timeshift_hrs'] = df.loc[:,('timeshift_hrs')]
dfdatecout['drug'] = df.loc[:,('drug')]
In [150]:
groupbydate = dfdatecout.loc[:,('date','Doses Scheduled', 'Doses Taken', 'Doses Missed')]
groupbydate = groupbydate.groupby(['date'])
groupbydate = groupbydate.sum()
def _sum(x):
if len(x) == 0: return 0
else: return sum(x)
groupbydate.index = pd.to_datetime(groupbydate.index)
groupbydate = groupbydate.resample('D', how = _sum)
MissedCumSum = groupbydate['Doses Missed'].cumsum()
groupbydate['Cumulative Doses Missed'] = MissedCumSum
#groupbydate
In [151]:
groupbydate[['Doses Missed','Doses Scheduled','Cumulative Doses Missed']].plot(kind='line', stacked=False, secondary_y=['Cumulative Doses Missed'], ylim=(0, 3), linewidth = 2, figsize=(17, 7), color = ('dimgrey','w', 'k'), alpha = 1)
plt.title('All Medications')
plt.show()
Compliance varied as a function of time; generally-speaking, the frequency with which the user missed scheduled doses increased as time went on.
In [152]:
groupdrug = dfdatecout.groupby('drug')
NumDrugs = len(groupdrug)
print "The user took %d different drugs. Their names are listed below:" % (NumDrugs)
for name, group in groupdrug:
print(name)
# print(group)
In [153]:
Adf = groupdrug.get_group('atorvastatin')
MissedCumSumA = Adf['Doses Missed'].cumsum()
Adf['Cumulative Doses Missed'] = MissedCumSumA
Ldf = groupdrug.get_group('lisinopril')
MissedCumSumL = Ldf['Doses Missed'].cumsum()
Ldf['Cumulative Doses Missed'] = MissedCumSumL
DrugList = dfdatecout['drug'].values
DrugListA = DrugList == 'atorvastatin' #True = dose of astorvastatin scheduled / False = dose of lisinopril scheduled
DrugListL = DrugList == 'lisinopril' #True = dose of lisinopril scheduled / False = dose of astorvastatin scheduled
DrugListA1 = 1*DrugListA #1 = dose of astorvastatin scheduled / 0 = dose of lisinopril scheduled
NDrASched = len(DrugListA1[DrugListA1==1]) #Total number of instances where astorvastatin was scheduled
DrugListL1 = 1*DrugListL #1 = dose of lisinopril scheduled / 0 = dose of astorvastatin scheduled
NDrLSched = len(DrugListL1[DrugListL1==1]) #Total number of instances where lisinopril was scheduled
MissedAll = dfdatecout['Doses Missed'].values
CumSumAll = dfdatecout['Cumulative Doses Missed'].values
AllDates = dfdatecout['date'].values #list of all dates (incl. duplicates if two drugs scheduled per day)
TotMissedAll = max(CumSumAll) #Total number of instances where a dose of a drug was scheduled and missed
fMissedCum = CumSumAll/TotMissedAll
Missed50Cum = min(AllDates[fMissedCum >= 0.5]) #Date where 50% of total instances where a dose of a drug was scheduled had been missed
AMissedAll = MissedAll*DrugListA1 #1 = dose of astorvastatin missed / 0 = otherwise
AMissedCum = AMissedAll.cumsum() #cumulative sum of doses of astorvastatin missed
ATotMissed = max(AMissedCum) #total number of doses of astorvastatin missed
fAMissedCum = AMissedCum/ATotMissed
AMissed50Cum = min(AllDates[fAMissedCum >= 0.5]) #Date where 50% of total instances where a dose of astorvastatin was scheduled had been missed
AAllDates = AllDates[DrugListA] #list of all dates where a dose of atorvastatin was scheduled
minADates = min(AAllDates) #date when the first dose of atorvastatin was scheduled
maxADates = max(AAllDates) #date when the last dose of atorvastatin was scheduled
LMissedAll = MissedAll*DrugListL1 #1 = dose of lisinopril missed / 0 = otherwise
LMissedCum = LMissedAll.cumsum() #cumulative sum of doses of astorvastatin missed
LTotMissed = max(LMissedCum) #total number of doses of astorvastatin missed
fLMissedCum = LMissedCum/LTotMissed
LMissed50Cum = min(AllDates[fLMissedCum >= 0.5]) #Date where 50% of total instances where a dose of lisinopril was scheduled had been missed
LAllDates = AllDates[DrugListL] #list of all dates where a dose of lisinopril was scheduled
minLDates = min(LAllDates) #date when the first dose of lisinopril was scheduled
maxLDates = max(LAllDates) #date when the last dose of lisinopril was scheduled
In [408]:
All = (TotMissedAll, NumTaken)
DrA = (ATotMissed, NDrASched-ATotMissed)
DrL = (LTotMissed, NDrLSched-LTotMissed)
DrAL = (ATotMissed, NDrASched-ATotMissed, LTotMissed, NDrLSched-LTotMissed)
#print All
#print DrA
#print DrL
plt.figure(num=1, figsize=(13, 5), dpi=200, facecolor='w', edgecolor='k')
plt.suptitle('Overall Percentage of Scheduled Medication Doses Taken', fontsize=16)
plt.subplot(1, 3, 1)
plt.pie(All, labels = ('Missed (77)','Taken (538)'), autopct='%1.1f%%', startangle=90, colors=('dimgray','#F2F2F2'))
plt.axis('equal') # Set aspect ratio to be equal so that pie is drawn as a circle.
plt.title('all medications', fontsize = 14)
#plt.subplot(1, 3, 1)
#plt.pie(DrAL, labels = ('atorvastatin (Missed) ','atorvastatin (Took)', ' lisinopril (Took)', 'lisinopril (Missed)'), autopct='%1.1f%%', startangle=90, colors=('orangered','#EBC4B5','steelblue','#BCD3E3'))
#plt.axis('equal')
#plt.title('Medications Taken', fontsize = 14)
plt.subplot(1, 3, 2)
plt.pie(DrA, labels = ('Missed (65)','Taken (346)'), autopct='%1.1f%%', startangle=90, colors=('orangered','#EBC4B5'))
plt.axis('equal')
plt.title('atorvastatin', fontsize = 14)
plt.subplot(1, 3, 3)
plt.pie(DrL, labels = ('Missed (12)','Taken (192)'), autopct='%1.1f%%', startangle=90, colors=('steelblue','#BCD3E3'))
plt.axis('equal')
plt.title('lisinopril', fontsize = 14)
plt.subplots_adjust(top=0.85)
plt.show()
In [155]:
Agroupbydate = Adf.loc[::]
Agroupbydate = Agroupbydate.groupby(['date'])
Agroupbydate = Agroupbydate.sum()
Agroupbydate.index = pd.to_datetime(Agroupbydate.index)
Agroupbydate = Agroupbydate.resample('D', how = _sum)
AMissedCumSum = Agroupbydate['Doses Missed'].cumsum()
Agroupbydate['Cumulative Doses Missed'] = AMissedCumSum
Agroupbydate[['Doses Missed','Doses Scheduled','Cumulative Doses Missed']].plot(kind='line', stacked=False, secondary_y=['Cumulative Doses Missed'], ylim=(0, 3), linewidth = 2, figsize=(17, 7), color =('orangered','#EBC4B5','k'), alpha = 1)
plt.title('atorvastatin')
plt.show()
In [156]:
from datetime import timedelta
AgroupbydateDates = Agroupbydate.index
noA1 = AgroupbydateDates[Agroupbydate['Doses Scheduled']==0]
tg1 = noA1[0]- timedelta(1)
last = len(noA1)
tg2 = noA1[last-1] + timedelta(1)
#print tg1, tg2
In [157]:
Lgroupbydate = Ldf.loc[::]
Lgroupbydate = Lgroupbydate.groupby(['date'])
Lgroupbydate = Lgroupbydate.sum()
Lgroupbydate.index = pd.to_datetime(Lgroupbydate.index)
Lgroupbydate = Lgroupbydate.resample('D', how = _sum)
LMissedCumSum = Lgroupbydate['Doses Missed'].cumsum()
Lgroupbydate['Cumulative Doses Missed'] = LMissedCumSum
Lgroupbydate[['Doses Missed','Doses Scheduled','Cumulative Doses Missed']].plot(kind='line', stacked=False, secondary_y=['Cumulative Doses Missed'], ylim=(0, 3), linewidth = 1.5, figsize=(17, 7), color =('steelblue','#BCD3E3','k'), alpha = 1)
plt.title('atorvastatin')
plt.show()
In [391]:
plt.figure(num=1, figsize=(13, 5), dpi=200, facecolor='w', edgecolor='k')
ax1 = plt.subplot2grid((6, 1), (0,0))
ax1 = plt.bar(AllDates, AMissedAll, width = 1, color = 'orangered', alpha = 1)
ax1 = plt.axvspan(minADates, tg1, facecolor='orangered', alpha=0.1)
ax1 = plt.axvspan(tg2, maxADates, facecolor='orangered', alpha=0.1)
ax1 = plt.xticks([])
ax1 = plt.ylim(0, 1)
ax1 = plt.yticks([0, 1])
ax1 = ylabel(' Doses', fontsize = 14)
ax1 = plt.title('User Compliance with Scheduled Medication Doses by Date', fontsize = 16)
ax2 = plt.subplot2grid((6, 1), (1,0))
ax2 = plt.bar(AllDates, LMissedAll, width = 1, color = 'steelblue', alpha = 1)
ax2 = plt.axvspan(minLDates, maxLDates, facecolor='steelblue', alpha=0.1)
ax2 = plt.xticks([])
ax2 = plt.ylim(0, 1)
ax2 = plt.yticks([0, 1])
ax2 = ylabel('Missed', fontsize = 14)
ax3 = plt.subplot2grid((6, 1), (2,0), rowspan=4)
ax3 = plt.plot(AllDates, AMissedCum, color = 'orangered', label = 'atorvastatin', linewidth=1.5, alpha = 1)
ax3 = plt.plot(AllDates, LMissedCum, color = 'steelblue', label = 'lisinopril', linewidth=1.5, alpha = 1)
ax3 = plt.plot(AllDates, CumSumAll, color = 'k', label = 'both', linewidth=1.5, alpha = 1)
ax3 = plt.axvspan(minADates, tg1, facecolor='orangered', alpha=0.1)
ax3 = plt.axvspan(tg2, maxADates, facecolor='orangered', alpha=0.1)
ax3 = plt.axvspan(minLDates, maxLDates, facecolor='steelblue', alpha=0.1)
ax3 = legend(loc="upper left")
ax3 = plt.axhline(y=ATotMissed/2, color = 'orangered',linestyle='--', linewidth=0.5, alpha = 1)
ax3 = plt.axhline(y=LTotMissed/2, color = 'steelblue',linestyle='--', linewidth=0.5, alpha = 1)
ax3 = plt.axhline(y=TotMissedAll/2, color = 'k',linestyle='--', linewidth=0.5, alpha = 1)
ax3 = plt.axvline(x=AMissed50Cum, color = 'orangered', linestyle=':', linewidth=1.5, alpha = 1)
ax3 = plt.axvline(x=LMissed50Cum, color = 'steelblue', linestyle=':', linewidth=1.5, alpha = 1)
ax3 = plt.axvline(x=Missed50Cum, color = 'k', linestyle=':', linewidth=1.5, alpha = 1)
ax3 = plt.ylim(0, 85)
ax3 = plt.yticks([0,20,40,60,80])
ax3 = xlabel('Date', fontsize = 14)
ax3 = ylabel('Sum of Missed Doses', fontsize = 14)
plt.subplots_adjust(top=0.85)
ax3 = plt.show()
#savefig("plot_2.png")
Dates that are shaded with transparent orange and blue signify that doses of atorvastatin and/or lisinopril, respectively were recorded as scheduled.
The frequency with which the user missed doses of lisinopril and/or atorvastatin was lowest from late July 2013 through mid-January, 2014. They were infrequently missed (only several missed doses during this 6-month period, and when one dose was missed the other was typically also missed.
The freqency of missed atorvastatin doses increased around January 2014, dropped during the month of March, then increased sharply in May/June 2014.
In [159]:
dfdatecount2 = dfdatecout.copy()
#dfdatecount2
In [160]:
dfdatecount2 = dfdatecout.copy()
SummaryAll2 = dfdatecount2
SummaryAll2['DayofWeek'] = NaN
SummaryAll2['DayofWeekN'] = NaN
SummaryAll2['Day'] = NaN
SummaryAll2['Month'] = NaN
SummaryAll2['Year'] = NaN
SummaryAll2['WeekN'] = NaN
MinDate = SummaryAll2.date.min()
MinDateWeek = MinDate.isocalendar()[1]
MaxDate = SummaryAll2.date.max()
for index, row in SummaryAll2.iterrows():
dateraw = row['date']
SummaryAll2.ix[index,'DayofWeek'] = dateraw.strftime("%A")
SummaryAll2.ix[index,'DayofWeekN'] = dateraw.weekday()
SummaryAll2.ix[index,'Day'] = dateraw.day
SummaryAll2.ix[index,'Month'] = dateraw.month
SummaryAll2.ix[index,'Year'] = dateraw.year
SummaryAll2.ix[index,'WeekN'] = dateraw.isocalendar()[1]
#SummaryAll2
In [161]:
SummaryAll2t = SummaryAll2[['Doses Scheduled','Doses Taken','Doses Missed','DayofWeekN','DayofWeek']]
groupbyDOW = SummaryAll2t.groupby(['DayofWeekN','DayofWeek'])
groupbyDOWcomp = groupbyDOW.sum()
groupbyDOW = groupbyDOWcomp.reset_index(level='DayofWeekN')
groupbyDOWr = groupbyDOW[['Doses Scheduled','Doses Taken','Doses Missed']]
groupbyDOWr['Fraction of Doses Missed'] = groupbyDOWr['Doses Missed']/groupbyDOWr['Doses Scheduled']
#groupbyDOWr
groupbyDOWallT = groupbyDOW['Doses Taken'].values
groupbyDOWallM = groupbyDOW['Doses Missed'].values
groupbyDOWallRI = groupbyDOW.copy()
groupbyDOWallRI = groupbyDOWallRI.reset_index()
DOW = groupbyDOWallRI['DayofWeek'].values #Array of Days of week: M,T,W,R,F,SAT,SUN
DOWn = groupbyDOWallRI['DayofWeekN'].values #Array of Days of week: M,T,W,R,F,SAT,SUN
SummaryAll2A = SummaryAll2[SummaryAll2['drug']=='atorvastatin']
SummaryAll2At = SummaryAll2A[['Doses Scheduled','Doses Taken','Doses Missed','DayofWeekN','DayofWeek']]
groupbyDOWA = SummaryAll2At.groupby(['DayofWeekN','DayofWeek'])
groupbyDOWcompA = groupbyDOWA.sum()
groupbyDOWA = groupbyDOWcompA.reset_index(level='DayofWeekN')
groupbyDOWrA = groupbyDOWA[['Doses Scheduled','Doses Taken','Doses Missed']]
groupbyDOWrA['Fraction of Doses Missed'] = groupbyDOWrA['Doses Missed']/groupbyDOWrA['Doses Scheduled']
#groupbyDOWrA
groupbyDOWallTA = groupbyDOWA['Doses Taken'].values
groupbyDOWallMA = groupbyDOWA['Doses Missed'].values
SummaryAll2L = SummaryAll2[SummaryAll2['drug']=='lisinopril']
SummaryAll2Lt = SummaryAll2L[['Doses Scheduled','Doses Taken','Doses Missed','DayofWeekN','DayofWeek']]
groupbyDOWL = SummaryAll2Lt.groupby(['DayofWeekN','DayofWeek'])
groupbyDOWcompL = groupbyDOWL.sum()
groupbyDOWL = groupbyDOWcompL.reset_index(level='DayofWeekN')
groupbyDOWrL = groupbyDOWL[['Doses Scheduled','Doses Taken','Doses Missed']]
groupbyDOWrL['Fraction of Doses Missed'] = groupbyDOWrL['Doses Missed']/groupbyDOWrL['Doses Scheduled']
#groupbyDOWrL
groupbyDOWallTL = groupbyDOWL['Doses Taken'].values
groupbyDOWallML = groupbyDOWL['Doses Missed'].values
In [535]:
DOWtup = tuple(DOW)
width = 1
plt.figure(num=1, figsize=(12, 4), dpi=200, facecolor='w', edgecolor='k')
plt.suptitle('User Compliance by Day of Week', fontsize=16)
plt.subplot(1, 3, 1)
p1 = plt.bar(DOWn, groupbyDOWallT, color='#F2F2F2')
p2 = plt.bar(DOWn, groupbyDOWallM, color='dimgray', bottom = groupbyDOWallT)
plt.ylabel('Number of Doses', fontsize = 14)
plt.title('all medications', fontsize = 14)
xlabel('Day of the Week', fontsize = 14)
plt.xticks(DOWn+width/2., DOWtup, rotation=45)
xlabel('Day of the Week', fontsize = 12)
plt.yticks(np.arange(0,130,20))
plt.legend( (p1[0], p2[0]), ('Took', 'Missed') )
plt.subplot(1, 3, 2)
p1 = plt.bar(DOWn, groupbyDOWallTA, color='#EBC4B5')
p2 = plt.bar(DOWn, groupbyDOWallMA, color='orangered', bottom = groupbyDOWallTA)
#plt.ylabel('Number of Doses', fontsize = 14)
plt.title('atorvastatin', fontsize = 14)
xlabel('Day of the Week', fontsize = 14)
plt.xticks(DOWn+width/2., DOWtup, rotation=45, fontsize = 11)
xlabel('Day of the Week', fontsize = 14)
plt.yticks(np.arange(0,130,20))
plt.legend( (p1[0], p2[0]), ('Took', 'Missed') )
plt.subplot(1, 3, 3)
p1 = plt.bar(DOWn, groupbyDOWallTL, color='#BCD3E3')
p2 = plt.bar(DOWn, groupbyDOWallML, color='steelblue', bottom = groupbyDOWallTL)
#plt.ylabel('Number of Doses', fontsize = 12)
plt.title('lisinopril', fontsize = 14)
plt.xticks(DOWn+width/2., DOWtup, rotation=45, fontsize = 11)
xlabel('Day of the Week', fontsize = 14)
plt.yticks(np.arange(0,130,20))
plt.legend( (p1[0], p2[0]), ('Took', 'Missed') )
plt.subplots_adjust(top=0.81)
plt.show()
In [387]:
binlistA = np.linspace(-0.5,3,50)
binlist = binlistA.tolist()
timeshiftsAll = df['timeshift_hrs'].values
TookLogical = df['TookMed'].values
timeshiftsA = timeshiftsAll[DrugListA & TookLogical]
timeshiftsL = timeshiftsAll[DrugListL & TookLogical]
plt.figure(num=1, figsize=(10, 5), dpi=80, facecolor='w', edgecolor='k')
plt.suptitle('Histograms of Difference in Time Between when Dose was Scheduled and Taken', fontsize = 16)
#plt.subplot(1, 3, 1)
#plt.hist(timeshiftsA, bins = binlist, color = '#EBC4B5', alpha = 1)
#plt.hist(timeshiftsL, bins = binlist, color = '#BCD3E3', alpha = 1)
#xlabel('Time Difference (Hours)', fontsize = 14)
#plt.ylabel('count')
#plt.ylim(0, 50)
plt.subplot(1, 2, 1)
plt.hist(timeshiftsA, bins = binlist, label = 'atorvastatin', color = '#EBC4B5', alpha = 1)
plt.legend(loc="upper right")
plt.ylabel('count')
xlabel('Time Difference (Hours)', fontsize = 14)
plt.ylim(0, 50)
plt.subplot(1, 2, 2)
plt.hist(timeshiftsL, bins = binlist, label = 'lisinopril', color = '#BCD3E3', alpha = 1)
plt.legend(loc="upper right")
plt.ylabel('count')
xlabel('Time Difference (Hours)', fontsize = 14)
plt.ylim(0, 50)
plt.subplots_adjust(top=0.85)
plt.show()
In [237]:
df['Sched_Hr'] = NaN
df['Sched_Min'] = NaN
for index, row in df.iterrows():
timeraw = row['sched']
timeraw2 = time.strptime(timeraw, '%H:%M')
#print timeraw2
df.ix[index,'Sched_Hr'] = timeraw2.tm_hour
df.ix[index,'Sched_Min'] = timeraw2.tm_min
ScheduleA = df['Sched_Hr'].where(DrugListA).values
ScheduleA8 = AllDates[ScheduleA == 8]
ScheduleA17 = AllDates[ScheduleA == 17]
ScheduleL = df['Sched_Hr'].where(DrugListL).values
ScheduleL8 = AllDates[ScheduleL == 8]
In [404]:
AtsTook = df['timeshift_hrs'].where(DrugListA & TookLogical).values
LtsTook = df['timeshift_hrs'].where(DrugListL & TookLogical).values
plt.figure(num=1, figsize=(13, 5), dpi=200, facecolor='w', edgecolor='k')
plt.suptitle('Difference in Time Between when Dose was Scheduled and Taken', fontsize = 16)
ax1 = plt.subplot2grid((6, 1), (0,0), rowspan=3)
ax1 = plt.scatter(AllDates, AtsTook, color = 'orangered', label = 'atorvastatin', linewidth=1.5, s=6, alpha = 1)
ax1 = plt.axvspan(minADates, tg1, facecolor='orangered', alpha=0.1)
ax1 = plt.axvspan(tg2, maxADates, facecolor='orangered', alpha=0.1)
ax1 = legend(loc="upper right")
ax1 = plt.axhline(y=0, color = 'gray',linewidth=1, alpha = 1)
ax1 = plt.axvline(x=ScheduleA17[0], color = 'orangered', linestyle='--', linewidth=1, alpha = 1)
ax1 = plt.axvline(x=ScheduleA8[0], color = 'orangered', linestyle='--', linewidth=1, alpha = 1)
ax1 = plt.axvline(x=tg2, color = 'orangered', linestyle='--', linewidth=1, alpha = 1)
ax1 = plt.xlim(df.date.min(), df.date.max())
ax1 = plt.annotate('Sched @ 5:00 p.m.', xy=(ScheduleA17[0], 2.75), xycoords='data', xytext=(3, 10), textcoords='offset points', arrowprops=dict(arrowstyle="->"))
ax1 = plt.annotate('Sched @ 8:00 a.m.', xy=(ScheduleA8[0], 2.75), xycoords='data', xytext=(3, 10), textcoords='offset points', arrowprops=dict(arrowstyle="->"))
ax1 = plt.annotate('Sched @ 8:00 a.m.', xy=(tg2, 2.75), xycoords='data', xytext=(3, 10), textcoords='offset points', arrowprops=dict(arrowstyle="->"))
ax1 = plt.xticks([])
ax1 = plt.ylim(-2, 5)
ax1 = plt.yticks([-1,0,1,2,3,4])
#ax1 = xlabel('Date', fontsize = 14)
ax1 = ylabel('(Hours) ', fontsize = 14)
ax2 = plt.subplot2grid((6, 1), (3,0), rowspan=3)
ax2 = plt.scatter(AllDates, LtsTook, color = 'steelblue', label = 'lisinopril', linewidth=1.5, s=6, alpha = 1)
ax2 = plt.axvspan(minLDates, maxLDates, facecolor='steelblue', alpha=0.1)
ax2 = legend(loc="upper right")
ax2 = plt.axhline(y=0, color = 'gray',linewidth=1, alpha = 1)
ax2 = plt.axvline(x=ScheduleL8[1], color = 'steelblue', linestyle='--', linewidth=1.5, alpha = 1)
ax2 = plt.ylim(-2, 5)
ax2 = plt.yticks([-1,0,1,2,3,4])
ax2 = plt.xlim(df.date.min(), df.date.max())
ax2 = plt.annotate('Sched @ 8:00 a.m.', xy=(ScheduleL8[0], 2.75), xycoords='data', xytext=(10, 10), textcoords='offset points', arrowprops=dict(arrowstyle="->"))
ax2 = xlabel('Date', fontsize = 16)
ax2 = ylabel(' Time Difference ', fontsize = 14)
ax2 = plt.subplots_adjust(top=0.9)
ax2 = plt.show()
In [ ]:
#plt.figure();
Adf.plot(kind='line', x = 'date', y = 'timeshift_hrs', figsize=(17, 7), color='k')
plt.title('All Medications')
plt.ylim(-1, 10)
plt.show()
#plt.figure();
Adf.plot(kind='line', x = 'date', y = 'timeshift_hrs', figsize=(17, 7), color='k')
plt.title('Atorvastatin')
plt.ylim(-1, 10)
plt.show()
#plt.figure();
Ldf.plot(kind='line', x = 'date', y = 'timeshift_hrs', figsize=(17, 7), color='k')
plt.title('Lisinopril')
plt.ylim(-1, 10)
plt.show()
In [530]:
df3 = df[['Year','Month','drug','Day','TookMed','timeshift_hrs']]
df3A = df3[df3['drug']=='atorvastatin']
df3L = df3[df3['drug']=='lisinopril']
summary = df3A.groupby(['Year','Month']).describe()
type(summary)
Out[530]:
In [526]:
meanA = df3A.groupby(['Year','Month']).mean()
meanA
Out[526]:
In [527]:
stdA = df3A.groupby(['Year','Month']).std()
stdA
Out[527]:
In [528]:
meanL = df3L.groupby(['Year','Month']).mean()
meanL
Out[528]:
In [529]:
stdL = df3L.groupby(['Year','Month']).std()
stdL
Out[529]:
In [ ]: