In [1]:
import pandas as pd
import numpy as np
In [2]:
# Load the raw referral observations; the file is expected to sit next to
# the notebook.
REFERRAL_CSV = "referral.csv"
df = pd.read_csv(REFERRAL_CSV)

# Preview the first rows to sanity-check the columns that were read.
df.head()
Out[2]:
In [3]:
% matplotlib inline
import matplotlib.pyplot as plt
In [4]:
# Parse the 'YYYY-MM-DD' strings in 'date' into pandas timestamps and store
# them next to the raw strings in a new 'dataframe' column.
parsed_dates = pd.to_datetime(df['date'], format='%Y-%m-%d')
df['dataframe'] = parsed_dates
In [5]:
# Split the observations by the binary 'is_referral' flag:
#   df_class0 -> rows where is_referral == 0
#   df_class1 -> rows where is_referral == 1
referral_flag = df['is_referral']
df_class0 = df.loc[referral_flag == 0]
df_class1 = df.loc[referral_flag == 1]
In [6]:
#plt.plot(df['date'], df['money_spent'], '-')
#_ = plt.xticks(rotation=45)
In [7]:
# Distinct date strings, in order of first appearance in the data.
date_unique = pd.unique(df['date'])

# Peek at the first two values.
date_unique[:2]
Out[7]:
In [8]:
# Number of observations per date, computed separately for each class.
# The result is a Series indexed by the date string.
counts_by_date0 = df_class0.groupby('date').size()
counts_by_date1 = df_class1.groupby('date').size()

# groupby sorts its keys, so the first index label is the smallest date
# string among the class-1 rows, i.e. the first date with a referral.
first_referral = counts_by_date1.index[0]
print(first_referral)

# Show the first two per-date counts for class 1.
counts_by_date1.iloc[:2]
Out[8]:
In [9]:
from datetime import datetime

# Build the cumulative number of observations per class, starting at the
# date of the first referral.
#
# Result: `cum_sum`, one row per date on/after the first referral, with
#   'date'     - the date string as it appears in the data
#   'datetime' - the same date parsed to a datetime
#   'class 0'  - running total of observations with is_referral == 0
#   'class 1'  - running total of observations with is_referral == 1
datetime_first_referral = datetime.strptime(first_referral, '%Y-%m-%d')

counts0 = 0
counts1 = 0
records = []
# ISO 'YYYY-MM-DD' strings sort chronologically, so sorting guarantees the
# running totals are accumulated in time order even if the file is not.
for date in sorted(date_unique):
    datetime_str = datetime.strptime(date, '%Y-%m-%d')
    # Include only observations from the first referral onwards.
    if datetime_str < datetime_first_referral:
        continue
    # A date may have observations of only one class. Treat the missing
    # class as zero for that day instead of skipping the whole row, which
    # would also lose the other class's count for that date.
    counts0 += int(counts_by_date0.get(date, 0))
    counts1 += int(counts_by_date1.get(date, 0))
    records.append({'date': date, 'datetime': datetime_str,
                    'class 0': counts0, 'class 1': counts1})

# Create the frame once from the collected rows rather than appending row by
# row; passing `columns` keeps the expected layout even when there are no rows.
cum_sum = pd.DataFrame(records,
                       columns=['date', 'datetime', 'class 0', 'class 1'])
cum_sum.head(10)
Out[9]:
In [11]:
# Plot the cumulative number of observations over time, one line per class.
fig, ax = plt.subplots()
line1, = ax.plot(cum_sum['datetime'], cum_sum['class 0'], linewidth=2,
                 label='No referral')
line2, = ax.plot(cum_sum['datetime'], cum_sum['class 1'], linewidth=2,
                 label='Referral')

# Label the axes so the figure can be read on its own.
ax.set_xlabel('Date')
ax.set_ylabel('Cumulative number of observations')

# Rotate the date tick labels so they do not overlap.
plt.xticks(rotation=45)
ax.legend(loc='lower right')
plt.show()
In [ ]: