In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from datetime import date
In [2]:
# Library versions this notebook was executed with (pandas, numpy, matplotlib).
tuple(lib.__version__ for lib in (pd, np, mpl))
Out[2]:
In [3]:
# Toy roster, one (name, date_of_birth) record per person.
# The birth dates are plain ISO-formatted strings at this point, not datetimes.
df = pd.DataFrame(
    data=[
        ('john', '2000-01-01'),
        ('mary', '1999-12-20'),
        ('peter', '2000-11-01'),
        ('jeff', '1995-02-25'),
        ('bill', '1992-06-30'),
    ],
    columns=['name', 'date_of_birth'],
)
df
Out[3]:
In [4]:
# Inspect the index: no `index=` was passed to the constructor above,
# so this is whatever default index pandas generated.
df.index
Out[4]:
In [5]:
# Parse the 'date_of_birth' strings into timestamps.
datetime_series = pd.to_datetime(df['date_of_birth'])
# Going through the raw values yields an unnamed DatetimeIndex.
datetime_index = pd.DatetimeIndex(datetime_series.to_numpy())
# New frame indexed by birth date; the string column is now redundant, so drop it.
df2 = df.set_index(datetime_index).drop(columns='date_of_birth')
df2
Out[5]:
In [6]:
# Inspect the index of df2, which was set from a pd.DatetimeIndex above.
df2.index
Out[6]:
In [7]:
# Same kind of toy roster, but only the birth *year* is known (as a string).
# Note: this rebinds `df`.
df = pd.DataFrame(
    data=[
        ('john', '2000'),
        ('mary', '1999'),
        ('peter', '2001'),
        ('jeff', '1995'),
        ('bill', '1992'),
    ],
    columns=['name', 'year_born'],
)
df
Out[7]:
In [8]:
# Inspect the index of the rebuilt frame; again no `index=` was passed
# to the constructor above.
df.index
Out[8]:
In [9]:
# 'year_born' holds bare 4-digit year strings. Pass the format explicitly so the
# parse does not depend on pandas' format inference; each year maps to Jan 1st
# of that year.
datetime_series = pd.to_datetime(df['year_born'], format='%Y')
# Going through the raw values yields an unnamed DatetimeIndex.
datetime_index = pd.DatetimeIndex(datetime_series.values)
# Build the result in one chain (no in-place mutation): index by the parsed
# timestamps, drop the now-redundant string column, and sort chronologically.
df3 = (
    df.set_index(datetime_index)
    .drop(columns='year_born')
    .sort_index()
)
df3
Out[9]:
In [10]:
# Inspect the sorted DatetimeIndex that was built from 'year_born'.
df3.index
Out[10]:
In [11]:
# Conform the frame to a regular year-start ('YS') frequency; years that have
# no row in df3 show up as rows of missing values.
df4 = df3.asfreq(freq='YS')
df4
Out[11]:
In [12]:
# Inspect the index produced by asfreq('YS').
df4.index
Out[12]:
In [13]:
# Seed the global NumPy RNG so the dummy readings are identical on every run
# (Restart & Run All reproduces the same table).
np.random.seed(42)
# Ten uniform readings in [0, 100), one per calendar day 2019-01-01 .. 2019-01-10.
df = pd.DataFrame(
    data={'reading': np.random.uniform(high=100, size=10)},
    index=pd.to_datetime([date(2019, 1, day) for day in range(1, 11)]),
)
df
Out[13]:
In [14]:
# Lagged copies of 'reading'. With `freq` given, shift() moves the index labels
# (by N calendar days) rather than the values; the column assignment then
# aligns on the index, so each day receives the reading from N days earlier.
df['reading_d_minus_1'] = df['reading'].shift(periods=1, freq='D')
df['reading_d_minus_2'] = df['reading'].shift(periods=2, freq='D')
In [15]:
# Display the frame with the two lag columns added in the previous cell.
df
Out[15]:
In [17]:
from datetime import date,datetime,timedelta
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
np.random.seed(42)

# create a dummy dataset: 1000 logins, one per hourly slot starting at
# 2019-01-01, each pushed forward by a random 0-10 hours.
# The timestamps are drawn before the browsers; both consume the same seeded
# global RNG stream, so the order of these two statements determines the data.
n_visits = 1000
login_times = pd.to_datetime(
    [
        datetime(2019, 1, 1) + timedelta(hours=h + 10 * np.random.uniform())
        for h in range(n_visits)
    ]
)
browsers = np.random.choice(
    ['chrome', 'firefox', 'safari', 'edge'], size=n_visits, replace=True
)
df = pd.DataFrame(index=login_times, data={'browser': browsers})

print(type(df.index))
# 'pandas.core.indexes.datetimes.DatetimeIndex'

# just to make it more intuitive
df.index.name = 'login_datetime'

# Visits per (calendar day, browser) as a day x browser table.
# The day key is computed vectorised on the index; fill_value=0 makes a
# browser with no visits on some day count as 0 instead of NaN.
login_day = df.index.strftime('%Y-%m-%d').rename('login_date')
visits_per_day = df.groupby([login_day, 'browser']).size().unstack(fill_value=0)

# Convert each row to percentages of that day's total. Dividing the table
# row-wise keeps a single-level day index, whereas groupby(...).apply(...) on
# the stacked series prepends the group key as an extra index level on
# pandas >= 2.0 and produces tuple tick labels.
share_per_day = visits_per_day.mul(100).div(visits_per_day.sum(axis=1), axis=0)

fig, ax = plt.subplots(figsize=(12, 5))
# legend=False: the legend is built explicitly below, in reversed order so
# that it reads top-to-bottom like the stacked bars.
share_per_day.plot(kind='bar', stacked=True, width=0.8, legend=False, ax=ax)
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='center left', bbox_to_anchor=(1, 0.5))
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
# The y-axis shows percentages, so the title says "share" rather than "number".
ax.set_title('Aggregated share of visits to website, per day and browser')
plt.show()
In [ ]: