# In [ ]:
# Count trips by age
from datetime import date, timedelta
import math
def calculate_age(row, today=None):
    """Return the approximate age in whole years, or None if it is unknown.

    Only the birth year is read from ``row``, so the birthday is assumed to
    fall on June 1st of that year (a mid-year approximation).

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a numeric
            ``'birthyear'`` entry, which may be NaN.
        today: Reference date for the calculation. Defaults to the current
            date; pass a fixed date to get reproducible results.

    Returns:
        The number of completed years as an int, or None when
        ``row['birthyear']`` is NaN.
    """
    birth_year = row['birthyear']
    if math.isnan(birth_year):
        return None
    if today is None:
        today = date.today()
    birth_date = date(int(birth_year), 6, 1)  # assumed average birthdate
    # One year is subtracted while this year's assumed birthday is still ahead.
    birthday_pending = (today.month, today.day) < (birth_date.month, birth_date.day)
    return today.year - birth_date.year - int(birthday_pending)
# Run calculate_age once per row of `trip` and store the results as the 'age' column.
trip['age'] = trip.apply(func=calculate_age, axis='columns')
# In [ ]:
# Visualization: bar chart of the number of trips for each age.
# value_counts() leaves out missing ages by default, so only known ages appear.
age_counts = trip.age.value_counts()
serie = dict(age=age_counts.index, count_trips=age_counts.values)
by_age = pd.DataFrame(serie, columns=['count_trips'], index=serie['age'])
by_age = by_age.sort_index(axis=0)  # order the bars by age, not by frequency

plt.figure(figsize=(30, 10))
ax = by_age['count_trips'].plot(kind='bar', grid=True, fontsize=15)
# Label the axes only after pandas has drawn the bars: its bar plot sets the
# x-label from the index name when there is one, which would otherwise replace
# a label set beforehand.
ax.set_title('Number of trips per age', fontsize=25)
ax.set_xlabel('Age', fontsize=20)
plt.show()