Airbnb User Sessions Data Exploration


In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set figure aesthetics
sns.set_style("white", {'ytick.major.size': 10.0})
sns.set_context("poster", font_scale=1.1)

In [3]:
# Load the data into DataFrames
path = '../data/'
train_users = pd.read_csv(path + 'train_users.csv')
test_users = pd.read_csv(path + 'test_users.csv')
sessions = pd.read_csv(path + 'sessions.csv')

In [4]:
# Merge train and test users
users = pd.concat((train_users, test_users), axis=0, ignore_index=True)

In [5]:
print("We have", len(users), "users and", len(sessions.user_id.unique()), "ID's in the session set.")


We have 275547 users and 135484 ID's in the session set.

In [6]:
print(sessions.shape)


(10567737, 6)

In [7]:
sessions.isnull().sum()


Out[7]:
user_id            34496
action             79626
action_type      1126204
action_detail    1126204
device_type            0
secs_elapsed      136031
dtype: int64

In [8]:
sessions.loc[sessions['user_id'].isnull()].head()


Out[8]:
user_id action action_type action_detail device_type secs_elapsed
6715 NaN calendar_tab_inner2 -unknown- -unknown- Mac Desktop 4890
6716 NaN create submit create_user Mac Desktop NaN
6717 NaN header_userpic data header_userpic Mac Desktop 768
6718 NaN calendar_tab_inner2 -unknown- -unknown- Mac Desktop 7578
19921 NaN edit view edit_profile iPad Tablet 6194

In [9]:
sessions.action_type.unique()


Out[9]:
array([nan, 'click', 'data', 'view', 'submit', 'message_post', '-unknown-',
       'booking_request', 'partner_callback', 'booking_response', 'modify'], dtype=object)

In [10]:
sessions['action_type'] = sessions['action_type'].replace('-unknown-', np.nan)

In [11]:
# list(sessions.action.unique())

In [12]:
sessions.action.value_counts().head()


Out[12]:
show              2768278
index              843699
search_results     725226
personalize        706824
search             536057
Name: action, dtype: int64

In [13]:
sessions.action_type.value_counts()


Out[13]:
view                3560902
data                2103770
click               1996183
submit               623357
message_post          87103
partner_callback      19132
booking_request       18773
modify                 1139
booking_response          4
Name: action_type, dtype: int64

In [14]:
sessions.loc[sessions.action_type == 'booking_response']


Out[14]:
user_id action action_type action_detail device_type secs_elapsed
284946 6udv3scuxe booking booking_response booking Windows Desktop 71212
3657669 yxf0sm9sbw booking booking_response booking Windows Desktop 36905
6543673 yjbnf70oit booking booking_response booking Windows Desktop 0
9658676 nttj7g9av6 booking booking_response booking Windows Desktop 34389

In [15]:
sessions.device_type.value_counts()


Out[15]:
Mac Desktop                         3594286
Windows Desktop                     2658539
iPhone                              2105031
Android Phone                        839637
iPad Tablet                          683414
Android App Unknown Phone/Tablet     273652
-unknown-                            211279
Tablet                               139886
Linux Desktop                         28373
Chromebook                            22348
iPodtouch                              8198
Windows Phone                          2047
Blackberry                              979
Opera Phone                              68
Name: device_type, dtype: int64

In [16]:
train_users.set_index('id', inplace=True)
sessions.set_index('user_id', inplace=True)

In [17]:
users_with_destination = train_users.loc[train_users['country_destination'] != 'NDF']
a = users_with_destination.index.values
b = sessions.index.unique()
sessions_id = list(set(a).intersection(b))

In [18]:
users_with_destination_sessions = sessions.loc[sessions_id]

In [19]:
users_with_destination_sessions.action.value_counts()


Out[19]:
show                                  547951
personalize                           211836
index                                 209351
search_results                        192670
ajax_refresh_subtotal                 157051
similar_listings                      147387
search                                 66580
update                                 63860
lookup                                 62281
social_connections                     47225
create                                 38650
dashboard                              38519
header_userpic                         36714
reviews                                32361
edit                                   32336
track_page_view                        30214
requested                              29551
active                                 28077
qt2                                    25918
calendar_tab_inner2                    19438
collections                            19343
ajax_check_dates                       18358
confirm_email                          17857
identity                               17567
ask_question                           15841
travel_plans_current                   14481
show_personalize                       14361
campaigns                              13932
listings                               12755
other_hosting_reviews_first            11989
                                       ...  
approve                                    3
zendesk_login_jwt                          3
social-media                               2
friend_listing                             2
signup_weibo                               2
print_confirmation                         2
photography_update                         2
has_profile_pic                            2
payoneer_signup_complete                   2
message                                    2
locale_from_host                           2
show_code                                  2
weibo_signup_referral_finish               1
ajax_special_offer_dates_available         1
toggle_availability                        1
apply                                      1
update_message                             1
plaxo_cb                                   1
new_host                                   1
views_campaign                             1
pricing                                    1
change_availability                        1
desks                                      1
envoy_bank_details_redirect                1
preapproval                                1
maybe_information                          1
booking                                    1
deauthorize                                1
wishlists                                  1
media_resources                            1
Name: action, dtype: int64

In [ ]:
# When there is a booking what is the most probable action

In [ ]:
# Elapsed Seconds