In [5]:
import os

import numpy as np
import pandas as pd
import pylab
import seaborn as sns

import airbnb_pipeline
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
In [6]:
# Bare `pwd` relies on IPython automagic (it runs the %pwd line magic) and
# echoes the kernel's current working directory as the cell output.
pwd
Out[6]:
In [7]:
# Directory holding the listings CSV. Set the AIRBNB_DATA_DIR environment
# variable to point somewhere else; the fallback is the original hard-coded
# folder, so an existing setup keeps working without any change.
DATA_DIR = os.environ.get(
    "AIRBNB_DATA_DIR",
    "/Users/alexpapiu/Documents/Insight/Project/Data",
)
LISTINGS_CSV = os.path.join(DATA_DIR, "new-york-city_2016-12-03_data_listings.csv")

# Load the listings file into the frame every later cell works on.
train = pd.read_csv(LISTINGS_CSV)
In [4]:
# Preview the first rows of `train` (head() defaults to five rows).
train.head()
Out[4]:
In [5]:
# List the column labels of `train`.
train.columns
Out[5]:
In [9]:
# Run the project-local cleaning step (airbnb_pipeline.clean, defined outside
# this notebook) and rebind `train` to its result.
# NOTE(review): because the input name is overwritten, re-running this cell
# feeds already-cleaned data back into clean() -- confirm clean() tolerates
# that, or reload the CSV first.
train = airbnb_pipeline.clean(train)
In [10]:
# (rows, columns) of `train` at this point in the notebook.
train.shape
Out[10]:
In [11]:
# Distribution of the price column, drawn as a 30-bin histogram.
train["price"].hist(bins=30)
Out[11]:
In [12]:
# Spread price into one column per room_type (pivot without `index` keeps the
# existing row index), then overlay one semi-transparent histogram per column.
# The result is bound to `plot`, so the cell echoes no return value.
plot = (
    train
    .pivot(columns="room_type", values="price")
    .plot.hist(bins=25, stacked=False, alpha=0.7)
)
In [13]:
# Same pivot trick keyed on bedrooms: one price column per bedroom count,
# drawn as a single stacked 30-bin histogram.
(
    train
    .pivot(columns="bedrooms", values="price")
    .plot.hist(bins=30, stacked=True)
)
Out[13]:
In [9]:
# Peek at the first few host_verifications values.
train["host_verifications"].head()
Out[9]:
In [10]:
# Number of rows for each distinct host_identity_verified value.
train["host_identity_verified"].value_counts()
Out[10]:
In [25]:
# Box plot of price split by host_identity_verified.
sns.boxplot(data=train, x="host_identity_verified", y="price")
Out[25]:
In [26]:
# Bar plot of price for each host_has_profile_pic value.
sns.barplot(data=train, x="host_has_profile_pic", y="price")
Out[26]:
In [27]:
# Number of rows behind each host_has_profile_pic value.
train["host_has_profile_pic"].value_counts()
Out[27]:
In [28]:
# Bar plot of price for each is_location_exact value.
sns.barplot(data=train, x="is_location_exact", y="price")
Out[28]:
In [29]:
# Column names kept as a checklist (several are plotted in the cells below);
# the bare list is simply echoed as the cell output.
[
    "property_type",
    "room_type",
    "accommodates",
    "bathrooms",
    "bedrooms",
    "beds",
    "bed_type",
    "amenities",
    "square_feet",
]
Out[29]:
In [30]:
# Bar plot of price for each property_type.
sns.barplot(data=train, x="property_type", y="price")
Out[30]:
In [31]:
# Bar plot of price for each room_type.
sns.barplot(data=train, x="room_type", y="price")
Out[31]:
In [32]:
# Bar plot of price for each accommodates value.
sns.barplot(data=train, x="accommodates", y="price")
Out[32]:
In [33]:
# Bar plot of price for each bathrooms value.
sns.barplot(data=train, x="bathrooms", y="price")
Out[33]:
In [34]:
# Number of rows behind each bathrooms value.
train["bathrooms"].value_counts()
Out[34]:
In [35]:
# Bar plot of price for each bedrooms value.
sns.barplot(data=train, x="bedrooms", y="price")
Out[35]:
In [23]:
# Number of rows behind each bedrooms value.
train["bedrooms"].value_counts()
Out[23]:
In [40]:
# square_feet has too many NaNs to be useful; count the missing entries.
train["square_feet"].isna().sum()
Out[40]:
In [ ]:
# Second checklist of column names (some are plotted in the cells below);
# evaluating the cell just echoes the list.
[
    "review_scores_location",
    "review_scores_value",
    "requires_license",
    "license",
    "jurisdiction_names",
    "instant_bookable",
    "cancellation_policy",
    "require_guest_profile_picture",
    "require_guest_phone_verification",
    "calculated_host_listings_count",
    "reviews_per_month",
]
In [42]:
# Scatter plot with a fitted regression line: price against review_scores_location.
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11 and
# made them keyword-only in 0.12, where the positional form raises TypeError.
sns.lmplot(data=train, x="review_scores_location", y="price")
Out[42]:
In [43]:
# Bar plot of price for each review_scores_location value.
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11 and
# made them keyword-only in 0.12, where the positional form raises TypeError.
sns.barplot(data=train, x="review_scores_location", y="price")
Out[43]:
In [44]:
# Bar plot of price for each review_scores_value value.
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11 and
# made them keyword-only in 0.12, where the positional form raises TypeError.
sns.barplot(data=train, x="review_scores_value", y="price")
Out[44]:
In [51]:
# Bar plot of price for each review_scores_rating value, restricted to rows
# whose rating is above 70.
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11 and
# made them keyword-only in 0.12, where the positional form raises TypeError.
sns.barplot(
    data=train.query("review_scores_rating > 70"),
    x="review_scores_rating",
    y="price",
)
Out[51]:
In [52]:
# Bar plot of price for each instant_bookable value.
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11 and
# made them keyword-only in 0.12, where the positional form raises TypeError.
sns.barplot(data=train, x="instant_bookable", y="price")
Out[52]:
In [54]:
# Bar plot of price for each require_guest_profile_picture value.
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11 and
# made them keyword-only in 0.12, where the positional form raises TypeError.
sns.barplot(data=train, x="require_guest_profile_picture", y="price")
Out[54]:
In [ ]: