In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6

import statsmodels.api as sm  
from statsmodels.tsa.stattools import acf  
from statsmodels.tsa.stattools import pacf
from statsmodels.tsa.seasonal import seasonal_decompose

In [5]:
data = pd.read_csv('listings_tot.csv')
data['host_since'] = pd.to_datetime(data['host_since'], format="%Y-%m-%d")
data['host_timing']=np.ones(len(data))
list_count = data.host_timing.groupby(data.host_since).sum()


/home/biel/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (43) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

Creixement de listings


In [6]:
list_grow = list_count.cumsum()
list_grow.plot(kind='area')
plt.title('Creixement de listings')
plt.ylabel('Nombre de listings')
plt.xlabel('Data')


Out[6]:
<matplotlib.text.Text at 0x7fde4f7173d0>

Creixement de hostings


In [7]:
host_grow = data[['host_since', 'host_id']]
host_grow = host_grow.drop_duplicates(['host_id'])
host_grow['counts'] = np.ones(len(host_grow))

host_grow.sort_values(['host_since'], inplace=True)
host_grow=host_grow.reset_index(drop=True)

host_grow = host_grow.counts.groupby(host_grow.host_since).sum()
host_grow = host_grow.cumsum()

host_grow.plot(kind='area')
plt.title('Creixement de Hosts')
plt.ylabel('Nombre de hosts')
plt.xlabel('Data')


Out[7]:
<matplotlib.text.Text at 0x7fde53c7b810>

In [8]:
host_grow["2015-01-01":"2016-01-01"].plot()


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fde53c5a690>

Listings vs hostings


In [9]:
list_grow.plot(kind='area', alpha=0.7)
host_grow.plot(kind='area', alpha=0.8, color='#c43733')


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fde53c5a610>

In [10]:
ratio = list_grow/host_grow

fig = plt.figure(figsize=(12,7))
ax = fig.add_subplot(1,1,1)
ax2 = fig.add_subplot(2,2,2)
ratio["2010-01-01":].plot(ax=ax)
ratio["2013-01-01":"2018-01-01"].plot(ax=ax2)
plt.title('ratio listings / host')
plt.ylabel('listings / hosts')
plt.xlabel('Data')


Out[10]:
<matplotlib.text.Text at 0x7fde5390b190>

Estacionalitat de les dades

Listings


In [11]:
decomposition = seasonal_decompose(list_grow, freq=365)  
fig = plt.figure()  
fig = decomposition.plot()


/home/biel/anaconda2/lib/python2.7/site-packages/statsmodels/tsa/filters/filtertools.py:28: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  return np.r_[[np.nan] * head, x, [np.nan] * tail]
<matplotlib.figure.Figure at 0x7fde5382c850>

Hostings


In [12]:
decomposition = seasonal_decompose(host_grow, freq=365)#frequència anual
fig = plt.figure()  
fig = decomposition.plot()


<matplotlib.figure.Figure at 0x7fde5382c750>