In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6
import statsmodels.api as sm
from statsmodels.tsa.stattools import acf
from statsmodels.tsa.stattools import pacf
from statsmodels.tsa.seasonal import seasonal_decompose
In [5]:
# Load the listings table and parse each row's host_since value as a date.
data = pd.read_csv('listings_tot.csv')
data['host_since'] = pd.to_datetime(data.host_since, format="%Y-%m-%d")
# Every row contributes 1.0, so summing per host_since date gives the
# number of rows (listings) recorded for that date.
data['host_timing'] = np.ones(data.shape[0])
list_count = data.groupby('host_since')['host_timing'].sum()
In [6]:
# Running total of the per-date listing counts, drawn as an area chart.
list_grow = list_count.cumsum()
listings_ax = list_grow.plot.area()
# Label the axes the chart was drawn on (plot titles are user-facing Catalan text).
listings_ax.set_title('Creixement de listings')
listings_ax.set_ylabel('Nombre de listings')
listings_ax.set_xlabel('Data')
Out[6]:
In [7]:
# Cumulative number of distinct hosts per host_since date.
#
# Keep one row per host_id (first occurrence in file order), then count the
# rows per date and accumulate. The column is added with .assign() on a fresh
# frame instead of item assignment on a frame derived from `data`, which
# avoids SettingWithCopyWarning. No explicit sort is needed because groupby
# already returns its keys in sorted order.
unique_hosts = data[['host_since', 'host_id']].drop_duplicates(['host_id'])
host_grow = (
    unique_hosts
    .assign(counts=np.ones(len(unique_hosts)))
    .groupby('host_since')['counts']
    .sum()
    .cumsum()
)
host_grow.plot(kind='area')
plt.title('Creixement de Hosts')
plt.ylabel('Nombre de hosts')
plt.xlabel('Data')
Out[7]:
In [8]:
# Zoom in on the cumulative host curve between the two date labels (label-based slice).
host_grow.loc["2015-01-01":"2016-01-01"].plot()
Out[8]:
In [9]:
# Overlay both cumulative curves on the same axes; the second call draws on
# the axes created by the first, hence the partial transparency.
list_grow.plot.area(alpha=0.7)
host_grow.plot.area(alpha=0.8, color='#c43733')
Out[9]:
In [10]:
# Listings-per-host ratio over time: full history from 2010 on the main axes,
# with a 2013-2018 zoom drawn in the upper-right quadrant of the same figure.
ratio = list_grow / host_grow
fig = plt.figure(figsize=(12, 7))
ax = fig.add_subplot(1, 1, 1)   # main axes, spans the whole figure
ax2 = fig.add_subplot(2, 2, 2)  # zoom axes, overlaps the main axes' top-right corner
ratio.loc["2010-01-01":].plot(ax=ax)
ratio.loc["2013-01-01":"2018-01-01"].plot(ax=ax2)
# Keep the zoom axes free of an axis label so it does not clutter the main plot.
ax2.set_xlabel('')
# Label the main axes explicitly. plt.title()/plt.ylabel()/plt.xlabel() act on
# the *current* axes, which is the last one created (ax2), so the pyplot calls
# used before ended up labelling only the zoom axes.
ax.set_title('ratio listings / host')
ax.set_ylabel('listings / hosts')
ax.set_xlabel('Data')
Out[10]:
In [11]:
# Seasonal decomposition of the cumulative listings curve with a yearly period.
#
# list_grow is indexed only by the distinct host_since dates present in the
# data, so 365 consecutive observations do not necessarily span one year.
# Reindex to a true daily calendar, carrying the running total forward over
# days without new entries, so that a period of 365 really means "annual".
list_grow_daily = list_grow.asfreq('D', method='ffill')
try:
    decomposition = seasonal_decompose(list_grow_daily, period=365)
except TypeError:
    # Older statsmodels releases only know the keyword under its previous name.
    decomposition = seasonal_decompose(list_grow_daily, freq=365)
# decomposition.plot() builds its own figure; creating one with plt.figure()
# beforehand only left an extra, empty figure in the cell output.
fig = decomposition.plot()
In [12]:
# Seasonal decomposition of the cumulative hosts curve with an annual period.
#
# host_grow is indexed only by the distinct host_since dates present in the
# data, so 365 consecutive observations do not necessarily span one year.
# Reindex to a true daily calendar, carrying the running total forward over
# days without new entries, so that a period of 365 really means "annual".
host_grow_daily = host_grow.asfreq('D', method='ffill')
try:
    decomposition = seasonal_decompose(host_grow_daily, period=365)  # annual period
except TypeError:
    # Older statsmodels releases only know the keyword under its previous name.
    decomposition = seasonal_decompose(host_grow_daily, freq=365)
# decomposition.plot() builds its own figure; creating one with plt.figure()
# beforehand only left an extra, empty figure in the cell output.
fig = decomposition.plot()