In [1]:
%matplotlib inline
%config InlineBackend.figure_format='retina'
import dask.dataframe as dd
import dask.distributed
import numpy as np
import pandas as pd
# import geopandas as gpd
from matplotlib.colors import SymLogNorm as symlog
from matplotlib import rcParams
import sklearn, sklearn.cluster
import matplotlib.pyplot as plt
import palettable
import seaborn as sns
import netCDF4
import geopandas
pd.options.display.max_rows = 300
pd.options.display.max_columns = 100
In [2]:
client = dask.distributed.Client()
In [45]:
tzdf = geopandas.read_file('../shapefiles/taxi_zones.shp')
In [4]:
rcParams['font.sans-serif'] = ('Helvetica', 'Arial', 'Open Sans', 'Bitstream Vera Sans')
rcParams['font.size'] = 12
rcParams['font.stretch'] = 'normal'
rcParams['font.weight'] = 'normal'
rcParams['savefig.dpi'] = 150
rcParams['figure.dpi'] = 150
import seaborn as sns
import os.path
homedirpath = os.path.expanduser('~')
fontdirpath = ''
if '/Users/' in homedirpath:
fontdirpath = os.path.join(homedirpath, 'Library/Fonts/')
else:
fontdirpath = os.path.join(homedirpath, '.fonts/')
fontsize2 = 'size={0:0.1f}'.format(12)
rcParams['mathtext.it'] = ((':family=sans-serif:style=normal:variant='
'normal:weight=normal:stretch=normal:file={0}/'
'HelveticaOblique.ttf:' +
fontsize2
).format(fontdirpath))
rcParams['mathtext.rm'] = ((':family=sans-serif:style=normal:variant='
'normal:weight=normal:stretch=normal:file={0}/'
'Helvetica.ttf:' +
fontsize2
).format(fontdirpath))
rcParams['mathtext.tt'] = ((':family=sans-serif:style=normal:variant='
'normal:weight=normal:stretch=normal:file={0}/'
'Helvetica.ttf:' +
fontsize2
).format(fontdirpath))
rcParams['mathtext.bf'] = ((':family=sans-serif:style=normal:variant='
'normal:weight=normal:stretch=normal:file={0}/'
'HelveticaBold.ttf:' +
fontsize2
).format(fontdirpath))
rcParams['mathtext.cal'] = ((':family=sans-serif:style=normal:variant='
'normal:weight=normal:stretch=normal:file='
'{0}/Helvetica.ttf:' +
fontsize2
).format(fontdirpath))
rcParams['mathtext.sf'] = ((':family=sans-serif:style=normal:variant='
'normal:weight=normal:stretch=normal:file={0}/'
'Helvetica.ttf:' +
fontsize2
).format(fontdirpath))
In [5]:
df = dd.read_parquet('/data/all_trips.parquet', index='trip_id',
columns='pickup_datetime dropoff_datetime pickup_taxizone_id dropoff_taxizone_id'.split())
In [6]:
df2 = df.sample(frac=1.0e-6, random_state=42).compute()
In [7]:
df2 = df2.dropna()
In [8]:
df3 = df2.merge(
tzdf['LocationID borough zone'.split()], left_on='pickup_taxizone_id', right_on='LocationID'
)
df3['pickup_location'] = df3.borough.map(str) + " | " + df3.zone
df3 = df3.drop('LocationID borough zone'.split(), axis=1)
df3 = df3.merge(
tzdf['LocationID borough zone'.split()], left_on='dropoff_taxizone_id', right_on='LocationID'
)
df3['dropoff_location'] = df3.borough.map(str) + " | " + df3.zone
df3 = df3.drop('LocationID borough zone'.split(), axis=1)
df3 = df3.sample(frac=1, replace=False, random_state=42).reset_index(drop=True)
In [9]:
df3.head(10).sort_values('pickup_datetime').reset_index(drop=True).to_html().replace("""\n""", "")
Out[9]:
In [10]:
from IPython.display import HTML
HTML(df3.head(10).sort_values('pickup_datetime').reset_index(drop=True).to_html())
Out[10]:
In [11]:
df = dd.read_parquet('/data/all_trips.parquet', engine='fastparquet', index='pickup_datetime',
columns=['pickup_taxizone_id', 'dropoff_taxizone_id'])
df['pickup_taxizone_id'] = df.pickup_taxizone_id.fillna(266.).astype(np.int32)
df['dropoff_taxizone_id'] = df.dropoff_taxizone_id.fillna(266.).astype(np.int32)
In [12]:
df.head()
Out[12]:
In [13]:
count_dataframe = df.reset_index().groupby(['pickup_taxizone_id', 'dropoff_taxizone_id']).count().compute()
count_dataframe.columns = ['count']
count_dataframe.shape
Out[13]:
In [14]:
count_dataframe.head()
Out[14]:
In [15]:
count_matrix = np.zeros((267, 267), dtype=np.int64)
for r in count_dataframe.reset_index().itertuples():
count_matrix[r[1], r[2]] = r[3]
In [16]:
count_dataframe.describe()
Out[16]:
In [17]:
count_dataframe.reset_index().head()
Out[17]:
In [18]:
# <!-- collapse=True -->
plt.imshow(count_matrix[1:-3, 1:-3].T, norm=symlog(10000), origin='upper', cmap=plt.cm.Blues)
plt.grid(False)
plt.xlabel("Dropoff Taxi Zone ID")
plt.ylabel("Pickup Taxi Zone ID")
plt.gcf().set_size_inches(4, 4)
In [19]:
df = dd.read_parquet('/data/all_trips.parquet', engine='fastparquet', index='pickup_datetime',
columns=['pickup_taxizone_id', 'trip_type'])
df = df[df.trip_type != 'uber']
df = df.drop('trip_type', axis=1)
df['pickup_taxizone_id'] = df.pickup_taxizone_id.fillna(266.).astype(np.int32)
In [20]:
def get_year_mo_day(data, col):
# d = np.core.defchararray.replace(np.core.defchararray.add(data.index.values.astype('M8[h]').astype(np.str), ":00"), 'T', ' ')
# return d
return data.index.values.astype('M8[h]')
In [21]:
df['pickup_ymd'] = df.map_partitions(get_year_mo_day, 'pickup_datetime', meta=('asdf', np.datetime64))
In [22]:
df.reset_index().rename(columns=dict(index='N')).tail()
Out[22]:
In [23]:
pickup_counts_df = df.reset_index().rename(columns=dict(index='N')).groupby(['pickup_taxizone_id', 'pickup_ymd',]).count().compute()
pickup_counts_df.sort_index(inplace=True)
In [24]:
pickup_counts_df.head()
Out[24]:
In [25]:
z = pickup_counts_df.unstack(0)
In [26]:
z.columns = np.arange(1, 267).astype(str)
In [27]:
z = z.merge(
pd.DataFrame(index=pd.date_range('2009-01-01 00:00:00', '2016-12-31 23:00:00', freq='H')),
how='right', left_index=True, right_index=True).fillna(0).astype(np.int32)
In [28]:
z.head()
Out[28]:
In [29]:
import fastparquet
fastparquet.write('/data/trips_pickups_matrix.parquet', z, compression='SNAPPY')
In [30]:
df = dd.read_parquet('/data/all_trips.parquet', engine='fastparquet', index='pickup_datetime',
columns=['dropoff_datetime', 'dropoff_taxizone_id', 'trip_type'])
df = df[df.trip_type != 'uber']
df = df.drop('trip_type', axis=1)
df['dropoff_taxizone_id'] = df.dropoff_taxizone_id.fillna(266.).astype(np.int32)
In [31]:
def get_year_mo_day(data, col):
# d = np.core.defchararray.replace(np.core.defchararray.add(data.index.values.astype('M8[h]').astype(np.str), ":00"), 'T', ' ')
# return d
return data.index.values.astype('M8[h]')
In [32]:
df['dropoff_ymd'] = df.map_partitions(get_year_mo_day, 'dropoff_datetime', meta=('asdf', np.datetime64))
In [33]:
df.reset_index(drop=True).tail()
Out[33]:
In [34]:
dropoff_counts_df = df.reset_index(drop=True).rename(columns=dict(dropoff_datetime='N')).groupby(['dropoff_taxizone_id', 'dropoff_ymd',]).count().compute()
dropoff_counts_df.sort_index(inplace=True)
In [35]:
dropoff_counts_df.head()
Out[35]:
In [36]:
z2 = dropoff_counts_df.unstack(0)
In [37]:
z2 = z2.merge(
pd.DataFrame(index=pd.date_range('2009-01-01 00:00:00', '2016-12-31 23:00:00', freq='H')),
how='right', left_index=True, right_index=True).fillna(0).astype(np.int32)
In [38]:
z2.columns = np.arange(1, 267).astype(str)
In [39]:
z2.head()
Out[39]:
In [40]:
import fastparquet
fastparquet.write('/data/trips_dropoffs_matrix.parquet', z2, compression='SNAPPY')
In [86]:
tzdf = geopandas.read_file('../shapefiles/taxi_zones.shp')
In [87]:
import fastparquet
dropoffs_matrix = fastparquet.ParquetFile('/data/trips_dropoffs_matrix.parquet').to_pandas()
pickups_matrix = fastparquet.ParquetFile('/data/trips_pickups_matrix.parquet').to_pandas()
In [88]:
dropoffs_matrix = dropoffs_matrix.iloc[:, :-3]
pickups_matrix = pickups_matrix.iloc[:, :-3]
In [135]:
counts_matrix = pd.concat([dropoffs_matrix, pickups_matrix], axis=1 )
In [321]:
tzdf.zone[0]
Out[321]:
In [324]:
sns.distplot(counts_matrix.iloc[:, 263+0], kde=False)
sns.distplot(counts_matrix.iloc[:, 0], kde=False)
Out[324]:
In [136]:
import sklearn, sklearn.decomposition
In [414]:
# pca = sklearn.decomposition.PCA(n_components=20, whiten=True)
# # pca.fit(counts_matrix.resample('1D').sum().values)
# pca.fit(counts_matrix.values)
# pca.explained_variance_ratio_
In [500]:
pca = sklearn.decomposition.FastICA(n_components=3, random_state=42, whiten=True)
# pca.fit(counts_matrix.resample('1D').sum().values)
yvals = pca.fit_transform(counts_matrix.values)
# pca.explained_variance_ratio_
In [501]:
yvals.shape
Out[501]:
In [502]:
pickup_eof1, dropoff_eof1 = pca.components_[0, :263], pca.components_[0, 263:]
pickup_eof2, dropoff_eof2 = pca.components_[1, :263], pca.components_[1, 263:]
pickup_eof3, dropoff_eof3 = pca.components_[2, :263], pca.components_[2, 263:]
# pickup_eof4, dropoff_eof4 = pca.components_[3, :263], pca.components_[3, 263:]
# pickup_eof5, dropoff_eof5 = pca.components_[4, :263], pca.components_[4, 263:]
In [503]:
tzdf['pEOF1'] = pickup_eof1
tzdf['dEOF1'] = dropoff_eof1
tzdf['pEOF2'] = pickup_eof2
tzdf['dEOF2'] = dropoff_eof2
tzdf['pEOF3'] = pickup_eof3
tzdf['dEOF3'] = dropoff_eof3
# tzdf['pEOF4'] = pickup_eof4
# tzdf['dEOF4'] = dropoff_eof4
# tzdf['pEOF5'] = pickup_eof5
# tzdf['dEOF5'] = dropoff_eof5
In [504]:
tzdf['N_dropoffs'] = dropoffs_matrix.sum(axis=0).values
tzdf['N_pickups'] = pickups_matrix.sum(axis=0).values
In [505]:
tzdf['log10_N_dropoffs'] = np.log10(tzdf.N_dropoffs)
tzdf['log10_N_pickups'] = np.log10(tzdf.N_pickups)
In [506]:
tzdf = tzdf.to_crs({'init': 'epsg:3857'})
In [507]:
tzdf.head()
Out[507]:
In [526]:
tzdf2 = tzdf.copy()
tzdf2 = tzdf2[(tzdf2.borough != 'Staten Island') & (tzdf2.borough != 'EWR')]
In [527]:
tzdf2 = tzdf2.sort_values('N_dropoffs')
tzdf2['N_dropoffs_ranked'] = np.linspace(0, 1., tzdf2.shape[0])
tzdf2 = tzdf2.sort_values('N_pickups')
tzdf2['N_pickups_ranked'] = np.linspace(0, 1., tzdf2.shape[0])
tzdf2 = tzdf2.sort_values('LocationID')
In [528]:
tzdf2.plot(figsize=(12, 18), alpha=1, column='N_dropoffs_ranked', cmap=plt.cm.viridis, edgecolor='k',
linewidth=0.5)
ax = plt.gca()
plt.grid(False)
ax.set_facecolor('k')
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
In [529]:
tzdf2.plot(figsize=(12, 18), alpha=1, column='N_pickups_ranked', cmap=plt.cm.viridis, edgecolor='k',
linewidth=0.5)
ax = plt.gca()
plt.grid(False)
ax.set_facecolor('k')
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
In [530]:
tzdf2.iloc[:, -10:].describe()
Out[530]:
In [531]:
ax1 = plt.subplot(121)
tzdf2.plot(figsize=(18, 8), alpha=1, column='pEOF1', cmap=plt.cm.RdBu, edgecolor='k',
linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5, ax=ax1)
plt.grid(False)
ax1.set_facecolor('xkcd:silver')
ax1.xaxis.set_visible(False)
ax1.yaxis.set_visible(False)
ax2 = plt.subplot(122)
tzdf2.plot(figsize=(12, 18), alpha=1, column='dEOF1', cmap=plt.cm.RdBu, edgecolor='k',
linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5, ax=ax2)
plt.grid(False)
ax2.set_facecolor('xkcd:silver')
ax2.xaxis.set_visible(False)
ax2.yaxis.set_visible(False)
In [537]:
import pysal.esda.mapclassify
In [544]:
ax1 = plt.subplot(121)
tzdf2.plot(figsize=(18, 12), alpha=1, column='pEOF2', cmap=plt.cm.RdBu, edgecolor='k',
linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5, ax=ax1)
plt.grid(False)
ax1.set_facecolor('xkcd:silver')
ax1.xaxis.set_visible(False)
ax1.yaxis.set_visible(False)
ax2 = plt.subplot(122)
tzdf2.plot(figsize=(12, 18), alpha=1, column='dEOF2', cmap=plt.cm.RdBu, edgecolor='k',
linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5, ax=ax2)
plt.grid(False)
ax2.set_facecolor('xkcd:silver')
ax2.xaxis.set_visible(False)
ax2.yaxis.set_visible(False)
In [533]:
ax1 = plt.subplot(121)
tzdf2.plot(figsize=(18, 12), alpha=1, column='pEOF3', cmap=plt.cm.RdBu, edgecolor='k',
linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5, ax=ax1)
plt.grid(False)
ax1.set_facecolor('xkcd:silver')
ax1.xaxis.set_visible(False)
ax1.yaxis.set_visible(False)
ax2 = plt.subplot(122)
tzdf2.plot(figsize=(12, 18), alpha=1, column='dEOF3', cmap=plt.cm.RdBu, edgecolor='k',
linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5, ax=ax2)
plt.grid(False)
ax2.set_facecolor('xkcd:silver')
ax2.xaxis.set_visible(False)
ax2.yaxis.set_visible(False)
In [516]:
# ax1 = plt.subplot(121)
# tzdf2.plot(figsize=(18, 12), alpha=1, column='pEOF4', cmap=plt.cm.RdBu, edgecolor='k',
# linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5, ax=ax1)
# plt.grid(False)
# ax1.set_facecolor('xkcd:silver')
# ax1.xaxis.set_visible(False)
# ax1.yaxis.set_visible(False)
# ax2 = plt.subplot(122)
# tzdf2.plot(figsize=(12, 18), alpha=1, column='dEOF4', cmap=plt.cm.RdBu, edgecolor='k',
# linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5, ax=ax2)
# plt.grid(False)
# ax2.set_facecolor('xkcd:silver')
# ax2.xaxis.set_visible(False)
# ax2.yaxis.set_visible(False)
In [517]:
# ax1 = plt.subplot(121)
# tzdf2.plot(figsize=(18, 12), alpha=1, column='pEOF5', cmap=plt.cm.RdBu, edgecolor='k',
# linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5, ax=ax1)
# plt.grid(False)
# ax1.set_facecolor('xkcd:silver')
# ax1.xaxis.set_visible(False)
# ax1.yaxis.set_visible(False)
# ax2 = plt.subplot(122)
# tzdf2.plot(figsize=(12, 18), alpha=1, column='dEOF5', cmap=plt.cm.RdBu, edgecolor='k',
# linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5, ax=ax2)
# plt.grid(False)
# ax2.set_facecolor('xkcd:silver')
# ax2.xaxis.set_visible(False)
# ax2.yaxis.set_visible(False)
In [520]:
df4 = pd.DataFrame(data=pca.transform(counts_matrix.values)[:, :3], index=counts_matrix.index)
df4.index = df4.index.rename('timepoints')
df4.rename(columns={i:'pc%d' % (i+1) for i in range(3)}, inplace=True)
# df4.reset_index(inplace=True)
In [534]:
df4.plot(lw=1)
plt.xlim('2015-06-22', '2015-06-29')
plt.ylim(-0.02, 0.01)
Out[534]:
In [522]:
df4.plot(lw=0.5)
# plt.xlim('2015-06-22', '2015-06-29')
Out[522]:
In [524]:
df4.resample('1M').mean().plot()
df4.resample('1M').std().plot()
# plt.xlim('2015-06-22', '2015-06-29')
Out[524]:
In [402]:
# df4 = pd.DataFrame(data=pca.transform(counts_matrix.resample('1D').sum().values)[:, :5], index=counts_matrix.resample('1D').sum().index)
# df4.index = df4.index.rename('timepoints')
# df4.rename(columns={i:'pc%d' % i for i in range(5)}, inplace=True)
# # df4.reset_index(inplace=True)
In [403]:
df4.plot()
plt.xlim('2014-04-01', '2014-09-01')
Out[403]:
In [222]:
nmf = sklearn.decomposition.NMF(5, random_state=42)
In [223]:
nmf.fit(counts_matrix.resample('1D').sum().values)
# nmf.explained_variance_ratio_
Out[223]:
In [224]:
nmf.reconstruction_err_
Out[224]:
In [225]:
pickup_eof1, dropoff_eof1 = nmf.components_[0, :263], nmf.components_[0, 263:]
pickup_eof2, dropoff_eof2 = nmf.components_[1, :263], nmf.components_[1, 263:]
pickup_eof3, dropoff_eof3 = nmf.components_[2, :263], nmf.components_[2, 263:]
pickup_eof4, dropoff_eof4 = nmf.components_[3, :263], nmf.components_[3, 263:]
pickup_eof5, dropoff_eof5 = nmf.components_[4, :263], nmf.components_[4, 263:]
In [226]:
tzdf['pEOF1'] = pickup_eof1
tzdf['dEOF1'] = dropoff_eof1
tzdf['pEOF2'] = pickup_eof2
tzdf['dEOF2'] = dropoff_eof2
tzdf['pEOF3'] = pickup_eof3
tzdf['dEOF3'] = dropoff_eof3
tzdf['pEOF4'] = pickup_eof4
tzdf['dEOF4'] = dropoff_eof4
tzdf['pEOF5'] = pickup_eof5
tzdf['dEOF5'] = dropoff_eof5
In [227]:
tzdf2 = tzdf.copy()
tzdf2 = tzdf2[(tzdf2.borough != 'Staten Island') & (tzdf2.borough != 'EWR')]
In [228]:
tzdf2 = tzdf2.sort_values('N_dropoffs')
tzdf2['N_dropoffs_ranked'] = np.linspace(0, 1., tzdf2.shape[0])
tzdf2 = tzdf2.sort_values('N_pickups')
tzdf2['N_pickups_ranked'] = np.linspace(0, 1., tzdf2.shape[0])
tzdf2 = tzdf2.sort_values('LocationID')
In [235]:
tzdf2.iloc[:, -10:].describe()
Out[235]:
In [236]:
ax1 = plt.subplot(121)
tzdf2.plot(figsize=(12, 18), alpha=1, column='pEOF1', cmap=plt.cm.viridis, edgecolor='k',
linewidth=0.5, vmin=0, vmax=430., ax=ax1)
plt.grid(False)
ax1.set_facecolor('xkcd:silver')
ax1.xaxis.set_visible(False)
ax1.yaxis.set_visible(False)
ax2 = plt.subplot(122)
tzdf2.plot(figsize=(12, 18), alpha=1, column='dEOF1', cmap=plt.cm.viridis, edgecolor='k',
linewidth=0.5, vmin=0, vmax=430., ax=ax2)
plt.grid(False)
ax2.set_facecolor('xkcd:silver')
ax2.xaxis.set_visible(False)
ax2.yaxis.set_visible(False)
In [240]:
ax1 = plt.subplot(121)
tzdf2.plot(figsize=(12, 18), alpha=1, column='pEOF2', cmap=plt.cm.viridis, edgecolor='k',
linewidth=0.5, vmin=0, vmax=292., ax=ax1)
plt.grid(False)
ax1.set_facecolor('xkcd:silver')
ax1.xaxis.set_visible(False)
ax1.yaxis.set_visible(False)
ax2 = plt.subplot(122)
tzdf2.plot(figsize=(12, 18), alpha=1, column='dEOF2', cmap=plt.cm.viridis, edgecolor='k',
linewidth=0.5, vmin=0, vmax=292., ax=ax2)
plt.grid(False)
ax2.set_facecolor('xkcd:silver')
ax2.xaxis.set_visible(False)
ax2.yaxis.set_visible(False)
In [241]:
ax1 = plt.subplot(121)
tzdf2.plot(figsize=(12, 18), alpha=1, column='pEOF3', cmap=plt.cm.viridis, edgecolor='k',
linewidth=0.5, vmin=0, vmax=168., ax=ax1)
plt.grid(False)
ax1.set_facecolor('xkcd:silver')
ax1.xaxis.set_visible(False)
ax1.yaxis.set_visible(False)
ax2 = plt.subplot(122)
tzdf2.plot(figsize=(12, 18), alpha=1, column='dEOF3', cmap=plt.cm.viridis, edgecolor='k',
linewidth=0.5, vmin=0, vmax=168., ax=ax2)
plt.grid(False)
ax2.set_facecolor('xkcd:silver')
ax2.xaxis.set_visible(False)
ax2.yaxis.set_visible(False)
In [243]:
ax1 = plt.subplot(121)
tzdf2.plot(figsize=(12, 18), alpha=1, column='pEOF4', cmap=plt.cm.viridis, edgecolor='k',
linewidth=0.5, vmin=0, vmax=113., ax=ax1)
plt.grid(False)
ax1.set_facecolor('xkcd:silver')
ax1.xaxis.set_visible(False)
ax1.yaxis.set_visible(False)
ax2 = plt.subplot(122)
tzdf2.plot(figsize=(12, 18), alpha=1, column='dEOF4', cmap=plt.cm.viridis, edgecolor='k',
linewidth=0.5, vmin=0, vmax=113., ax=ax2)
plt.grid(False)
ax2.set_facecolor('xkcd:silver')
ax2.xaxis.set_visible(False)
ax2.yaxis.set_visible(False)
In [244]:
ax1 = plt.subplot(121)
tzdf2.plot(figsize=(12, 18), alpha=1, column='pEOF5', cmap=plt.cm.viridis, edgecolor='k',
linewidth=0.5, vmin=0, vmax=257., ax=ax1)
plt.grid(False)
ax1.set_facecolor('xkcd:silver')
ax1.xaxis.set_visible(False)
ax1.yaxis.set_visible(False)
ax2 = plt.subplot(122)
tzdf2.plot(figsize=(12, 18), alpha=1, column='dEOF5', cmap=plt.cm.viridis, edgecolor='k',
linewidth=0.5, vmin=0, vmax=257., ax=ax2)
plt.grid(False)
ax2.set_facecolor('xkcd:silver')
ax2.xaxis.set_visible(False)
ax2.yaxis.set_visible(False)
In [259]:
df4 = pd.DataFrame(data=nmf.transform(counts_matrix.resample('1D').sum().values), index=counts_matrix.resample('1D').sum().index)
In [272]:
df5 = df4.reset_index()
df5 = df5.rename(columns={'index':'d', 0: 'pc1', 1: 'pc2', 2:'pc3', 3:'pc4', 4:'pc5'})
In [273]:
import plotnine as p9
In [311]:
(p9.ggplot(df5, p9.aes('d', 'pc1')) + p9.geom_point(color='steelblue', size=.2)) + p9.stat_smooth(
method='lm',size=1)
Out[311]:
In [300]:
(p9.ggplot(df5, p9.aes('d', 'pc2')) + p9.geom_point()) + p9.stat_smooth(method='lowess')
Out[300]:
In [294]:
(p9.ggplot(df5, p9.aes('d', 'pc3')) + p9.geom_point()) + p9.stat_smooth(method='lowess')
Out[294]:
In [295]:
(p9.ggplot(df5, p9.aes('d', 'pc4')) + p9.geom_point()) + p9.stat_smooth(method='lowess')
Out[295]:
In [296]:
(p9.ggplot(df5, p9.aes('d', 'pc5')) + p9.geom_point()) + p9.stat_smooth(method='lowess')
Out[296]:
In [411]:
dir(sklearn.decomposition)
Out[411]:
In [ ]: