In [1]:
    
%matplotlib inline
%config InlineBackend.figure_format='retina'
import dask.dataframe as dd
import dask.distributed
import numpy as np
import pandas as pd
# import geopandas as gpd
from matplotlib.colors import SymLogNorm as symlog
from matplotlib import rcParams
import sklearn, sklearn.cluster
import matplotlib.pyplot as plt
import palettable
import seaborn as sns
import netCDF4
import geopandas
# Let pandas render up to 300 rows and 100 columns before truncating a displayed frame.
pd.options.display.max_rows = 300
pd.options.display.max_columns = 100
    
In [2]:
    
# Start a dask.distributed client with default arguments; the handle is kept in `client`.
client = dask.distributed.Client()
    
In [45]:
    
# Load the taxi-zone shapefile into a GeoDataFrame (path is relative to the notebook's working directory).
tzdf = geopandas.read_file('../shapefiles/taxi_zones.shp')
    
In [4]:
    
# Base matplotlib styling: sans-serif fallback chain, 12pt regular text,
# and 150 dpi for both on-screen and saved figures.
rcParams.update({
    'font.sans-serif': ('Helvetica', 'Arial', 'Open Sans', 'Bitstream Vera Sans'),
    'font.size': 12,
    'font.stretch': 'normal',
    'font.weight': 'normal',
    'savefig.dpi': 150,
    'figure.dpi': 150,
})
import seaborn as sns
import os.path
# Locate the per-user font directory: 'Library/Fonts/' when the home path
# contains '/Users/', '.fonts/' otherwise.
homedirpath = os.path.expanduser('~')
fontdirpath = os.path.join(
    homedirpath,
    'Library/Fonts/' if '/Users/' in homedirpath else '.fonts/',
)

# Font-size suffix shared by every mathtext pattern below.
fontsize2 = 'size={0:0.1f}'.format(12)

# One pattern for all mathtext faces; {0} is the font directory and {1} the
# TTF file name. Only the file name differs between the six rcParams entries.
mathtext_pattern = (':family=sans-serif:style=normal:variant=normal'
                    ':weight=normal:stretch=normal:file={0}/{1}:' + fontsize2)

for mathtext_key, font_file in (
        ('mathtext.it', 'HelveticaOblique.ttf'),
        ('mathtext.rm', 'Helvetica.ttf'),
        ('mathtext.tt', 'Helvetica.ttf'),
        ('mathtext.bf', 'HelveticaBold.ttf'),
        ('mathtext.cal', 'Helvetica.ttf'),
        ('mathtext.sf', 'Helvetica.ttf')):
    rcParams[mathtext_key] = mathtext_pattern.format(fontdirpath, font_file)
    
In [5]:
    
# Lazily open the trips dataset indexed by trip_id, keeping only the
# timestamp and taxi-zone columns needed for the sample preview.
df = dd.read_parquet(
    '/data/all_trips.parquet',
    index='trip_id',
    columns=[
        'pickup_datetime',
        'dropoff_datetime',
        'pickup_taxizone_id',
        'dropoff_taxizone_id',
    ],
)
    
    
In [6]:
    
# Draw a reproducible (random_state=42) one-in-a-million row sample and materialise it via .compute().
df2 = df.sample(frac=1.0e-6, random_state=42).compute()
    
In [7]:
    
# Discard sampled rows that have a missing value in any column.
df2 = df2.dropna()
    
In [8]:
    
# Attach a human-readable "<borough> | <zone>" label for both ends of each
# sampled trip, then shuffle the rows reproducibly.
zone_lookup_columns = ['LocationID', 'borough', 'zone']
df3 = df2
for trip_end in ('pickup', 'dropoff'):
    # Join on the zone id for this end of the trip ...
    df3 = df3.merge(
        tzdf[zone_lookup_columns],
        left_on=trip_end + '_taxizone_id',
        right_on='LocationID',
    )
    # ... build the label, and remove the lookup columns again so the next
    # merge does not collide with them.
    df3[trip_end + '_location'] = df3.borough.map(str) + " | " + df3.zone
    df3 = df3.drop(zone_lookup_columns, axis=1)
df3 = df3.sample(frac=1, replace=False, random_state=42).reset_index(drop=True)
    
In [9]:
    
# Render the first ten labelled trips (ordered by pickup time) as a single-line HTML string.
preview_rows = df3.head(10).sort_values('pickup_datetime').reset_index(drop=True)
preview_rows.to_html().replace("\n", "")
    
    Out[9]:
In [10]:
    
from IPython.display import HTML
# Display the same ten-row preview (sorted by pickup time, index renumbered) as rendered HTML.
HTML(df3.head(10).sort_values('pickup_datetime').reset_index(drop=True).to_html())
    
    Out[10]:
In [11]:
    
# Reload the trips indexed by pickup time with just the two zone-id columns.
df = dd.read_parquet(
    '/data/all_trips.parquet',
    engine='fastparquet',
    index='pickup_datetime',
    columns=['pickup_taxizone_id', 'dropoff_taxizone_id'],
)
# Missing zone ids are replaced by 266 so the columns can be cast to int32.
for zone_column in ('pickup_taxizone_id', 'dropoff_taxizone_id'):
    df[zone_column] = df[zone_column].fillna(266.).astype(np.int32)
    
    
In [12]:
    
# Preview the first rows of the zone-pair frame.
df.head()
    
    Out[12]:
In [13]:
    
# Count trips for every (pickup zone, dropoff zone) pair and bring the result into memory.
zone_pair_keys = ['pickup_taxizone_id', 'dropoff_taxizone_id']
count_dataframe = (
    df.reset_index()
    .groupby(zone_pair_keys)
    .count()
    .compute()
)
# The single remaining column holds the per-pair row count.
count_dataframe.columns = ['count']
count_dataframe.shape
    
    Out[13]:
In [14]:
    
# Preview the first rows of the per-pair trip counts.
count_dataframe.head()
    
    Out[14]:
In [15]:
    
# Dense 267x267 trip-count matrix indexed as [pickup zone id, dropoff zone id].
count_matrix = np.zeros((267, 267), dtype=np.int64)
# After reset_index the columns are, in order: pickup id, dropoff id, count.
zone_pair_counts = count_dataframe.reset_index()
count_matrix[
    zone_pair_counts.iloc[:, 0].values,
    zone_pair_counts.iloc[:, 1].values,
] = zone_pair_counts.iloc[:, 2].values
    
In [16]:
    
# Summary statistics of the per-pair trip counts.
count_dataframe.describe()
    
    Out[16]:
In [17]:
    
# Preview the counts with the two zone-id index levels turned back into ordinary columns.
count_dataframe.reset_index().head()
    
    Out[17]:
In [18]:
    
# <!-- collapse=True -->
# Heat map of trip counts between taxi zones on a symmetric-log colour scale.
# count_matrix is indexed [pickup, dropoff] and imshow draws rows along y and
# columns along x, so the matrix is plotted untransposed: pickup ids run down
# the y axis and dropoff ids along the x axis, matching the labels below.
# (The previous `.T` put dropoff on y while still labelling it "Pickup".)
# The [1:-3, 1:-3] slice leaves out row/column 0 and the last three ids.
plt.imshow(count_matrix[1:-3, 1:-3], norm=symlog(10000), origin='upper', cmap=plt.cm.Blues)
plt.grid(False)
plt.xlabel("Dropoff Taxi Zone ID")
plt.ylabel("Pickup Taxi Zone ID")
plt.gcf().set_size_inches(4, 4)
    
    
In [19]:
    
# Reload the trips indexed by pickup time with the pickup zone and trip type.
df = dd.read_parquet(
    '/data/all_trips.parquet',
    engine='fastparquet',
    index='pickup_datetime',
    columns=['pickup_taxizone_id', 'trip_type'],
)
# Keep every row whose trip_type is not 'uber'; the column is not needed afterwards.
df = df[df['trip_type'] != 'uber'].drop('trip_type', axis=1)
# Missing zone ids are replaced by 266 so the column can be cast to int32.
df['pickup_taxizone_id'] = df['pickup_taxizone_id'].fillna(266.).astype(np.int32)
    
    
In [20]:
    
def get_year_mo_day(data, col):
    """Return the timestamps of ``col`` truncated to whole hours.

    Despite the name, the resolution is one hour (``datetime64[h]``), not one day.

    Parameters
    ----------
    data : pandas.DataFrame
        A frame (typically one dask partition) holding the timestamps either
        as a regular column or as its index.
    col : str
        Name of the timestamp column to truncate. If ``data`` has no column
        with that name, the index is used instead, which covers the case
        where ``col`` was set as the index when the data was loaded.

    Returns
    -------
    numpy.ndarray
        One ``datetime64[h]`` value per row of ``data``.
    """
    # Earlier revisions ignored `col` and always used the index, which returned
    # the index's hours even when a different timestamp column was requested.
    if col in data.columns:
        stamps = data[col].values
    else:
        stamps = data.index.values
    return stamps.astype('M8[h]')
    
In [21]:
    
# Add an hour-resolution timestamp column by running get_year_mo_day on every
# partition, passing 'pickup_datetime' as its `col` argument.
# NOTE(review): the meta name 'asdf' looks like a placeholder — confirm nothing relies on it.
df['pickup_ymd'] = df.map_partitions(get_year_mo_day, 'pickup_datetime', meta=('asdf', np.datetime64))
    
    
In [22]:
    
# Peek at the last rows with the index moved into a column (renamed to 'N' only if that column is called 'index').
df.reset_index().rename(columns=dict(index='N')).tail()
    
    Out[22]:
In [23]:
    
# Number of rows per (pickup zone, truncated pickup timestamp), computed
# eagerly and ordered by that two-level index.
pickup_counts_df = (
    df.reset_index()
    .rename(columns={'index': 'N'})
    .groupby(['pickup_taxizone_id', 'pickup_ymd'])
    .count()
    .compute()
    .sort_index()
)
    
In [24]:
    
# Preview the first rows of the per-zone pickup counts.
pickup_counts_df.head()
    
    Out[24]:
In [25]:
    
# Pivot index level 0 (the pickup zone id) into columns, leaving one row per timestamp.
z = pickup_counts_df.unstack(0)
    
In [26]:
    
# Relabel the columns positionally with the strings '1' .. '266'.
# NOTE(review): this assumes the unstack produced exactly 266 columns in ascending
# zone-id order; a zone id absent from the data would make this assignment fail — verify.
z.columns = np.arange(1, 267).astype(str)
    
In [27]:
    
# Align the pickup counts to a complete hourly grid covering 2009 through 2016:
# a right merge against an empty frame that carries only that index. Hours with
# no data become NaN, which is turned into 0 before casting to int32.
hourly_grid = pd.DataFrame(
    index=pd.date_range('2009-01-01 00:00:00', '2016-12-31 23:00:00', freq='H')
)
z = z.merge(hourly_grid, how='right', left_index=True, right_index=True)
z = z.fillna(0).astype(np.int32)
    
In [28]:
    
# Preview the first rows of the hourly pickup matrix.
z.head()
    
    Out[28]:
In [29]:
    
import fastparquet
# Persist the hourly pickup matrix as a SNAPPY-compressed parquet file.
fastparquet.write('/data/trips_pickups_matrix.parquet', z, compression='SNAPPY')
    
In [30]:
    
# Reload the trips indexed by pickup time with the dropoff timestamp, dropoff
# zone and trip type.
df = dd.read_parquet(
    '/data/all_trips.parquet',
    engine='fastparquet',
    index='pickup_datetime',
    columns=['dropoff_datetime', 'dropoff_taxizone_id', 'trip_type'],
)
# Keep every row whose trip_type is not 'uber'; the column is not needed afterwards.
df = df[df['trip_type'] != 'uber'].drop('trip_type', axis=1)
# Missing zone ids are replaced by 266 so the column can be cast to int32.
df['dropoff_taxizone_id'] = df['dropoff_taxizone_id'].fillna(266.).astype(np.int32)
    
    
In [31]:
    
def get_year_mo_day(data, col):
    """Return the timestamps of ``col`` truncated to whole hours.

    Despite the name, the resolution is one hour (``datetime64[h]``), not one day.

    Parameters
    ----------
    data : pandas.DataFrame
        A frame (typically one dask partition) holding the timestamps either
        as a regular column or as its index.
    col : str
        Name of the timestamp column to truncate. If ``data`` has no column
        with that name, the index is used instead, which covers the case
        where ``col`` was set as the index when the data was loaded.

    Returns
    -------
    numpy.ndarray
        One ``datetime64[h]`` value per row of ``data``.
    """
    # Earlier revisions ignored `col` and always used the index. With the frame
    # indexed by pickup time, asking for 'dropoff_datetime' therefore returned
    # pickup hours; the requested column is now honoured when it exists.
    if col in data.columns:
        stamps = data[col].values
    else:
        stamps = data.index.values
    return stamps.astype('M8[h]')
    
In [32]:
    
# Add an hour-resolution timestamp column by running get_year_mo_day on every
# partition, passing 'dropoff_datetime' as its `col` argument.
# NOTE(review): the meta name 'asdf' looks like a placeholder — confirm nothing relies on it.
df['dropoff_ymd'] = df.map_partitions(get_year_mo_day, 'dropoff_datetime', meta=('asdf', np.datetime64))
    
    
In [33]:
    
# Peek at the last rows with the pickup-time index discarded.
df.reset_index(drop=True).tail()
    
    Out[33]:
In [34]:
    
# Number of rows per (dropoff zone, truncated dropoff timestamp). The index is
# dropped first and the dropoff_datetime column, renamed 'N', is what gets
# counted. The result is computed eagerly and ordered by its two-level index.
dropoff_counts_df = (
    df.reset_index(drop=True)
    .rename(columns={'dropoff_datetime': 'N'})
    .groupby(['dropoff_taxizone_id', 'dropoff_ymd'])
    .count()
    .compute()
    .sort_index()
)
    
In [35]:
    
# Preview the first rows of the per-zone dropoff counts.
dropoff_counts_df.head()
    
    Out[35]:
In [36]:
    
# Pivot index level 0 (the dropoff zone id) into columns, leaving one row per timestamp.
z2 = dropoff_counts_df.unstack(0)
    
In [37]:
    
# Align the dropoff counts to a complete hourly grid covering 2009 through 2016:
# a right merge against an empty frame that carries only that index. Hours with
# no data become NaN, which is turned into 0 before casting to int32.
hourly_grid = pd.DataFrame(
    index=pd.date_range('2009-01-01 00:00:00', '2016-12-31 23:00:00', freq='H')
)
z2 = z2.merge(hourly_grid, how='right', left_index=True, right_index=True)
z2 = z2.fillna(0).astype(np.int32)
    
    
In [38]:
    
# Relabel the columns positionally with the strings '1' .. '266'.
# NOTE(review): this assumes exactly 266 columns in ascending zone-id order at this
# point; a zone id absent from the data would make this assignment fail — verify.
z2.columns = np.arange(1, 267).astype(str)
    
In [39]:
    
# Preview the first rows of the hourly dropoff matrix.
z2.head()
    
    Out[39]:
In [40]:
    
import fastparquet
# Persist the hourly dropoff matrix as a SNAPPY-compressed parquet file.
fastparquet.write('/data/trips_dropoffs_matrix.parquet', z2, compression='SNAPPY')
    
In [86]:
    
# Re-read the taxi-zone shapefile, replacing any earlier `tzdf` (and any columns added to it).
tzdf = geopandas.read_file('../shapefiles/taxi_zones.shp')
    
In [87]:
    
import fastparquet
# Load the two hourly count matrices written earlier back into pandas frames.
dropoffs_matrix = fastparquet.ParquetFile('/data/trips_dropoffs_matrix.parquet').to_pandas()
pickups_matrix = fastparquet.ParquetFile('/data/trips_pickups_matrix.parquet').to_pandas()
    
In [88]:
    
# Keep only the first 263 zone columns of each matrix. The files are written
# with 266 columns ('1' .. '266'), so this drops '264', '265' and '266'
# (266 being the id this notebook substitutes for missing zones).
# An absolute bound is used instead of `:-3` so that re-running the cell is a
# no-op rather than trimming three more columns each time.
dropoffs_matrix = dropoffs_matrix.iloc[:, :263]
pickups_matrix = pickups_matrix.iloc[:, :263]
    
In [135]:
    
# Join the two matrices side by side: dropoff columns come first, pickup columns second.
counts_matrix = pd.concat([dropoffs_matrix, pickups_matrix], axis=1 )
    
In [321]:
    
# Show the zone name stored under index label 0 of the shapefile table.
tzdf.zone[0]
    
    Out[321]:
In [324]:
    
# Histograms (KDE disabled) of column 263 and column 0 of counts_matrix.
# NOTE(review): with 263 columns per block these would be the first pickup column
# and the first dropoff column respectively — confirm.
sns.distplot(counts_matrix.iloc[:, 263+0], kde=False)
sns.distplot(counts_matrix.iloc[:, 0], kde=False)
    
    Out[324]:
    
In [136]:
    
import sklearn, sklearn.decomposition
    
In [414]:
    
# pca = sklearn.decomposition.PCA(n_components=20, whiten=True)
# # pca.fit(counts_matrix.resample('1D').sum().values)
# pca.fit(counts_matrix.values)
# pca.explained_variance_ratio_
    
In [500]:
    
# Fit a 3-component FastICA (fixed seed, whitening enabled) on the combined
# counts matrix and keep the transformed rows in `yvals`.
# NOTE(review): the estimator is stored under the name `pca` although it is a
# FastICA instance; later cells refer to it by that name.
pca = sklearn.decomposition.FastICA(n_components=3, random_state=42, whiten=True)
# pca.fit(counts_matrix.resample('1D').sum().values)
yvals = pca.fit_transform(counts_matrix.values)
# pca.explained_variance_ratio_
    
    
In [501]:
    
# Check the dimensions of the transformed data.
yvals.shape
    
    Out[501]:
In [502]:
    
# Split every fitted component into its dropoff and pickup halves.
# counts_matrix is concat([dropoffs_matrix, pickups_matrix], axis=1) with 263
# columns per block, so features [:263] are the dropoff zones and [263:] the
# pickup zones. (The halves were previously assigned the other way round,
# which mislabelled every pickup/dropoff map drawn from them.)
dropoff_eof1, pickup_eof1 = pca.components_[0, :263], pca.components_[0, 263:]
dropoff_eof2, pickup_eof2 = pca.components_[1, :263], pca.components_[1, 263:]
dropoff_eof3, pickup_eof3 = pca.components_[2, :263], pca.components_[2, 263:]
    
In [503]:
    
# Store the pickup/dropoff halves of each component on the zone table as
# columns pEOF<k> / dEOF<k>, in component order.
ica_component_halves = (
    (pickup_eof1, dropoff_eof1),
    (pickup_eof2, dropoff_eof2),
    (pickup_eof3, dropoff_eof3),
)
for component_number, (pickup_half, dropoff_half) in enumerate(ica_component_halves, start=1):
    tzdf['pEOF%d' % component_number] = pickup_half
    tzdf['dEOF%d' % component_number] = dropoff_half
    
In [504]:
    
# Total dropoffs / pickups per matrix column, summed over all rows.
# The sums are assigned positionally (`.values`).
# NOTE(review): this presumes the row order of tzdf matches the column order of the matrices — confirm.
tzdf['N_dropoffs'] = dropoffs_matrix.sum(axis=0).values
tzdf['N_pickups'] = pickups_matrix.sum(axis=0).values
    
In [505]:
    
# Base-10 logarithm of the per-zone totals.
# NOTE(review): a zone whose total is 0 would come out as -inf here — confirm none exist.
tzdf['log10_N_dropoffs'] = np.log10(tzdf.N_dropoffs)
tzdf['log10_N_pickups'] = np.log10(tzdf.N_pickups)
    
In [506]:
    
# Reproject the zone geometries to EPSG:3857. The `epsg=` keyword replaces the
# {'init': 'epsg:3857'} dictionary form, which is deprecated in current
# pyproj/geopandas releases.
tzdf = tzdf.to_crs(epsg=3857)
    
In [507]:
    
# Preview the zone table with the newly added columns.
tzdf.head()
    
    Out[507]:
In [526]:
    
# Work on a copy of the zone table that leaves out Staten Island and EWR.
tzdf2 = tzdf.copy()
in_staten_island = tzdf2.borough == 'Staten Island'
in_ewr = tzdf2.borough == 'EWR'
tzdf2 = tzdf2[~(in_staten_island | in_ewr)]
    
In [527]:
    
# For each total, sort ascending and spread the zones evenly over [0, 1] to get
# a rank-based colour value; finally restore LocationID order.
for total_column in ('N_dropoffs', 'N_pickups'):
    tzdf2 = tzdf2.sort_values(total_column)
    tzdf2[total_column + '_ranked'] = np.linspace(0, 1., tzdf2.shape[0])
tzdf2 = tzdf2.sort_values('LocationID')
    
In [528]:
    
# Choropleth of the ranked dropoff totals on a black background, axes hidden.
ax = tzdf2.plot(
    column='N_dropoffs_ranked',
    cmap=plt.cm.viridis,
    figsize=(12, 18),
    alpha=1,
    edgecolor='k',
    linewidth=0.5,
)
ax.grid(False)
ax.set_facecolor('k')
for hidden_axis in (ax.xaxis, ax.yaxis):
    hidden_axis.set_visible(False)
    
    
In [529]:
    
# Choropleth of the ranked pickup totals on a black background, axes hidden.
ax = tzdf2.plot(
    column='N_pickups_ranked',
    cmap=plt.cm.viridis,
    figsize=(12, 18),
    alpha=1,
    edgecolor='k',
    linewidth=0.5,
)
ax.grid(False)
ax.set_facecolor('k')
for hidden_axis in (ax.xaxis, ax.yaxis):
    hidden_axis.set_visible(False)
    
    
In [530]:
    
# Summary statistics of the last ten columns of the filtered zone table.
tzdf2.iloc[:, -10:].describe()
    
    Out[530]:
In [531]:
    
# Side-by-side maps of component 1: pEOF1 on the left, dEOF1 on the right,
# sharing one diverging colour scale.
# The figure is created explicitly because GeoDataFrame.plot ignores `figsize`
# whenever `ax` is supplied; the size used is the one the left panel asked for.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8))
for ax, column in ((ax1, 'pEOF1'), (ax2, 'dEOF1')):
    tzdf2.plot(ax=ax, alpha=1, column=column, cmap=plt.cm.RdBu, edgecolor='k',
               linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5)
    ax.grid(False)
    ax.set_facecolor('xkcd:silver')
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    
    
In [537]:
    
import pysal.esda.mapclassify
    
In [544]:
    
# Side-by-side maps of component 2: pEOF2 on the left, dEOF2 on the right,
# sharing one diverging colour scale.
# The figure is created explicitly because GeoDataFrame.plot ignores `figsize`
# whenever `ax` is supplied; the size used is the one the left panel asked for.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 12))
for ax, column in ((ax1, 'pEOF2'), (ax2, 'dEOF2')):
    tzdf2.plot(ax=ax, alpha=1, column=column, cmap=plt.cm.RdBu, edgecolor='k',
               linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5)
    ax.grid(False)
    ax.set_facecolor('xkcd:silver')
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    
    
In [533]:
    
# Side-by-side maps of component 3: pEOF3 on the left, dEOF3 on the right,
# sharing one diverging colour scale.
# The figure is created explicitly because GeoDataFrame.plot ignores `figsize`
# whenever `ax` is supplied; the size used is the one the left panel asked for.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 12))
for ax, column in ((ax1, 'pEOF3'), (ax2, 'dEOF3')):
    tzdf2.plot(ax=ax, alpha=1, column=column, cmap=plt.cm.RdBu, edgecolor='k',
               linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5)
    ax.grid(False)
    ax.set_facecolor('xkcd:silver')
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    
    
In [516]:
    
# ax1 = plt.subplot(121)
# tzdf2.plot(figsize=(18, 12), alpha=1, column='pEOF4', cmap=plt.cm.RdBu, edgecolor='k',
#           linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5, ax=ax1)
# plt.grid(False)
# ax1.set_facecolor('xkcd:silver')
# ax1.xaxis.set_visible(False)
# ax1.yaxis.set_visible(False)
# ax2 = plt.subplot(122)
# tzdf2.plot(figsize=(12, 18), alpha=1, column='dEOF4', cmap=plt.cm.RdBu, edgecolor='k',
#           linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5, ax=ax2)
# plt.grid(False)
# ax2.set_facecolor('xkcd:silver')
# ax2.xaxis.set_visible(False)
# ax2.yaxis.set_visible(False)
    
In [517]:
    
# ax1 = plt.subplot(121)
# tzdf2.plot(figsize=(18, 12), alpha=1, column='pEOF5', cmap=plt.cm.RdBu, edgecolor='k',
#           linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5, ax=ax1)
# plt.grid(False)
# ax1.set_facecolor('xkcd:silver')
# ax1.xaxis.set_visible(False)
# ax1.yaxis.set_visible(False)
# ax2 = plt.subplot(122)
# tzdf2.plot(figsize=(12, 18), alpha=1, column='dEOF5', cmap=plt.cm.RdBu, edgecolor='k',
#           linewidth=0.5, vmin=-0.2e-5, vmax=0.2e-5, ax=ax2)
# plt.grid(False)
# ax2.set_facecolor('xkcd:silver')
# ax2.xaxis.set_visible(False)
# ax2.yaxis.set_visible(False)
    
In [520]:
    
# Time series of the first three transformed components, one column per
# component (pc1..pc3), indexed by the counts matrix's timestamps under the
# index name 'timepoints'.
component_scores = pca.transform(counts_matrix.values)[:, :3]
df4 = pd.DataFrame(
    data=component_scores,
    index=counts_matrix.index.rename('timepoints'),
    columns=['pc%d' % (i + 1) for i in range(3)],
)
    
In [534]:
    
# Zoom in on one week of the component time series (22-29 June 2015).
ax = df4.plot(lw=1)
ax.set_xlim('2015-06-22', '2015-06-29')
ax.set_ylim(-0.02, 0.01)
    
    Out[534]:
    
In [522]:
    
# Plot the full component time series with thin lines.
df4.plot(lw=0.5)
# plt.xlim('2015-06-22', '2015-06-29')
    
    Out[522]:
    
In [524]:
    
# Monthly mean and monthly standard deviation of each component, drawn as two separate plots.
df4.resample('1M').mean().plot()
df4.resample('1M').std().plot()
# plt.xlim('2015-06-22', '2015-06-29')
    
    Out[524]:
    
    
In [402]:
    
# df4 = pd.DataFrame(data=pca.transform(counts_matrix.resample('1D').sum().values)[:, :5], index=counts_matrix.resample('1D').sum().index)
# df4.index = df4.index.rename('timepoints')
# df4.rename(columns={i:'pc%d' % i for i in range(5)}, inplace=True)
# # df4.reset_index(inplace=True)
    
In [403]:
    
# Plot the component time series restricted to April through August 2014.
df4.plot()
plt.xlim('2014-04-01', '2014-09-01')
    
    Out[403]:
    
In [222]:
    
# Five-component non-negative matrix factorisation with a fixed seed.
nmf = sklearn.decomposition.NMF(n_components=5, random_state=42)
    
In [223]:
    
# Fit the NMF on the counts matrix aggregated to daily sums.
nmf.fit(counts_matrix.resample('1D').sum().values)
# nmf.explained_variance_ratio_
    
    Out[223]:
In [224]:
    
# Show the reconstruction error reported by the fitted NMF.
nmf.reconstruction_err_
    
    Out[224]:
In [225]:
    
# Split every NMF component into its dropoff and pickup halves.
# counts_matrix is concat([dropoffs_matrix, pickups_matrix], axis=1) with 263
# columns per block, so features [:263] are the dropoff zones and [263:] the
# pickup zones. (The halves were previously assigned the other way round,
# which mislabelled every pickup/dropoff map drawn from them.)
dropoff_eof1, pickup_eof1 = nmf.components_[0, :263], nmf.components_[0, 263:]
dropoff_eof2, pickup_eof2 = nmf.components_[1, :263], nmf.components_[1, 263:]
dropoff_eof3, pickup_eof3 = nmf.components_[2, :263], nmf.components_[2, 263:]
dropoff_eof4, pickup_eof4 = nmf.components_[3, :263], nmf.components_[3, 263:]
dropoff_eof5, pickup_eof5 = nmf.components_[4, :263], nmf.components_[4, 263:]
    
In [226]:
    
# Store the pickup/dropoff halves of each NMF component on the zone table as
# columns pEOF<k> / dEOF<k>, in component order.
nmf_component_halves = (
    (pickup_eof1, dropoff_eof1),
    (pickup_eof2, dropoff_eof2),
    (pickup_eof3, dropoff_eof3),
    (pickup_eof4, dropoff_eof4),
    (pickup_eof5, dropoff_eof5),
)
for component_number, (pickup_half, dropoff_half) in enumerate(nmf_component_halves, start=1):
    tzdf['pEOF%d' % component_number] = pickup_half
    tzdf['dEOF%d' % component_number] = dropoff_half
    
In [227]:
    
# Work on a copy of the zone table that leaves out Staten Island and EWR.
tzdf2 = tzdf.copy()
in_staten_island = tzdf2.borough == 'Staten Island'
in_ewr = tzdf2.borough == 'EWR'
tzdf2 = tzdf2[~(in_staten_island | in_ewr)]
    
In [228]:
    
# For each total, sort ascending and spread the zones evenly over [0, 1] to get
# a rank-based colour value; finally restore LocationID order.
for total_column in ('N_dropoffs', 'N_pickups'):
    tzdf2 = tzdf2.sort_values(total_column)
    tzdf2[total_column + '_ranked'] = np.linspace(0, 1., tzdf2.shape[0])
tzdf2 = tzdf2.sort_values('LocationID')
    
In [235]:
    
# Summary statistics of the last ten columns of the filtered zone table.
tzdf2.iloc[:, -10:].describe()
    
    Out[235]:
In [236]:
    
# Side-by-side maps of NMF component 1: pEOF1 on the left, dEOF1 on the right,
# on a shared 0..430 colour scale.
# The figure is created explicitly because GeoDataFrame.plot ignores `figsize`
# whenever `ax` is supplied; the size is the one both panels asked for.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 18))
for ax, column in ((ax1, 'pEOF1'), (ax2, 'dEOF1')):
    tzdf2.plot(ax=ax, alpha=1, column=column, cmap=plt.cm.viridis, edgecolor='k',
               linewidth=0.5, vmin=0, vmax=430.)
    ax.grid(False)
    ax.set_facecolor('xkcd:silver')
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    
    
In [240]:
    
# Side-by-side maps of NMF component 2: pEOF2 on the left, dEOF2 on the right,
# on a shared 0..292 colour scale.
# The figure is created explicitly because GeoDataFrame.plot ignores `figsize`
# whenever `ax` is supplied; the size is the one both panels asked for.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 18))
for ax, column in ((ax1, 'pEOF2'), (ax2, 'dEOF2')):
    tzdf2.plot(ax=ax, alpha=1, column=column, cmap=plt.cm.viridis, edgecolor='k',
               linewidth=0.5, vmin=0, vmax=292.)
    ax.grid(False)
    ax.set_facecolor('xkcd:silver')
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    
    
In [241]:
    
# Side-by-side maps of NMF component 3: pEOF3 on the left, dEOF3 on the right,
# on a shared 0..168 colour scale.
# The figure is created explicitly because GeoDataFrame.plot ignores `figsize`
# whenever `ax` is supplied; the size is the one both panels asked for.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 18))
for ax, column in ((ax1, 'pEOF3'), (ax2, 'dEOF3')):
    tzdf2.plot(ax=ax, alpha=1, column=column, cmap=plt.cm.viridis, edgecolor='k',
               linewidth=0.5, vmin=0, vmax=168.)
    ax.grid(False)
    ax.set_facecolor('xkcd:silver')
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    
    
In [243]:
    
# Side-by-side maps of NMF component 4: pEOF4 on the left, dEOF4 on the right,
# on a shared 0..113 colour scale.
# The figure is created explicitly because GeoDataFrame.plot ignores `figsize`
# whenever `ax` is supplied; the size is the one both panels asked for.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 18))
for ax, column in ((ax1, 'pEOF4'), (ax2, 'dEOF4')):
    tzdf2.plot(ax=ax, alpha=1, column=column, cmap=plt.cm.viridis, edgecolor='k',
               linewidth=0.5, vmin=0, vmax=113.)
    ax.grid(False)
    ax.set_facecolor('xkcd:silver')
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    
    
In [244]:
    
# Side-by-side maps of NMF component 5: pEOF5 on the left, dEOF5 on the right,
# on a shared 0..257 colour scale.
# The figure is created explicitly because GeoDataFrame.plot ignores `figsize`
# whenever `ax` is supplied; the size is the one both panels asked for.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 18))
for ax, column in ((ax1, 'pEOF5'), (ax2, 'dEOF5')):
    tzdf2.plot(ax=ax, alpha=1, column=column, cmap=plt.cm.viridis, edgecolor='k',
               linewidth=0.5, vmin=0, vmax=257.)
    ax.grid(False)
    ax.set_facecolor('xkcd:silver')
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    
    
In [259]:
    
# Project the daily-summed counts onto the fitted NMF components, one row per day.
daily_counts = counts_matrix.resample('1D').sum()
df4 = pd.DataFrame(
    data=nmf.transform(daily_counts.values),
    index=daily_counts.index,
)
    
In [272]:
    
# Flatten the daily component scores for plotting: the former index column
# ('index') becomes 'd' and the integer columns 0..4 become pc1..pc5.
plot_column_names = {component: 'pc%d' % (component + 1) for component in range(5)}
plot_column_names['index'] = 'd'
df5 = df4.reset_index().rename(columns=plot_column_names)
    
In [273]:
    
import plotnine as p9
    
In [311]:
    
# Daily pc1 scores as small steel-blue points with a linear-model trend line.
(
    p9.ggplot(df5, p9.aes('d', 'pc1'))
    + p9.geom_point(color='steelblue', size=.2)
    + p9.stat_smooth(method='lm', size=1)
)
    
    
    Out[311]:
In [300]:
    
# Daily pc2 scores as points with a lowess smoother.
(
    p9.ggplot(df5, p9.aes('d', 'pc2'))
    + p9.geom_point()
    + p9.stat_smooth(method='lowess')
)
    
    
    
    Out[300]:
In [294]:
    
# Daily pc3 scores as points with a lowess smoother.
(
    p9.ggplot(df5, p9.aes('d', 'pc3'))
    + p9.geom_point()
    + p9.stat_smooth(method='lowess')
)
    
    
    
    Out[294]:
In [295]:
    
# Daily pc4 scores as points with a lowess smoother.
(
    p9.ggplot(df5, p9.aes('d', 'pc4'))
    + p9.geom_point()
    + p9.stat_smooth(method='lowess')
)
    
    
    
    Out[295]:
In [296]:
    
# Daily pc5 scores as points with a lowess smoother.
(
    p9.ggplot(df5, p9.aes('d', 'pc5'))
    + p9.geom_point()
    + p9.stat_smooth(method='lowess')
)
    
    
    
    Out[296]:
In [411]:
    
# List the names exposed by sklearn.decomposition.
# NOTE(review): looks like leftover interactive exploration — consider removing before sharing.
dir(sklearn.decomposition)
    
    Out[411]:
In [ ]: