In [1]:
%matplotlib inline
%config InlineBackend.figure_format='retina'
import dask.dataframe as dd
import dask.array as da
import dask.distributed
import geopandas as gpd
import pandas as pd
import numpy as np
import iris, iris.plot
import sklearn
from shapely.geometry import Point
import matplotlib.pyplot as plt
from matplotlib.colors import SymLogNorm as symlog
from matplotlib import rcParams
import seaborn as sns
import palettable
# Use seaborn's 'white' style for every figure drawn in this notebook.
sns.set_style('white')
# No scheduler address is given, so dask.distributed sets up a local cluster
# and this client becomes the default scheduler for later dask computations.
client = dask.distributed.Client()
In [2]:
# --- Base font settings ---------------------------------------------------
rcParams['font.sans-serif'] = ('Helvetica', 'Arial', 'Open Sans', 'Bitstream Vera Sans')
for _key, _value in (('font.size', 12),
                     ('font.stretch', 'normal'),
                     ('font.weight', 'normal')):
    rcParams[_key] = _value

# --- Locate the per-user font directory -----------------------------------
import os.path

homedirpath = os.path.expanduser('~')
# A home directory under /Users/ is treated as the sign of a macOS-style
# layout (fonts in ~/Library/Fonts/); anything else falls back to ~/.fonts/.
fontdirpath = os.path.join(
    homedirpath,
    'Library/Fonts/' if '/Users/' in homedirpath else '.fonts/',
)

# --- Point every mathtext face at a Helvetica TTF file --------------------
fontsize2 = 'size={0:0.1f}'.format(12)
# Fontconfig-style pattern shared by all faces: {0} is the font directory,
# {1} the TTF file name. fontdirpath already ends in '/', so the rendered
# path contains a doubled slash, exactly as in the hand-written originals.
_mathtext_pattern = (':family=sans-serif:style=normal:variant=normal'
                     ':weight=normal:stretch=normal:file={0}/{1}:')
_mathtext_faces = (
    ('it', 'HelveticaOblique.ttf'),
    ('rm', 'Helvetica.ttf'),
    ('tt', 'Helvetica.ttf'),
    ('bf', 'HelveticaBold.ttf'),
    ('cal', 'Helvetica.ttf'),
    ('sf', 'Helvetica.ttf'),
)
for _face, _ttf in _mathtext_faces:
    rcParams['mathtext.' + _face] = (
        _mathtext_pattern.format(fontdirpath, _ttf) + fontsize2
    )
In [3]:
# Open the raw trips dataset (presumably Citi Bike trip records, judging by
# the path) as a dask DataFrame using the fastparquet engine.
bike = dd.read_parquet('/data/citibike.parquet', engine='fastparquet')
In [4]:
# Make 'start_time' the index, asking for 64 output partitions.
# NOTE(review): rebinding `bike` makes this cell non-idempotent -- a second run
# would presumably fail to find a 'start_time' column, since it is now the index.
bike = bike.set_index('start_time', npartitions=64)
In [5]:
# Persist the start_time-indexed frame so it can be reloaded from disk.
# `has_nulls` and `object_encoding` are fastparquet writer options, so the
# engine is pinned explicitly (matching the read_parquet calls in this
# notebook) instead of depending on whichever engine dask picks by default.
bike.to_parquet('/data/citibike_start_time_indexed.parquet', engine='fastparquet',
                compression='SNAPPY', has_nulls=True, object_encoding='json')
In [6]:
# Rebind `bike` to the start_time-indexed copy written to disk, read with the
# fastparquet engine.
bike = dd.read_parquet('/data/citibike_start_time_indexed.parquet', engine='fastparquet')
In [29]:
# Narrow the trips frame to the two station-id columns.
station_columns = ['start_station_id', 'end_station_id']
bike2 = bike[station_columns]
In [30]:
# Draw a 10% random sample with a fixed seed (reproducible), then compute it
# so `z` is an in-memory result rather than a lazy dask collection.
sampled_lazy = bike2.sample(frac=0.1, random_state=42)
z = sampled_lazy.compute()
In [8]:
def feature_engineer_times(df):
    """Build time-derived features from a datetime-indexed frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index is a ``DatetimeIndex``. Only the index is read;
        the columns are ignored.

    Returns
    -------
    pandas.DataFrame
        New frame sharing ``df.index`` with a single float column,
        ``hours_till_next_business_hour``: the number of hours from each
        timestamp to that timestamp shifted by ``BusinessHour(0)``.
    """
    # BusinessHour requires an integer n. The previous value (.01) was
    # truncated to 0 by old pandas and is rejected by current releases, so 0
    # is spelled out. With n=0 a timestamp outside business hours is rolled
    # forward to the next opening time and one inside them is left unchanged.
    next_open = df.index + pd.tseries.offsets.BusinessHour(0)

    df2 = pd.DataFrame(index=df.index)
    df2['hours_till_next_business_hour'] = (
        (next_open - df.index).total_seconds() / 3600.
    )
    # The original built df2 and then fell off the end, returning None.
    return df2
In [9]:
# Week-of-year number of each sampled trip's start time.
# NOTE(review): later pandas releases deprecate DatetimeIndex.weekofyear in
# favour of isocalendar().week -- confirm against the pandas version in use.
z.index.weekofyear
Out[9]:
In [10]:
# Fraction of the year at trip start, from day-of-year, hour and minute
# (seconds are ignored and a 365-day year is assumed).
# NOTE(review): dayofyear is 1-based, so values start at 1/365, not 0.
doy_term = z.index.dayofyear / 365.
hour_term = z.index.hour / 24./365.
minute_term = z.index.minute / 60. / 24. / 365.
doy_term + hour_term + minute_term
Out[10]:
In [11]:
# Fraction of the day elapsed at trip start (hour and minute only).
hour_of_day_term = z.index.hour / 24.
minute_of_day_term = z.index.minute / 60. / 24.
hour_of_day_term + minute_of_day_term
Out[11]:
In [12]:
# Fraction of the current hour elapsed at trip start (minute / 60).
(z.index.minute / 60. )
Out[12]:
In [13]:
# d_df['weekday_name'] = d_df.map_partitions(lambda l: l.index.weekday_name, meta=('asdf', str))
# d_df['bday_forward'] = d_df.map_partitions(lambda l: (l.index+pd.tseries.offsets.BusinessHour(1)) - (l.index), meta=('asdf', 'M8[ns]'))
In [31]:
# Distribution of trip start times over the day, as a fraction of 24 hours.
day_fraction = (z.index.hour / 24.) + (z.index.minute / 60. / 24. )
sns.distplot(day_fraction)
Out[31]:
In [32]:
# Distribution of the minute-of-hour at trip start, using unit-width bins
# with edges 0, 1, ..., 61.
minute_edges = np.linspace(0, 61, 62)
sns.distplot(z.index.minute, bins=minute_edges)
Out[32]:
In [33]:
# Distribution of trip start times over the year, as a fraction of a 365-day
# year built from day-of-year, hour and minute.
year_fraction = (z.index.dayofyear / 365.) + (z.index.hour / 24./365.) + (z.index.minute / 60. / 24. / 365.)
sns.distplot(year_fraction)
Out[33]:
In [ ]:
In [35]:
# Trip starts across the week: day-of-week plus the fraction of that day,
# in plain counts (no KDE, no normalisation) with one bin per hour.
week_position = z.index.dayofweek + z.index.hour / 24. + (z.index.minute / 60./ 24. )
hourly_edges = np.arange(0., 7.041666666, 1/24.)
sns.distplot(week_position, bins=hourly_edges, kde=False, norm_hist=False)
# Pad the x-range by one hour-wide bin on each side of the 0-7 day span.
plt.xlim(-1/24., 7.+1/24.)
Out[35]:
In [18]:
# Gap between each trip start and the same instant advanced by one business
# hour, with the one added hour subtracted again, expressed in hours
# (presumably the wait until business hours are open -- TODO confirm intent).
plus_one_bh = z.index + pd.tseries.offsets.BusinessHour(1)
gap_seconds = (plus_one_bh - z.index).total_seconds() - 3600.
sns.distplot(gap_seconds/3600.)
Out[18]:
In [19]:
# Signed gap, in hours, between each trip start and the same instant moved
# back by one business hour (no correction for the one-hour shift itself).
minus_one_bh = z.index + pd.tseries.offsets.BusinessHour(-1)
back_gap_seconds = (minus_one_bh - z.index).total_seconds()
sns.distplot(back_gap_seconds/3600.)
Out[19]:
In [ ]:
# Scratch frame on the sample's index holding each start time advanced by one
# business hour.
# NOTE(review): the column is named 'prev' although the offset is +1 (forward)
# -- confirm which direction was intended.
advanced_by_one_bh = z.index + pd.tseries.offsets.BusinessHour(1)
mm = pd.DataFrame(index=z.index)
mm['prev'] = advanced_by_one_bh
In [20]:
# Same backward one-business-hour gap, with the one-hour shift added back
# before converting to hours.
shifted_back = z.index + pd.tseries.offsets.BusinessHour(-1)
adjusted_seconds = (shifted_back - z.index).total_seconds() + 3600.
sns.distplot(adjusted_seconds/ 3600.)
Out[20]:
In [39]:
import iris, iris.plot, cartopy, cartopy.crs
In [37]:
# Load the T2M NetCDF file (presumably MERRA-2 2 m air temperature, judging by
# the path). The one-element unpacking fails unless exactly one cube is returned.
t2, = iris.load('/bigdata/merra2/T2M.nc')
In [58]:
# Load the PRECTOTCORR and SPEED NetCDF files (presumably MERRA-2 corrected
# total precipitation and wind speed -- TODO confirm). Each one-element
# unpacking fails unless its file yields exactly one cube.
prectotcorr, = iris.load('/bigdata/merra2/PRECTOTCORR.nc')
speed, = iris.load('/bigdata/merra2/SPEED.nc')
In [43]:
# Preview the first rows of `bike`.
bike.head()
Out[43]:
In [74]:
# Map of t2[0] (index 0 along the cube's leading dimension, presumably the
# first time step) with the data grid drawn on top.
temperature_levels = np.arange(270, 300, 2)  # 270, 272, ..., 298

ax = plt.axes(projection=cartopy.crs.PlateCarree())
ax.coastlines(color='white', resolution='10m')
iris.plot.contourf(t2[0], axes=ax, cmap=plt.cm.viridis, levels=temperature_levels)
plt.colorbar(orientation='horizontal', shrink=0.5)

# Thin white line through every grid longitude and latitude of the cube.
grid_lons = t2.coord('longitude').points
grid_lats = t2.coord('latitude').points
for grid_lon in grid_lons:
    ax.axvline(grid_lon, color='white', lw=0.3)
for grid_lat in grid_lats:
    ax.axhline(grid_lat, color='white', lw=0.3)

# White dot on every grid point (all longitude/latitude combinations).
x, y = np.meshgrid(grid_lons, grid_lats)
plt.plot(x.ravel(), y.ravel(), 'o', markersize=5, color='white')

# Make the map axes current, crop to 77W-71W / 39N-43N and size the figure.
plt.sca(ax)
ax.set_xlim(-77, -71)
ax.set_ylim(39, 43)
plt.gcf().set_size_inches(12, 8)
In [63]:
# Map of speed[0] (index 0 along the cube's leading dimension, presumably the
# first time step), cropped to 75W-72W / 40N-42N.
# Plate Carree map axes, so the axis limits below are in degrees.
ax = plt.axes(projection=cartopy.crs.PlateCarree())
ax.coastlines(color='white', resolution='10m')
# Filled contours at levels 8, 10, ..., 22.
iris.plot.contourf(speed[0], axes=ax, cmap=plt.cm.viridis, levels=np.arange(8, 24, 2))
plt.colorbar(orientation='horizontal', shrink=0.5)
# Make the map axes current again after the colorbar call before cropping.
plt.sca(ax)
ax.set_xlim(-75, -72)
ax.set_ylim(40, 42)
plt.gcf().set_size_inches(12, 8)
In [68]:
# Map of a scaled prectotcorr[3] (index 3 along the cube's leading dimension),
# cropped to 75W-72W / 40N-42N.
# Plate Carree map axes, so the axis limits below are in degrees.
ax = plt.axes(projection=cartopy.crs.PlateCarree())
ax.coastlines(color='white', resolution='10m')
# NOTE(review): the field is multiplied by 86400*24. If the cube holds a
# per-second rate, x86400 alone would already give a per-day amount --
# confirm the extra x24 is intended. No levels are passed, so contour
# levels are chosen automatically.
iris.plot.contourf(prectotcorr[3]*86400*24, axes=ax, cmap=plt.cm.viridis)
plt.colorbar(orientation='horizontal', shrink=0.5)
# Make the map axes current again after the colorbar call before cropping.
plt.sca(ax)
ax.set_xlim(-75, -72)
ax.set_ylim(40, 42)
plt.gcf().set_size_inches(12, 8)
In [ ]: