In [1]:
import matplotlib.pyplot as plt
In [2]:
cd ..
In [3]:
import json
from os import path
import pandas as pd
import gym.envs
import numpy as np
num_steps = 100
gym.envs.register(id='obs-v2',
                  entry_point='gym_bs.envs:EuropeanOptionEnv',
                  kwargs={'t': num_steps,   # episode length in time steps
                          'n': 1,
                          's0': 49,         # initial underlying price
                          'k': 50,          # strike
                          'max_stock': 1,
                          'sigma': .1})     # volatility
params = dict(n_iter=10000, batch_size=50, elite_frac=0.3)  # training settings, unused in this excerpt
env = gym.make('obs-v2')
env = gym.wrappers.Monitor(env, "/tmp/gym-results/obs-v2", video_callable=False, write_upon_reset=True, force=True)
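The entry point gym_bs.envs:EuropeanOptionEnv suggests the environment simulates hedging a European option with 100 time steps, initial price 49, strike 50 and volatility 0.1. For reference, here is a minimal Black-Scholes pricing sketch for that contract; the risk-free rate and the calendar maturity that t=100 steps maps to are stated nowhere above, so r=0 and tau=1 below are pure assumptions:
In [ ]:
from scipy.stats import norm

def bs_call(s, k, sigma, tau, r=0.0):
    """Black-Scholes price and delta (hedge ratio) of a European call."""
    d1 = (np.log(s / k) + (r + 0.5 * sigma ** 2) * tau) / (sigma * np.sqrt(tau))
    d2 = d1 - sigma * np.sqrt(tau)
    return s * norm.cdf(d1) - k * np.exp(-r * tau) * norm.cdf(d2), norm.cdf(d1)

bs_call(49, 50, 0.1, 1.0)  # tau=1.0 and r=0 are assumptions, not notebook values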
In [4]:
observation = env.reset()
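Later cells index into the observation as underlying price, time to maturity and stock holding; those names are my reading of the indexing below, not something the environment documents. A quick look at the raw value:
In [ ]:
observation  # expected to be a 3-vector: [underlying, tau, stocks]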
In [5]:
%%time
df = pd.DataFrame.from_dict({'reward': [], 'observation': []})
for _ in range(10):                 # 10 episodes with uniformly random actions
    observation = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        df = df.append(pd.DataFrame.from_dict({'reward': reward, 'observation': [observation]}))
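Calling DataFrame.append once per step copies the whole frame on every iteration, which is quadratic overall. A sketch of the same loop collecting plain dicts and building the frame once at the end:
In [ ]:
rows = []
for _ in range(10):
    observation = env.reset()
    done = False
    while not done:
        observation, reward, done, info = env.step(env.action_space.sample())
        rows.append({'reward': reward, 'observation': observation})
df = pd.DataFrame(rows)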
In [6]:
%matplotlib inline
df.reward.clip(lower=-15).hist(bins=100)  # clip extreme losses so the histogram's tail stays readable
Out[6]: <histogram of per-step rewards, clipped below at -15>
In [7]:
%%time
df = pd.DataFrame.from_dict({'reward': [], 'underlying': [], 'tau': [], 'stocks': []})
action = np.array([0.])             # fixed zero action every step; the commented line restores random actions
for _ in range(1000):               # 1000 episodes
    observation = env.reset()
    done = False
    while not done:
        # action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        df = df.append(pd.DataFrame.from_dict({'reward': [reward],
                                               'underlying': [observation[0]],
                                               'tau': [observation[1]],
                                               'stocks': [observation[2]]}))
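A quick numeric summary of the columns just collected, before plotting:
In [ ]:
df.describe()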
In [12]:
%matplotlib inline
df.reward.clip(lower=-1500).hist(bins=100)  # clip extreme losses, as above
Out[12]: <histogram of per-step rewards under the zero action, clipped below at -1500>
In [11]:
%matplotlib inline
# fig = plt.Figure()
df.underlying.hist(bins=20, figsize=(10, 6))
Out[11]: <histogram of observed underlying prices, 20 bins>
In [10]:
observation = env.reset()           # reset first: the Monitor refuses to step a finished episode
done = False
df = pd.DataFrame.from_dict({'reward': [], 'observation': []})
while not done:
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    df = df.append(pd.DataFrame.from_dict({'reward': reward, 'observation': [observation]}))
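Closing the environment lets the Monitor wrapper flush its remaining episode statistics to /tmp/gym-results/obs-v2 and release resources (write_upon_reset=True above already writes on each reset):
In [ ]:
env.close()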