In [1]:
import pandas as pd, numpy as np
In [2]:
# Load the ';'-separated quotes export for Роснефть (the "1day" in the file name
# suggests daily bars). Exports for other tickers in the same layout
# (e.g. 'Аэрофлот_1day_01012008_15052016.txt', 'Сургнфгз_1day_01012008_15052016.txt')
# were loaded here previously; swap the file name to analyse them instead.
quotes_path = 'data/Роснефть_1day_01012008_15052016.txt'
df = pd.read_csv(quotes_path, sep=';')
In [3]:
# Drop any leading/trailing '<' and '>' characters from the column headers so
# the columns can be addressed by bare name (later cells use 'DATE' and 'CLOSE').
df.columns = [header.strip('<>') for header in df.columns]
In [4]:
# Inspect the dtype pandas inferred for each column.
df.dtypes
Out[4]:
In [5]:
# Preview the first 15 rows.
df.head(15)
Out[5]:
In [6]:
# Print summary statistics column by column, each preceded by the column name.
for column in df:
    print('\n', column)
    print(df[column].describe())
In [7]:
# Number of rows loaded.
len(df)
Out[7]:
In [8]:
# Draw a small random sample of rows for a quick look at the data.
# The fixed seed keeps the sample identical across "Restart & Run All", and the
# size is capped so a frame with fewer than 10 rows does not raise.
ds = df.sample(min(10, len(df)), random_state=0)
In [9]:
# Display the sampled rows.
ds
Out[9]:
In [10]:
# Pairwise close-price differences between every pair of dates.
#
# Names produced for the cells below:
#   DF      - the quotes sorted by DATE (same object as df)
#   dates   - 1-D array of DATE values in ascending order
#   values  - 1-D array of CLOSE values aligned with dates
#   VALUES  - {date: close} lookup
#   dA      - square DataFrame, rows and columns labelled by date:
#             dA.loc[y, x] = close[x] - close[y] when x >= y, else 0
#   deltas  - list of close[x] - close[y] for every pair with x strictly after y
#   to_plot - one record per such pair: buy date, sell date and price change
#
# NOTE(review): DATE is assumed to hold one row per date; duplicated dates
# would give dA repeated labels - confirm against the data.
df = df.sort_values('DATE')
DF = df
dates = DF['DATE'].values
values = DF['CLOSE'].values
VALUES = dict(zip(dates, values))

# diff[i, j] = close[j] - close[i]: rows index y, columns index x.
diff = values[np.newaxis, :] - values[:, np.newaxis]
x_not_before_y = dates[np.newaxis, :] >= dates[:, np.newaxis]
dA = pd.DataFrame(np.where(x_not_before_y, diff, 0), index=dates, columns=dates)

# Index pairs (x, y) with x strictly later than y, enumerated x-major.
later_idx, earlier_idx = np.nonzero(dates[:, np.newaxis] > dates[np.newaxis, :])
pair_deltas = values[later_idx] - values[earlier_idx]
deltas = pair_deltas.tolist()

# value = close[later] - close[earlier], i.e. the result of buying on the
# earlier date and selling on the later one, so 'buy' is the earlier date.
to_plot = [
    {'buy': bought_on, 'sell': sold_on, 'value': change}
    for bought_on, sold_on, change in zip(dates[earlier_idx], dates[later_idx], pair_deltas)
]
In [11]:
# Evaluate dA; the trailing ';' suppresses the cell output.
dA;
In [12]:
# Evaluate DF; the trailing ';' suppresses the cell output.
DF;
In [13]:
%matplotlib inline
In [14]:
import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib.ticker import FuncFormatter
class MidpointNormalize(colors.Normalize):
    """Piecewise-linear normalisation with a pinned midpoint.

    Data in [vmin, midpoint] is mapped linearly onto [0, 0.5] and data in
    [midpoint, vmax] onto [0.5, 1], so ``midpoint`` always lands on the
    centre of the colormap even when the data range is lopsided.

    Parameters
    ----------
    vmin, vmax : float, optional
        Data limits. When left as None they are taken from the first data
        the norm is applied to.
    midpoint : float, optional
        Data value mapped to 0.5. Defaults to halfway between vmin and vmax.
    clip : bool, optional
        Passed to the base class. Values outside [vmin, vmax] saturate at
        0 or 1 here regardless of this flag.
    """

    def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
        self.midpoint = midpoint
        colors.Normalize.__init__(self, vmin, vmax, clip)

    def __call__(self, value, clip=None):
        """Return ``value`` mapped into [0, 1] as a masked array.

        The mask of a masked-array input is carried over to the result;
        ``clip`` is accepted for interface compatibility and not used.
        """
        # Fill in limits that were not given explicitly, so the norm also
        # works when called directly rather than through a plotting call.
        self.autoscale_None(np.ma.asarray(value))

        midpoint = self.midpoint
        if midpoint is None:
            midpoint = (self.vmin + self.vmax) / 2.0

        # np.interp needs increasing sample points, i.e.
        # vmin <= midpoint <= vmax; the mapping is not meaningful otherwise.
        anchors = [self.vmin, midpoint, self.vmax]
        targets = [0, 0.5, 1]
        mapped = np.interp(np.ma.getdata(value), anchors, targets)
        return np.ma.masked_array(mapped, mask=np.ma.getmask(value))
In [107]:
def tics_formatter(x):
    """Turn a tick position into a 'YYYY-MM-DD' label.

    ``x`` is truncated to an integer and used as an index into the
    module-level ``dates`` sequence, whose entries are expected to be
    integers in YYYYMMDD form. Positions outside the sequence give ''.
    """
    idx = int(x)
    if not 0 <= idx < len(dates):
        return ''
    year, month_and_day = divmod(dates[idx], 10000)
    month, day = divmod(month_and_day, 100)
    return '{}-{:02d}-{:02d}'.format(year, month, day)
# Four-panel overview of the pairwise price differences:
#   bottom-left  (223) - the dA matrix as an image, diverging colours centred on 0
#   top-left     (221) - close price by row position, sharing the matrix's x axis
#   bottom-right (224) - the same series drawn sideways, sharing the matrix's y axis
#   top-right    (222) - histogram of all pairwise deltas
fig = plt.figure(figsize=(8, 8))

ax_dotmatrix = fig.add_subplot(223)
ax_dotmatrix.imshow(dA, norm=MidpointNormalize(midpoint=0.), cmap=plt.cm.bwr_r)

ax_dyn_x = fig.add_subplot(221, sharex=ax_dotmatrix)
ax_dyn_y = fig.add_subplot(224, sharey=ax_dotmatrix)
ax_dyn_x.get_xaxis().set_visible(False)
ax_dyn_y.get_yaxis().set_visible(False)
ax_dyn_x.plot(values)
ax_dyn_y.plot(values, range(len(dates)))

# Label the matrix ticks with dates through formatters attached to the matrix
# axes themselves. plt.xticks()/plt.yticks() read the *current* axes, which at
# this point is ax_dyn_y (whose x axis is price, not date index), and fixed
# label lists also go stale as soon as the tick positions change.
ax_dotmatrix.xaxis.set_major_formatter(
    FuncFormatter(lambda position, _tick_index: tics_formatter(position)))
ax_dotmatrix.yaxis.set_major_formatter(
    FuncFormatter(lambda position, _tick_index: tics_formatter(position)))
ax_dotmatrix.tick_params(axis='x', labelrotation=90)

ax_hist = fig.add_subplot(222)
# `normed` was removed from Axes.hist; `density` is its replacement.
ax_hist.hist(deltas, bins=24, density=True)
ax_hist.set_xlim(auto=True)
ax_hist.set_ylim(auto=True);
In [17]:
# Mean and median of the pairwise close-price differences collected in deltas.
np.mean(deltas),np.median(deltas)
Out[17]: