In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datasets import synthetic as synth
%matplotlib inline
%load_ext autoreload
%autoreload 2
In [2]:
A = np.arange(12).reshape((4,3))
A
Out[2]:
In [3]:
y = np.array([2,1,1,3])
print np.unique(y).tolist()
idxs = [np.where(y == val)[0] for val in np.unique(y).tolist()]
idxs
# df = pd.DataFrame()
# df["X"] = A
# df['y'] = y
# df
Out[3]:
In [4]:
# Split the rows of A into one sub-array per entry of idxs, then take the
# column-wise mean of every group.
grouped = [A[idx] for idx in idxs]
print(grouped)
# A comprehension yields a real list on both Python 2 and Python 3; a bare
# map() would only display an opaque iterator object on Python 3.
[rows.mean(axis=0) for rows in grouped]
Out[4]:
In [5]:
filter(lambda x: x==1, [1,2])
Out[5]:
In [6]:
x = np.arange(6)
x = np.r_[x, x]
x
Out[6]:
In [7]:
print A*A
np.sum(A * A, axis=1)
Out[7]:
In [8]:
x = np.arange(12)[::-1]
print x
print np.argmin(x)
print np.argmax(x)
In [9]:
B = np.arange(36).reshape((6,3,2))
for el in B: print el
In [11]:
B[0].flatten()
Out[11]:
In [12]:
5 // 2.
Out[12]:
In [13]:
np.linspace(6, 0, num=20)
Out[13]:
In [14]:
np.random.randn()
Out[14]:
In [15]:
x * False
Out[15]:
In [16]:
l = [A, A]
print l
print np.asarray(l).reshape((-1,A.shape[1]))
In [17]:
print A
np.std(A, axis=0)
Out[17]:
In [18]:
z = np.array([2,0,3,1])
print np.argmax(z)
print np.argsort(z)
z[np.argsort(z)]
Out[18]:
In [19]:
np.dot((A - 7).T, A)
A.copy()
A.std()
Out[19]:
In [20]:
from utils.arrays import zNormalizeRows, zNormalize
In [21]:
# Check that squared Euclidean distances between z-normalized vectors can be
# recovered from dot products alone: d = 2 * (m - dot(M_i, b)).
# NOTE(review): this relies on zNormalize / zNormalizeRows producing vectors
# whose squared norm equals their length m - confirm against utils.arrays.
C = A[::-1]
b = np.array([2,4,3])
b = zNormalize(b)
M = np.random.randn(*A.shape)
M = zNormalizeRows(M)
# woot. Successfully getting distances from dot products
d1 = np.sum((M - b)**2, axis=1)
print(d1)
dotProd = np.dot(M, b)
m = len(b)
d2 = 2*(m - dotProd)
print(d2)
print(d1 - d2)
# Exact equality (np.array_equal) fails on round-off of order 1e-15, so the
# two results are compared within floating-point tolerance instead.
assert np.allclose(d1, d2)
In [22]:
a = np.array([2,4,3])
np.r_[0, np.cumsum(a)]
Out[22]:
In [23]:
np.prod(a)
Out[23]:
In [23]:
for i in range(4):
print i
for j in range(3):
print i,j
if j > 1:
break
In [24]:
print A
print C
print np.abs(C[1] - A[1])
In [25]:
D = A*C*C
print D
highestStdColIdx = np.argmax(np.std(D, axis=0))
print highestStdColIdx
sortIdxs = np.argsort(D[:,highestStdColIdx])
print sortIdxs
print D[sortIdxs]
In [26]:
A2 = 2. / A
print A2
print np.isinf(A2)
print np.where(np.isinf(A2))[0]
In [27]:
A.flatten().reshape((-1,4))
Out[27]:
In [28]:
x[x < 5] = 3
x
Out[28]:
In [29]:
np.c_[x, x, x]
Out[29]:
In [30]:
range(3, 10, 2)
Out[30]:
In [31]:
msg = "t" if [] else "f"
print msg
In [32]:
# will the original array get modified? Answer: no
A
x = A[0]
x = (x - np.mean(x)) / np.std(x)
print x
print A
In [33]:
for el in {1,3,4}:
print el
In [34]:
s = {1,3,2}
print A[list(s)]
In [35]:
print A
print A.T.tolist()
In [36]:
l = [[1,2,3],[4,5,6], [7], [8,9]]
import itertools
print list(itertools.chain.from_iterable(l))
In [37]:
print np.hsplit(A, A.shape[1])
In [38]:
print x
print isinstance(x, (list, tuple))
In [39]:
# D = np.dstack((A,A))
D = np.empty((4,3,2))
D[:,:,0] = A
D[:,:,1] = A # doing same thing as dstack; apparently tensors just printed weird
print D.shape
print D
# so how can I stack all the 3rd dim stuff end to end vertically in 2 dims?
np.vstack((A, A)) # ie, i want this
# D.reshape((-1, D.shape[1])) # nope
# np.vstack(D[:,:].tolist()) # nope
D[:,:].tolist()
Out[39]:
In [40]:
D = np.empty((2,4,3))
D[0,:,:] = A
D[1,:,:] = A
print D # ah, so this is actually what I wanted; makes sense cuz later idxs together in memory
print "vertStacked:"
print D.reshape((-1, D.shape[2])) # ah, here we go
print "horzStacked:"
print D.reshape((D.shape[1], -1)) # nope, not doing what I want
In [41]:
A[:, 2:2]
Out[41]:
In [42]:
print A[:,::-1]
print np.sort(A[:,::-1], axis=1)
In [43]:
(A.T / np.linalg.norm(A.T, axis=0)).T
Out[43]:
In [44]:
x = np.arange(10)[::-1]
x
print np.argsort(x)[:5]
In [45]:
%matplotlib inline
fig, ax = plt.subplots()
ax.imshow(np.random.random((10,10)))
ax.autoscale(False)
ax.plot(range(11))
plt.show()
In [46]:
def plotVertLine(x, ymin, ymax, ax=None):
    """Draw a dashed black vertical line at ``x`` spanning ``ymin``..``ymax``.

    Parameters
    ----------
    x : horizontal position of the line.
    ymin, ymax : vertical extent of the line.
    ax : object with a ``plot`` method (e.g. a matplotlib Axes), optional.
        When None, the line is drawn through the module-level ``plt``
        interface instead.
    """
    # Compare against None explicitly: a truthiness test would silently
    # discard a caller-supplied target that happens to evaluate as false.
    if ax is None:
        ax = plt
    ax.plot([x, x], [ymin, ymax], color='k',
            linestyle='--', linewidth=2)
In [47]:
fig, ax = plt.subplots()
ax.imshow(np.random.random((10,10)))
ymin, ymax = ax.get_ylim()
plotVertLine(2, ymin, ymax, ax)
In [48]:
# pyplot exposes the current axes' y-limits as plt.ylim(); get_ylim() exists
# only on Axes objects, so plt.get_ylim() raises AttributeError.
plt.ylim()
In [ ]:
# Scale every row of A by that row's sum.
sums = np.sum(A, axis=1)
# true_divide: plain `/` between two integer arrays floors under Python 2,
# which would turn almost every ratio into 0. reshape((-1, 1)) makes the sums
# a column for row-wise broadcasting without hard-coding the row count.
np.true_divide(A, sums.reshape((-1, 1)))
In [ ]:
# Pair two sequences element-wise, then undo the pairing with zip(*...).
a = np.arange(4)
b = a[::-1]
# Materialise the pairs: on Python 3 zip() is a lazy iterator, so printing it
# shows no contents and it can only be traversed once.
l = list(zip(a.tolist(), b.tolist()))
print(l)
print(list(zip(*l)))
In [49]:
d = dict(zip(a, b))
d
print len(d)
In [50]:
d.get(5, [])
Out[50]:
In [51]:
np.log2(8)
Out[51]:
In [52]:
np.vstack((A, A))
Out[52]:
In [53]:
print A.shape
print A.T.shape
In [54]:
w = np.zeros(3) + 2
A * w
Out[54]:
In [55]:
z = np.zeros(4) + 3
# A * z # breaks
A * z.reshape((-1,1)) # multiplies A[i] by z[i]
Out[55]:
In [56]:
np.sum(A, axis=0)
Out[56]:
In [57]:
# NOTE(review): `whateverFile` is not assigned in any cell visible here, so
# this line presumably raises NameError as written - point it at real data.
df = pd.DataFrame(whateverFile)
In [58]:
import numpy as np
import matplotlib.pyplot as plt
# enable imports; must be run from notebook's directory
import sys
sys.path.append('../python') # get access to all our code
sys.path.append('../figs') # get access to all our figs
sys.path.append('..') # stuff doing relative imports needs to be imported relative to here
%matplotlib inline
from utils import arrays as ar
from datasets import synthetic as synth
In [59]:
seq = synth.multiShapesMotif(noise=.02)
seq = seq[0]
# plt.plot(seq)
# plt.title('Synthetic Time Series of 3 signals')
# plt.xlabel('Sample index')
# plt.savefig('../communicate/shapes.pdf')
seq = synth.trianglesMotif(noise=.04)[0]
plt.plot(seq, linewidth=2)
plt.title('Synthetic 1D Time Series')
plt.xlabel('Sample Index')
plt.savefig('../communicate/triangles.pdf')
In [60]:
np.random.rand(10)
Out[60]:
In [61]:
zip(*[(1,2),(3,4),(5,6)])
Out[61]:
In [62]:
x[np.asarray((1,4,7))]
Out[62]:
In [63]:
x.tolist()
Out[63]:
In [64]:
1 <= 3 <= 5 < 7
Out[64]:
In [65]:
None or 3
Out[65]:
In [66]:
# so condition is first term when using and-or trick
print False and 1 or 3
print True and 1 or 3
In [67]:
l = [[1,2,3],[4,5,6], [7], [8,9]]
del l[2]
l
Out[67]:
In [68]:
from scipy import signal
x2 = np.array([2,1,5])
x2 = np.r_[x2, -np.inf]
print x2
print signal.argrelextrema(x2, np.greater, mode='wrap')
In [69]:
np.empty(0)
Out[69]:
In [70]:
x3 = x - 5
x4 = (x-5)[::-1]
print x3
print x4
print np.minimum(x3, x4)
In [71]:
# Running max / min over a window of 3 samples.
# Import the filter functions from scipy.ndimage itself: the
# scipy.ndimage.filters submodule is a deprecated alias in newer SciPy.
from scipy import ndimage as filt
x2 = np.array([5,4,3,2,1,4,1,2,3,2,1,5])
print(x2)
print(filt.maximum_filter1d(x2, size=3))
print("")
print(6 - x2)
print(filt.minimum_filter(6 - x2, size=3))
print("")
print(4 - x2)
print(filt.minimum_filter(4 - x2, size=3))
# so this does exactly what I want, except that it centers the filter
# over each point, while I need to align the 1st position with the point
# to handle subseq overlaps...
# -pretty confident that these aren't equivalent via translations
# or something cuz in the latter case, dist should never be compared
# to points before it
# -is this even what the problem reduces to? Ultimately, we want all
# non-overlapping relative minima subseqs, and this won't address case
# where something is the lowest nearby once the filter moves past an
# even lower one, but still overlaps with the lowest
# -I think what we really want is to find the relative minima, and
# any time one of them has a dist < thresh, we report that one and
# don't report anything else until overlap is over
# -this is pretty simple, and mirrors online recognition case
# -don't think we actually need a min filter here
In [72]:
print l
a, b, (c, d) = l
print a, b, c, d
In [73]:
v = np.arange(2)
s, e = v
print s, e
In [74]:
"foo{}".format(3)
Out[74]:
In [75]:
s = [1, 2]
s.append(3)
print s
In [76]:
# sorted() makes a copy, right? Edit: yes.
s2 = sorted(s)
del s2[1]
print s2
print s
In [77]:
l2 = list(reversed(range(5)))
print l2
print np.sort(l2)
In [78]:
A
np.std(A, axis=1)
Out[78]:
In [79]:
A.T.ravel()
A.reshape(6, -1)
A
Out[79]:
In [80]:
# Round-trip A through the FFT along each row and recover the integers.
F = np.fft.fft(A, axis=1)
f = np.fft.ifft(F, axis=1)
print(A)
print(f)
# ifft returns complex values carrying floating-point round-off. Casting them
# straight to an integer dtype drops the imaginary part and truncates toward
# zero, so a value like 2.9999999 would come back as 2. Take the real part and
# round to nearest instead. The builtin `int` also replaces the `np.int`
# alias, which newer NumPy releases no longer provide.
print(np.rint(f.real).astype(int))
In [81]:
x
x.shape[0]
Out[81]:
In [82]:
np.where(x > 3)[0]
Out[82]:
In [83]:
"foo".lower()
Out[83]:
In [84]:
isinstance({'a': 1}, dict)
Out[84]:
In [85]:
l + l
Out[85]:
In [86]:
d = {'a': 1}
d.keys()
Out[86]:
In [87]:
# Build every combination that takes one (key, value) pair from each list,
# then turn each combination into a dict.
l0 = [('a',2),('b',4)]
l1 = [('aa',12),('bb',14)]
l2 = [('aaa',22),('ccc',24)]
import itertools
prod = list(itertools.product(l0, l1, l2))
print(prod)
print("")
# A comprehension gives a real list on both Python 2 and Python 3, where
# map() would return a lazy iterator that prints without its contents.
paramsDicts = [dict(combo) for combo in prod]
print(paramsDicts)
In [88]:
# Does a for-loop variable stay visible after the loop ends?
i = -1
for i in range(5):
    pass
print(i)  # huh...vars in for loop accessible in outer scope, apparently
In [89]:
zip(x, x[::-1])
Out[89]:
In [90]:
np.arange(.1, .9, .2)
Out[90]:
In [91]:
"foo".split('_')[-1]
Out[91]:
In [92]:
int(round(2.9))
Out[92]:
In [93]:
'foo_bar'.replace('_', ' ')
Out[93]:
In [94]:
l + l
Out[94]:
In [95]:
print x
print np.r_[x,x]
print np.r_[A,A]
In [96]:
for row in A.T:
print row
print A
In [97]:
Anorm = A / np.linalg.norm(A)
print np.linalg.norm(Anorm)
In [98]:
T = np.empty((2,4,3))
T[0] = A
T[1] = 2*A
print T
print
print np.sum(T, axis=0)
In [99]:
from scipy.stats import binom
n = 30
p = .3
c = binom.cdf(np.arange(n+1), np.zeros(n+1)+n, p)
# print c
plt.plot(c);
# c_norm = c - np.mean(c)
# plt.plot(c_norm)
plt.figure()
sig = 1. / (1. + np.exp(c))
plt.plot(ar.zeroOneScaleMat(sig))
Out[99]:
In [100]:
from scipy.stats import beta
x = np.linspace(0., 1., 20)
# scaleBy = 5
# a = 3
# b = 7
p = .3
plt.figure()
for scaleBy in range(10, 101, 10):
c = beta.sf(x, p*scaleBy, (1-p)*scaleBy)
plt.plot(x, c)
In [101]:
x = 2 * np.arange(9)[::-1]
print x
print np.argsort(x) # ascending
print np.argsort(x)[::-1] # descending
In [102]:
# Downsampling experiments: imresize (abandoned, see the commented-out
# attempts) versus scipy.signal.decimate.
try:
    # Only the commented-out attempts below use imresize, and newer SciPy
    # releases no longer ship it; don't let that abort the decimate demo.
    from scipy.misc import imresize
except ImportError:
    imresize = None
a = np.arange(10).reshape((-1, 1))
# print a
# print imresize(a, (5, 1), interp='bicubic') # huge garbagey numbers
# print
# print A
# print imresize(A, (2, 3)) # huge garbagey numbers
from scipy.signal import decimate
a = np.arange(10)
print(a)
print(decimate(a, 2, n=1))
print("")
print(A)
print(decimate(A, 2, axis=0, n=1))
In [103]:
# https://gist.github.com/andrewgiessel/5684769
import numpy as np
import pylab
from scipy.optimize import curve_fit
def sigmoid(x, x0, k):
    """Logistic curve 1 / (1 + exp(-k * (x - x0))).

    Parameters
    ----------
    x : scalar or array; points at which to evaluate the curve.
    x0 : midpoint; the curve equals 0.5 where x == x0.
    k : steepness of the transition.

    Returns
    -------
    The curve value(s), between 0 and 1, for the given inputs.
    """
    # expit(z) evaluates 1 / (1 + exp(-z)) without the overflow that the
    # literal formula hits inside exp() once k * (x - x0) is very negative.
    from scipy.special import expit
    y = expit(k * (x - x0))
    return y
xdata = np.array([-5,-4,-3,-2,-1, .0, 1.0, 3.0, 4.3, 7.0, 8.0, 8.5, 10.0, 12.0])
ydata = np.array([0,0,0,0,0, .01, .02, .04, .11, .43, .7, .89, .95, .99])
popt, pcov = curve_fit(sigmoid, xdata, ydata)
print popt
x = np.linspace(-1, 15, 50)
y = sigmoid(x, *popt)
pylab.plot(xdata, ydata, 'o', label='data')
pylab.plot(x,y, label='fit')
pylab.ylim(0, 1.05)
pylab.legend(loc='best')
pylab.show()
In [104]:
# Locate the entries of each row that are strictly greater than both of their
# neighbours, with the row ends treated as adjacent (mode='wrap').
# The builtin `int` replaces the `np.int` alias, which newer NumPy releases
# no longer provide.
A = np.arange(12, dtype=int).reshape((4,3))
B = np.hstack((A, A))
print(B)
# Append one zero column to every row before searching for the maxima.
pad = np.zeros((B.shape[0], 1), dtype=int)
B = np.hstack((B, pad))
rowIdxs, colIdxs = signal.argrelextrema(B, np.greater, mode='wrap', axis=1)
print("")
print("{} {}".format(rowIdxs, colIdxs))
print("")
# One (row, col) pair per line.
print(np.vstack((rowIdxs, colIdxs)).T)
In [105]:
print np.hamming(5)
In [106]:
np.tile((1,3,2), (3, 2))
Out[106]:
In [107]:
ri = (0,1,1,3)
ci = (0,1,2,2)
print A
print A[(ri, ci)]
In [108]:
v = A.flatten()
print A
print v
print v.reshape(A.shape)
In [109]:
from python.algo import ff2
Asmall = A / 5. - 1.
# Asmall = -A / 10.
Aproj = ff2.l1Project(Asmall)
print Asmall
print
print Aproj
print np.sum(np.abs(Aproj))
In [110]:
x = np.arange(8)[:-1]
np.r_[x, np.zeros(4)]
Out[110]:
In [111]:
np.sum(A > 0., axis=0)
Out[111]:
In [112]:
np.empty((3,4))
Out[112]:
In [113]:
M = np.random.randn(4,3)
print M
print np.argmax(M, axis=1)
In [114]:
print x
x[np.array([False])] = x[np.array([False])]
print x
In [115]:
l3 = [[] for i in range(3)]
print l3
l3[0].append(1)
print l3
In [116]:
print np.vstack([A, A])
print np.r_[A, A]
print np.r_[x, x]
In [117]:
print x.reshape((-1,1))
print x
In [118]:
np.random.shuffle(x)
print x
In [119]:
np.var(A)
Out[119]:
In [124]:
B = np.array([[1,1],[0,1]])
print B
print np.argmax(B, axis=1)
In [126]:
np.eye(2)[(np.arange(2), np.arange(2))]
Out[126]:
In [72]:
plt.plot(synth.notSoRandomWalk(100))
# walk, trend = synth.notSoRandomWalk(200)
# plt.plot(walk)
# plt.plot(trend);
In [107]:
# verify relationship between ED and cos sim
xNorm = (x - np.mean(x)) / np.std(x)
y = np.copy(xNorm)
np.random.shuffle(y)
print xNorm
print y
m = len(x)
# via dot prods normalized by length
cosSim_xx = np.dot(xNorm, xNorm) / m # 1.0
cosSim_yy = np.dot(y, y) / m
cosSim_xy = np.dot(xNorm, y) / m # > 1.0 sometimes...
print
print cosSim_xx # yep, 1.0
print cosSim_yy # yep, 1.0
print
print cosSim_xy
# via squared euclidean distances
diff = (xNorm - y)
dist = np.sum(diff*diff)
print 1. - dist/(2*m)
# via unit vects
xNorm2 = x - np.mean(x)
xNorm2 /= np.linalg.norm(xNorm2)
yNorm2 = y - np.mean(y)
yNorm2 /= np.linalg.norm(yNorm2)
print np.dot(xNorm2, yNorm2)
In [109]:
np.array([1])
Out[109]:
In [10]:
# What *exactly* is the 2d conv func doing?
# It appears it flips the filter both horizontally and
# vertically like we'd expect, and valid does in fact
# just return the part where it's entirely in the matrix
# NOTE: this cell rebinds `sig` and `filt`, both of which earlier cells use
# for other objects.
from scipy import signal as sig
Lmax = 3
# we set filter = diag(1,2,3)
filt = np.zeros((Lmax, Lmax)) + np.diag(np.arange(Lmax)+1) # zeros except 1..Lmax along the diagonal
# Test input: all zeros apart from a single 1 in the top-left corner.
testMat = np.zeros((8,8))
testMat[0,0] = 1
out = sig.convolve2d(testMat, filt, mode='valid')
plt.imshow(out, interpolation='none')
plt.colorbar()
Out[10]:
In [11]:
testMat[:,:] = 0
# testMat[3, 3] = 1.
testMat[4, 4] = 1.
out = sig.convolve2d(testMat, filt, mode='valid')
plt.imshow(out, interpolation='none')
plt.colorbar()
Out[11]:
In [12]:
testMat[:,:] = 0
testMat[-1, -1] = 1.
out = sig.convolve2d(testMat, filt, mode='valid')
plt.imshow(out, interpolation='none')
plt.colorbar()
Out[12]:
In [14]:
np.sort(np.arange(4))
Out[14]:
In [17]:
"{0}.{1}".format(0, 1)
Out[17]:
In [33]:
# Um, break is just innermost loop, right? -> yep
# The inner loop stops after its first pass each time, while the outer loop
# still runs all three iterations.
for i in range(3):
    for j in range(4):
        print(j)
        break
In [34]:
# how do we get row, col argmax?
# argmax without an axis returns a position in the flattened array;
# unravel_index converts it back to per-axis indices for the array's shape.
idx = np.argmax(A)
row, col = np.unravel_index(idx, A.shape)
print(A.flatten())
print("{} == {}".format(A.flatten()[idx], A[row, col]))
In [35]:
sorted([3, 5]) # yep, ascending order
Out[35]:
In [38]:
print A[[1,2]]
print A[(1,2)]
print A[1, 2]
In [41]:
print A
print np.diagonal(A)
print A[:3]
print np.diagonal(A[:3])
In [2]:
In [ ]: