In [1]:
import numpy as np
import subprocess
%matplotlib nbagg
import matplotlib.pyplot as plt
import pyfftw

In [7]:
def generate_data_3D(
        n,
        dtype = np.complex128,
        p = 1.5):
    """
    generate something that has the proper shape
    """
    assert(n % 2 == 0)
    a = np.zeros((n, n, n/2+1), dtype = dtype)
    a[:] = np.random.randn(*a.shape) + 1j*np.random.randn(*a.shape)
    k, j, i = np.mgrid[-n/2+1:n/2+1, -n/2+1:n/2+1, 0:n/2+1]
    k = (k**2 + j**2 + i**2)**.5
    k = np.roll(k, n//2+1, axis = 0)
    k = np.roll(k, n//2+1, axis = 1)
    a /= k**p
    a[0, :, :] = 0
    a[:, 0, :] = 0
    a[:, :, 0] = 0
    ii = np.where(k == 0)
    a[ii] = 0
    ii = np.where(k > n/3)
    a[ii] = 0
    return a

n = 31*4
N = 256

Kdata0 = generate_data_3D(n, p = 2).astype(np.complex64)
Kdata1 = generate_data_3D(n, p = 2).astype(np.complex64)
Kdata2 = generate_data_3D(n, p = 2).astype(np.complex64)
Kdata0.T.copy().astype('>c8').tofile("Kdata0")
Kdata1.T.copy().astype('>c8').tofile("Kdata1")
Kdata2.T.copy().astype('>c8').tofile("Kdata2")

In [8]:
def padd_with_zeros(
        a,
        n,
        odtype = None):
    if (type(odtype) == type(None)):
        odtype = a.dtype
    assert(a.shape[0] <= n)
    b = np.zeros((n, n, n/2 + 1), dtype = odtype)
    m = a.shape[0]
    b[     :m/2,      :m/2, :m/2+1] = a[     :m/2,      :m/2, :m/2+1]
    b[     :m/2, n-m/2:   , :m/2+1] = a[     :m/2, m-m/2:   , :m/2+1]
    b[n-m/2:   ,      :m/2, :m/2+1] = a[m-m/2:   ,      :m/2, :m/2+1]
    b[n-m/2:   , n-m/2:   , :m/2+1] = a[m-m/2:   , m-m/2:   , :m/2+1]
    return b

def transform_py(bla):
    b = padd_with_zeros(bla, N)
    c = np.zeros((N, N, N), np.float32)
    t = pyfftw.FFTW(
        b, c,
        axes = (0, 1, 2),
        direction = 'FFTW_BACKWARD',
        flags = ('FFTW_ESTIMATE',),
        threads = 2)
    t.execute()
    return c

import chichi_misc as cm

def array_to_8cubes(
    a,
    odtype = None):
    assert(len(a.shape) >= 3)
    assert((a.shape[0] % 8 == 0) and
           (a.shape[1] % 8 == 0) and
           (a.shape[2] % 8 == 0))
    if type(odtype) == type(None):
        odtype = a.dtype
    c = np.zeros(
        ((((a.shape[0] // 8)*(a.shape[1] // 8)*(a.shape[2] // 8)),) +
         (8, 8, 8) +
         a.shape[3:]),
        dtype = odtype)
    zi = np.zeros( c.shape[0], dtype = np.int)
    ri = np.zeros((c.shape[0], 3, 2), dtype = np.int)
    ii = 0
    for k in range(a.shape[0]//8):
        for j in range(a.shape[1]//8):
            for i in range(a.shape[2]//8):
                tindex = np.array([k, j, i])
                zi[ii] = cm.grid3D_to_zindex(tindex)
                ri[ii, 0] = np.array([8*tindex[0], 8*(tindex[0]+1)])
                ri[ii, 1] = np.array([8*tindex[1], 8*(tindex[1]+1)])
                ri[ii, 2] = np.array([8*tindex[2], 8*(tindex[2]+1)])
                ii += 1
    for ii in range(zi.shape[0]):
        c[zi[ii]] = a[ri[ii, 0, 0]:ri[ii, 0, 1],
                      ri[ii, 1, 0]:ri[ii, 1, 1],
                      ri[ii, 2, 0]:ri[ii, 2, 1]]
    return c

d0 = transform_py(Kdata0)
d1 = transform_py(Kdata1)
d2 = transform_py(Kdata2)

Rdata_py_tmp = np.array([d0, d1, d2]).transpose((1, 2, 3, 0))

Rdata_py = array_to_8cubes(Rdata_py_tmp)

# i0 = np.random.randint(16)
# i1 = np.random.randint(16)
# i2 = np.random.randint(16)
# z = cm.grid3D_to_zindex(np.array([i0, i1, i2]))

In [15]:
def compute_cpp_data(
        branch = None,
        nfiles = 16):
    if not (type(branch) == type(None)):
        subprocess.call(['git', 'checkout', branch])
    if subprocess.call(['make', 'full.elf']) == 0:
        subprocess.call([#'valgrind',
                         #'--tool=callgrind',
                         #'--callgrind-out-file=tmp.txt',
                         'time',
                         'mpirun.mpich',
                         '-np',
                         '8',
                         './full.elf',
                         '{0}'.format(n),
                         '{0}'.format(N),
                         '{0}'.format(nfiles),
                         '3'])
    else:
        print ('compilation error')
        return None
    
def get_cpp_data(
        branch = None,
        run = True,
        nfiles = 16):
    if run:
        for nf in range(nfiles):
            subprocess.call(
                ['rm',
                 'Rdata_z{0:0>7x}'.format(nf*Rdata_py.shape[0]//nfiles)])
        compute_cpp_data(branch, nfiles = nfiles)
    Rdata = []
    for nf in range(nfiles):
        Rdata.append(np.fromfile(
        'Rdata_z{0:0>7x}'.format(nf*Rdata_py.shape[0]//nfiles),
        dtype = np.float32).reshape(-1, 8, 8, 8, 3))
    return np.concatenate(Rdata)

#Rdata = get_cpp_data(branch = 'develop')
# develop says 30 secs, inplace fft is 28 secs
#Rdata = get_cpp_data(branch = 'feature-inplace_fft')
Rdata = get_cpp_data(run = True, nfiles = 16)

In [16]:
distance = np.max(np.abs(Rdata_py - Rdata), axis = (1, 2, 3, 4))
print(np.max(distance))
if np.max(distance) > 1e-5:
    ax = plt.figure(figsize=(6,2)).add_subplot(111)
    ax.plot(distance)
    i0 = np.random.randint(8)
    i1 = np.random.randint(8)
    i2 = np.random.randint(8)
    z = cm.grid3D_to_zindex(np.array([i0, i1, i2]))
    #z = 0
    print(cm.zindex_to_grid3D(z))
    s = np.max(np.abs(Rdata_py[None, z, :, :, :, 1] - Rdata[..., 1]),
               axis = (1, 2, 3))
    z1 = np.argmin(s)
    print(z, z1, s[z1])
        #print(Rdata[z1] - Rdata_py[z1])
    ta0 = Rdata_py.ravel()
    ta1 = Rdata.ravel()
    print (Rdata_py[254:259, 7, 4, 3, 1])
    print (Rdata[254:259, 7, 4, 3, 1])
    print (ta0[ta0.shape[0]/2-1:ta0.shape[0]/2+7])
    print (ta1[ta1.shape[0]/2-1:ta1.shape[0]/2+7])
else:
    print('distance is small')
print(np.max(np.abs(Rdata)))


0.0
distance is small
15.2714

In [ ]: