Interactive Demos Here:

To access the interactive demos online, without any installation, click the "Binder" button on each of them.

Join the community on gitter

https://gitter.im/QuantStack/Lobby

Munich MUC++ Meetup 31.1.2019

A Universal Data Science Framework in C++

Wolf Vollprecht

- w.vollprecht@gmail.com
- @wolfv (github)
- @wuoulf (Twitter)

Structure of the Talk

  • Intro
  • Interactive C++ in the browser
  • Overview of xtensor
  • A minimal C++17 "xtensor" implementation
  • Widgets and Plotting for C++

What is QuantStack

  • Scientific C++ & Python Shop in Paris
  • Only doing Open Source at the moment!
  • Currently 4 full-timers
  • Always looking to grow :)

In [ ]:
#include <xtensor/xtensor.hpp>

In [ ]:
#include <iostream>

std::cout << "Hello MUC++ Meetup!" << std::endl;

Jupyter Notebooks

  • Everywhere in Data Science
  • Mix of Markdown + Code
  • $0 = 1 + e ^ {i \pi}$
  • Cell-based execution
  • Widgets (sliders, buttons ... )

  • xeus + cling
  • cling: a clang-based C++ interpreter from CERN
  • xeus: C++ implementation of the "Jupyter Kernel Protocol"

In [ ]:
int a_var = 5;
// without semicolon: prints value
a_var

In [ ]:
a_var = 100

Functions


In [ ]:
std::string func(int A)
{
    return std::string("This was executed ") + std::to_string(A) + " 👍";
}

std::cout << func(111 + 222) << std::endl;

In [ ]:
double sqr(double a)
{
    return a * a;
}

In [ ]:
sqr(9)

Classes


In [ ]:
class Foo
{
public:

    virtual ~Foo() {}
    
    virtual void print(double value) const
    {
        std::cout << "Foo value = " << value << std::endl;
    }
};

In [ ]:
Foo bar;
bar.print(1.2);

In [ ]:
class Bar : public Foo
{
public:

    virtual ~Bar() {}
    
    virtual void print(double value) const
    {
        std::cout << "Bar value = " << 2 * value << std::endl;
    }
};

In [ ]:
Foo* bar2 = new Bar;
bar2->print(1.2);
delete bar2;

Fast help


In [ ]:
a_var

In [ ]:
#include <vector>

In [ ]:
?std::vector

In [ ]:
std::vector<double> temp_vec;

In [ ]:
?temp_vec.push_back

In [ ]:
?xt::xtensor

Magics


In [ ]:
#include <algorithm>
#include <vector>

// note: std::random_shuffle is deprecated in C++14 and removed in C++17 (std::shuffle is the replacement)
std::vector<double> to_shuffle = {1, 2, 3, 4};

In [ ]:
%timeit std::random_shuffle(to_shuffle.begin(), to_shuffle.end());

xeus-cling Installation

xeus-cling is packaged on conda-forge; a local setup typically amounts to conda install xeus-cling -c conda-forge.

(Shoutout to Binder: free hosted Jupyter for Open Source!)

N-D arrays are everywhere

  • ... except for C++
  • Basic building block of data science
  • Images, Videos, Music, Point Clouds – all are expressed as N-dimensional data

Anatomy of an N-dimensional container

  • Shape of length N
  • Strides describe the offset between consecutive elements along each dimension in the linear data buffer
  • Example: shape (2, 3) with row-major strides (3, 1), illustrated in the sketch below
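
A minimal sketch of this bookkeeping in plain C++ (illustrative only, not xtensor code):

#include <array>
#include <cstddef>
#include <iostream>

// shape (2, 3) stored row-major: strides are (3, 1)
std::array<std::size_t, 2> shape = {2, 3};
std::array<std::size_t, 2> strides = {shape[1], 1};

// flat buffer holding the 2 x 3 elements
std::array<double, 6> data = {1, 2, 3, 4, 5, 6};

// element (i, j) lives at offset i * strides[0] + j * strides[1]
std::size_t i = 1, j = 2;
std::cout << data[i * strides[0] + j * strides[1]] << std::endl;  // prints 6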


In [ ]:
#include <xtensor/xio.hpp>
#include <xtensor/xarray.hpp>

xt::xarray<double> a = {{1,2,3}, {4,5,6}};

std::cout << a << std::endl;

STL interface


In [ ]:
for (auto& el : a)
{
    std::cout << el << std::endl;
}

In [ ]:
a(0, 1) = 10;

std::cout << a << std::endl;

In [ ]:
std::sort(a.begin(), a.end());

std::cout << a << std::endl;

In [ ]:
std::cout << xt::transpose(a) << std::endl;

In [ ]:
a.reshape({1, 1, 6, 1});

std::cout << a << std::endl;

In [ ]:
a.reshape({2, 3});

Functions


In [ ]:
auto xfunc = sin(a) * 2;

std::cout << xfunc << std::endl;

Expression Templates

  • xfunc above is not a computed array but a lazy expression (an xt::xfunction); elements are evaluated on access or on assignment to a container

In [ ]:
template <class T>
void print_type(T) { std::cout << __PRETTY_FUNCTION__ << std::endl; }

In [ ]:
print_type(xfunc);

In [ ]:
auto xfunc_el = xfunc(0, 1);
std::cout << xfunc_el << std::endl;

In [ ]:
xt::xarray<double> xfunc_result = xfunc;

std::cout << xfunc_result << std::endl;

Broadcasting

  • Automagically extend operands to match shape
  • Broadcasting rules copied from NumPy (sketched below)
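
A rough sketch of the rule itself (broadcast_shape is an illustrative helper, not an xtensor function):

#include <cstddef>
#include <stdexcept>
#include <utility>
#include <vector>

// dimensions are matched from the right; a dimension of size 1 is stretched
std::vector<std::size_t> broadcast_shape(std::vector<std::size_t> a, std::vector<std::size_t> b)
{
    if (a.size() < b.size()) std::swap(a, b);
    std::vector<std::size_t> res = a;
    std::size_t offset = a.size() - b.size();
    for (std::size_t i = 0; i < b.size(); ++i)
    {
        std::size_t& r = res[offset + i];
        if (r == b[i] || b[i] == 1)  { /* keep r */ }
        else if (r == 1)             { r = b[i]; }
        else throw std::runtime_error("shapes not broadcastable");
    }
    return res;
}

// e.g. broadcast_shape({2, 3}, {3}) yields {2, 3}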

In [ ]:
xt::xarray<double> brcast = {-10, +1, -10};

In [ ]:
// shape (2, 3) * shape (3,) broadcasts to shape (2, 3)
std::cout << a * brcast << std::endl;

The following cells do not work on Binder, unfortunately.


In [ ]:
// #include <xwidgets/ximage.hpp>
// #include <xtensor-io/ximage.hpp>

// std::vector<char> read_file(const char* filename)
// {
//     std::basic_ifstream<char> file(filename, std::ios::binary);
//     return std::vector<char>((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
// }

In [ ]:
// template <class E>
// std::vector<char> to_png_buffer(const xt::xexpression<E>& e)
// {
//     const char* temp_filename = "/tmp/xio_image.png";
//     xt::dump_image(temp_filename, e);
//     return read_file(temp_filename);
// }

In [ ]:
// template <class E>
// auto display(const E& e)
// {
//     xw::image ls_image;
//     ls_image.value = to_png_buffer(e);
//     return ls_image;
// }

In [ ]:
// auto lightsaber = xt::load_image("./images/saber.png");

In [ ]:
// xw::image ls_image;
// ls_image.value = to_png_buffer(lightsaber);
// ls_image

In [ ]:
// lightsaber = xt::load_image("./images/saber.png");

// lightsaber = lightsaber * xt::xarray<double>({0.2, 0.8, 0.2});

// ls_image.value = to_png_buffer(lightsaber);

Memory Layout

  • Freely choose row-major, column-major, or custom strides

In [ ]:
xt::xarray<double, xt::layout_type::column_major> a_cm = {{1,2,3}, {4,5,6}};

std::cout << a_cm << std::endl;

Container optimizations


In [ ]:
// uses a std::array for shape & strides
#include <xtensor/xtensor.hpp>

xt::xtensor<double, 2> b = {{1, 2, 3}, {4, 5, 6}};
std::cout << b << std::endl;

In [ ]:
// uses a fixed size container for data (std::array)
// strides computed constexpr at compile time (no storage)
#include <xtensor/xfixed.hpp>

xt::xtensor_fixed<double, xt::xshape<2, 3>> c = {{1,2,3}, {4,5,6}};
std::cout << c << std::endl;

Views

  • Powerful views to reference sub-arrays
  • View shapes are computed statically where possible

In [ ]:
#include <xtensor/xview.hpp>
#include <xtensor/xio.hpp>
using namespace xt::placeholders;

std::cout << a << std::endl;

In [ ]:
std::cout << xt::view(a, 1, xt::all()) << std::endl;

In [ ]:
// numpy equivalent: a[:, ::2]
std::cout << xt::view(a, xt::all(), xt::range(_, _, 2)) << std::endl;

In [ ]:
xt::view(a, xt::all(), xt::range(_, _, 2)) *= 100;

std::cout << a << std::endl;

In [ ]:
std::cout << a << std::endl;

In [ ]:
std::cout << xt::view(a, xt::keep(0, 0, 0) , xt::all()) << std::endl;

Adapting a 1D container


In [ ]:
#include <vector>
#include <xtensor/xadapt.hpp>

std::vector<double> vdata = {1, 2, 3, 4, 5, 6, 7, 8, 9};
auto adapted_vdata = xt::adapt(vdata, {3, 3});

In [ ]:
std::cout << adapted_vdata << std::endl;

In [ ]:
adapted_vdata(1, 2)

In [ ]:
#include <xtensor/xmath.hpp>

std::cout << sum(adapted_vdata) << std::endl;

In [ ]:
std::cout << sum(adapted_vdata, {1}) << std::endl;

NumPy to xtensor Cheatsheet

Many functions that I haven't discussed:

  • More reducers: mean, prod ...
  • Accumulators: cumsum, cumprod ...
  • Random numbers, generators
  • Sorting, filtering
  • Optional (masked) values
  • NPY, CSV file format support

https://xtensor.readthedocs.io/en/latest/numpy.html

xtensor

  • Modular sub-packages:
    • xsimd: SIMD vectorization support (batch classes, high-speed implementations of trigonometric, exp, ... functions)
    • Intel TBB: parallelization
  • On top of xtensor:
    • xtensor-blas: bindings to BLAS & LAPACK – matrix multiplication, inverse, svd, ...
    • xtensor-io: image, sound, NPZ, HDF5 (video support coming soon)
    • xtensor-fftw, xtensor-interpolate and more

xtensor as a building block

  • xtensor-python / julia / R
  • Seamlessly use arrays from each language
  • Better performance for dynamic languages

Binding a Python / NumPy container

// xt::pyarray wraps the incoming NumPy buffer without copying it
double sum_of_sines(xt::pyarray<double>& m)
{
    auto sines = xt::sin(m);  // sines is a lazy expression, it does not actually hold values
    return std::accumulate(sines.begin(), sines.end(), 0.0);
}

PYBIND11_MODULE(xtensor_python_test, m)
{
    xt::import_numpy();
    m.doc() = "Test module for xtensor python bindings";

    m.def("sum_of_sines", sum_of_sines, "Sum the sines of the input values");
}

xtensor vs Blitz++

  • No runtime-dimension container in Blitz++ (the equivalent of xarray)
  • Blitz++ is unmaintained/dead
  • Legacy code, already broken with new compilers

xtensor vs Eigen3

  • N dimensions from the start (Eigen has a Tensor module in /unsupported)
  • Modularity vs. the monolithic Eigen
  • API subjectively "nicer", modern C++ features
  • Extensibility (Python/Julia/R)

Walkthrough qview + qfunc

  • A walk-through of a simple template-expression engine in C++17
  • Supporting some of xtensor's mechanisms!

Interactive Widgets in C++

  • Easy way to create simple graphical interfaces
  • such as sliders, buttons, boxes ... much more!

In [ ]:
#include "xwidgets/xslider.hpp"
xw::slider<double> slider_exmpl;

slider_exmpl


In [ ]:
#include "xwidgets/xslider.hpp"
xw::slider<double> slider;

slider

In [ ]:
slider.value = 20;      // Modifying properties of widgets triggers the update of the frontend.

In [ ]:
slider.value()          // Reading the value requires using the call operator

In [ ]:
#include "xcpp/xdisplay.hpp"
xcpp::display(slider);       // xcpp::display can be called to explicitly trigger the display of an object.

In [ ]:
xcpp::display(slider);       // calling it again displays a second, synchronized view of the same widget.

In [ ]:
// changing some more properties
slider.max = 40;
slider.style().handle_color = "blue";
slider.orientation = "vertical";
slider.description = "A slider";

In [ ]:
#include <iostream>
#include "xwidgets/xbutton.hpp"

xw::button bt;

In [ ]:
void foo()
{
    std::cout << "Clicked!" << std::endl;
}

In [ ]:
bt.on_click(foo);
bt

In [ ]:
bt.description = "button";
bt.button_style = "success";

Value Semantics of Widgets


In [ ]:
xw::button bt_copy = bt;

In [ ]:
bt_copy

In [ ]:
bt_copy.style().button_color = "green";
bt.style().button_color = "red";

In [ ]:
#include "xwidgets/xdropdown.hpp"

xw::dropdown dd(std::vector<std::string>({"foo", "bar"}), "foo");

dd

In [ ]:
dd.value()

In [ ]:
#include "xwidgets/xtab.hpp"
#include "xwidgets/xbutton.hpp"
#include "xwidgets/xslider.hpp"

xw::tab tabs;

xw::slider<double> tab_slid;
tabs.add(bt);
tabs.add(tab_slid);

In [ ]:
tabs

In [ ]:
tabs.set_title(0, "zero");
tabs.set_title(1, "one");

In [ ]:
#include "xwidgets/xhtml.hpp"
xw::html html;

html.value = R"xhtml(
    <div style="
        width: 50%;
        height: 100px;
        background: #323;
        color: white;
        text-align: center;"
        >Some HTML
    </div>
)xhtml";

html

xwidgets

  • All other base widgets supported:

  • TextArea, HTML, Text Input, Password, Box, Gamepad Controller, Color Picker ...

More demos

  • xplot
  • xleaflet
  • xvolume

The future

  • Already released: xframe, a Pandas-like dataframe in C++
  • More widgets!
  • More bindings!
  • More speed (always!)

Thanks!

Come contribute! Or talk to us 24/7 on gitter: https://gitter.im/QuantStack/Lobby

Check out: https://github.com/QuantStack

Twitter: @wuoulf | Github: @wolfv

xtensor tricks

  • Closure type
  • Keyword arguments
  • SIMDification of Lambdas

xtensor move semantics

  • xfunction and xview need to decide whether to store a value or a reference
  • Closure type: the complement to the STL's move semantics
  • Perfect forwarding lets you express who should own the memory

Problem: Dangling reference

auto sin_of_1234() {
    xt::xarray<double> tmp = {{1,2}, {3, 4}};
    return xt::sin(tmp);  // the returned expression references tmp ...
}                         // ... which is destroyed here: dangling reference

Solution: Use std::move

auto sin_of_1234() {
    xt::xarray<double> tmp = {{1,2}, {3, 4}};
    return xt::sin(std::move(tmp));  // the expression now owns the moved-in array
}

Closure Type

  • if lvalue reference: keep a reference
  • if rvalue reference or value: store a value
template <class S>
struct closure_type {
    using underlying_type = std::conditional_t<
        std::is_const<std::remove_reference_t<S>>::value,
        const std::decay_t<S>,
        std::decay_t<S>>;
    using type = typename std::conditional<
        std::is_lvalue_reference<S>::value,
        underlying_type&,
        underlying_type>::type;
};

template <class S>
using closure_type_t = typename closure_type<S>::type;

template <class CT>
struct reference_or_store_value {
    CT m_value;  // either a reference (CT = U&) or a value (CT = U)
};

template <class T>
auto func(T&& val) {
    using CT = closure_type_t<T>;
    return reference_or_store_value<CT>{ std::forward<T>(val) };
}
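
A minimal usage sketch for the func defined above (variable names are illustrative):

#include <utility>
#include <xtensor/xarray.hpp>

xt::xarray<double> arr = {{1, 2}, {3, 4}};

auto holds_ref = func(arr);             // T = xt::xarray<double>&, so CT is a reference: arr is not copied
auto holds_val = func(std::move(arr));  // T = xt::xarray<double>, so CT is a value: the array is moved in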

Problem: keyword args for clean API

  • Some functions take many independent arguments
  • E.g. NumPy reduction:
    np.sum(array, initial=123.0, keep_dims=True,
           evaluation_strategy="lazy")
    

Goals

  • Arguments unordered
  • Can influence return type – allow tag dispatching
  • Allow for some checking on args

C++ Keyword Arguments

  • No standard way to do keyword args in C++14
  • C++ designated initializers only arrive with C++20 :/
struct params { int x; double c = 0.0; };

void func(const params& p);

int main()
{
    // designated initializers: standard only from C++20 (before that, a compiler extension)
    func({.x = 123, .c = 12.5 });
}

C++14 Solution

  • Goal: use operator overloading to do something like
xt::sum(array, keep_dims | initial(10.0) | rapid_eval);
  • Problem: keep_dims modifies the return type! (xtensor<T, 0> vs xtensor<T, N>)

In [ ]:
#include <iostream>
#include <tuple>
#include <utility>

template <template <class...> class A, class... AX, class X>
auto operator|(const A<AX...>& args, X&& rhs)
{
    return std::tuple_cat(args, std::forward<X>(rhs));
}

In [ ]:
struct _keep_dims {};
constexpr auto keep_dims = std::tuple<_keep_dims>{};

struct _rapid_evaluation {};
constexpr auto rapid_eval = std::tuple<_rapid_evaluation>{};

In [ ]:
template <class T = double>
struct xinitial {
    xinitial(T val)
        : m_val(val)
    {
    }
    
    T value() { return m_val; }
    T m_val;    
};

In [ ]:
template <class T>
constexpr auto initial(T val)
{
    return std::make_tuple(xinitial<T>(val));
}

In [ ]:
template <std::ptrdiff_t I, class T, class Tuple>
struct tuple_idx_of_impl;

template <std::ptrdiff_t I, class T>
struct tuple_idx_of_impl<I, T, std::tuple<>>
{
    static constexpr std::ptrdiff_t value = -1;
};

template <std::ptrdiff_t I, class T, class... Types>
struct tuple_idx_of_impl<I, T, std::tuple<T, Types...>>
{
    static constexpr std::ptrdiff_t value = I;
};

template <std::ptrdiff_t I, class T, class U, class... Types>
struct tuple_idx_of_impl<I, T, std::tuple<U, Types...>>
{
    static constexpr std::ptrdiff_t value = tuple_idx_of_impl<I + 1, T, std::tuple<Types...>>::value;
};

template <class T, class Tuple>
struct tuple_idx_of
    : std::integral_constant<std::ptrdiff_t, tuple_idx_of_impl<0, T, Tuple>::value>
{
};

template <class R, class T, class L, std::ptrdiff_t I>
void safe_extract(R& target, const T& args, L&& extractor, std::integral_constant<std::ptrdiff_t, I>)
{
    target = extractor(std::get<I>(args));
}

template <class R, class T, class L>
void safe_extract(R& target, const T& args, L&&, std::integral_constant<std::ptrdiff_t, -1>)
{
}

template <class S, class R, class T, class L>
constexpr void extract(R& target, const T& args, L&& extractor)
{
    safe_extract(target, args, std::forward<L>(extractor), tuple_idx_of<S, T>{});
}
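
A quick sanity check of tuple_idx_of, as a sketch using the tag types defined above:

static_assert(tuple_idx_of<_keep_dims, std::tuple<_rapid_evaluation, _keep_dims>>::value == 1,
              "present tags yield their index");
static_assert(tuple_idx_of<_keep_dims, std::tuple<_rapid_evaluation>>::value == -1,
              "absent tags yield -1");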

In [ ]:
template <class T>
struct reducer_args
{
    using has_keep_dims = std::conditional_t<
        tuple_idx_of<_keep_dims, T>::value != -1,
        std::true_type,
        std::false_type>;
    
    reducer_args(const T& tpl)
    {
        extract<xinitial<int>>(initial_value, tpl, [](auto v) { return v.value(); });
        keep_dims = tuple_idx_of<_keep_dims, T>::value != -1;
        rapid_evaluation = tuple_idx_of<_rapid_evaluation, T>::value != -1;
    }

    // Default arguments
    bool rapid_evaluation = false;
    bool keep_dims = false;
    double initial_value = 0;
};

In [ ]:
#include <xtensor/xtensor.hpp>

template <class T, std::size_t N>
auto reducer_impl(xt::xtensor<T, N> arr, std::true_type /* keep_dims */)
{
    std::cout << "Returning an xtensor with " << N << " dimensions!" << std::endl;
    return xt::xtensor<T, N>();
}

template <class T, std::size_t N>
auto reducer_impl(xt::xtensor<T, N> arr, std::false_type /* keep_dims */)
{
    std::cout << "Returning an xtensor with " << 0 << " dimensions!" << std::endl;
    return xt::xtensor<double, 0>();
}

template <class T, class A>
auto reducer(T&& array, A args)
{
    reducer_args<A> rargs(args);

    std::cout << "Keep Dims  : " << rargs.keep_dims << std::endl;
    std::cout << "Rapid Eval : " << rargs.rapid_evaluation << std::endl;
    std::cout << "Initial Val: " << rargs.initial_value << std::endl;

    
    using has_keep_dims = typename reducer_args<A>::has_keep_dims;
    return reducer_impl(std::forward<T>(array), has_keep_dims{});
}

In [ ]:
auto input = xt::xtensor<double, 4>();

In [ ]:
// keep_dims is present, so this dispatches to the std::true_type overload (4-dimensional result)
reducer(input, rapid_eval | initial(333) | keep_dims);

Automatically SIMD-ified lambdas

  • auto-lambdas (generic lambdas) are great!
  • Can be used inside of xtensor to generate SIMD-ified code
  • e.g.
auto square = [](auto a) { return a * a; };

// equivalent to 
struct square
{
    template <class A>
    auto operator()(A a)
    {
        return a * a;
    }
};
  • The lambda describes the data flow, independent of the concrete element type

xtensor SIMD-ification

  • overloaded operators on xsimd::batch for parallel operations

  • e.g.

xsimd::batch<float, 8> a (1,2,3,4,5,6,7,8), b(10);

auto res = a + b;
  • executes a parallel + on 8 elements

xtensor SIMD-ification

auto square = [](auto a) -> decltype(a * a) {
    return a * a;
};
  • Could instantiate the auto-lambda with a scalar or with an xsimd batch!
  • Idea: automatically detect whether xsimd batches are valid arguments for the lambda
template <class F, class... T, 
          typename = decltype(std::declval<F>()(std::declval<T>()...))>
std::true_type  supports_test(const F&, const T&...);
std::false_type supports_test(...);

template <class... T> struct supports;

template <class F, class... T> struct supports<F(T...)>
    : decltype(supports_test(std::declval<F>(), std::declval<T>()...)) {};

adapted from alfC on StackOverflow
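
A small sketch of how the trait behaves (square_l and the checks are illustrative additions, not part of xtensor):

#include <string>

auto square_l = [](auto x) -> decltype(x * x) { return x * x; };

static_assert(supports<decltype(square_l)(double)>::value,
              "callable with a scalar");
static_assert(!supports<decltype(square_l)(std::string)>::value,
              "not callable with a type lacking operator*");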

template <class F>
struct lambda_wrapper {
    F m_lambda;

    template <class... T, 
              class = std::enable_if_t<detail::supports<F(T...)>::value>>
    auto simd_apply(T... args) const
    {
        return m_lambda(args...);
    }
};
template <class E1>
inline auto square(E1&& e1) noexcept
{
    auto fnct = [](auto x) -> decltype(x * x) {
        return x * x;
    };
    return make_lambda_xfunction(std::move(fnct), std::forward<E1>(e1));
}

In [ ]:
#include <xtensor/xio.hpp>
#include <xtensor/xtensor.hpp>

xt::xtensor<double, 2> in_square({{1, 2, 3, 4}, {5, 6, 7, 8}});

In [ ]:
std::cout << xt::square(in_square) << std::endl;