Random dataset generation

This example shows generation of two-dimensional dataset follows bivariate Gaussian distribution.

Gsl_randist.bivariate_gaussian is a binding to gsl_ran_bivariate_gaussian, a function generates a two random numbers following bivariate Gaussian distribution defined as

$$ \newcommand{d}{\mathrm{d}} p(x,y) \d x \d y = \frac{1}{2 \pi \sigma_x \sigma_y \sqrt{1-\rho^2}} \exp\left( -\frac{x^2 / \sigma_x^2 + y^2 / \sigma_y^2 - 2 \rho x y / (\sigma_x \sigma_y)}{2(1-\rho^2)} \right) \d x \d y $$

where $\sigma_x$ and $\sigma_y$ are standard deviations of $x$ and $y$ respectively, and $\rho \in [-1,+1]$ is a correlation coefficient between $x$ and $y$.


In [1]:
#require "gsl";;
#use "archimedes_iocaml.ml";;


/home/opam/.opam/4.04.1/lib/ocaml/unix.cma: loaded
/home/opam/.opam/4.04.1/lib/ocaml/bigarray.cma: loaded
/home/opam/.opam/4.04.1/lib/gsl: added to search path
/home/opam/.opam/4.04.1/lib/gsl/gsl.cma: loaded
/home/opam/.opam/4.04.1/lib/easy-format: added to search path
/home/opam/.opam/4.04.1/lib/easy-format/easy_format.cmo: loaded
/home/opam/.opam/4.04.1/lib/biniou: added to search path
/home/opam/.opam/4.04.1/lib/biniou/biniou.cma: loaded
/home/opam/.opam/4.04.1/lib/yojson: added to search path
/home/opam/.opam/4.04.1/lib/yojson/yojson.cmo: loaded
/home/opam/.opam/4.04.1/lib/ocaml/str.cma: loaded
/home/opam/.opam/4.04.1/lib/atd: added to search path
/home/opam/.opam/4.04.1/lib/atd/atd.cma: loaded
/home/opam/.opam/4.04.1/lib/atdgen: added to search path
/home/opam/.opam/4.04.1/lib/atdgen/atdgen.cma: loaded
/home/opam/.opam/4.04.1/lib/bytes: added to search path
/home/opam/.opam/4.04.1/lib/result: added to search path
/home/opam/.opam/4.04.1/lib/result/result.cma: loaded
/home/opam/.opam/4.04.1/lib/lwt: added to search path
/home/opam/.opam/4.04.1/lib/lwt/lwt.cma: loaded
/home/opam/.opam/4.04.1/lib/lwt/lwt-log.cma: loaded
/home/opam/.opam/4.04.1/lib/lwt/lwt-unix.cma: loaded
/home/opam/.opam/4.04.1/lib/ctypes: added to search path
/home/opam/.opam/4.04.1/lib/ctypes/ctypes.cma: loaded
/home/opam/.opam/4.04.1/lib/ctypes/ctypes-top.cma: loaded
/home/opam/.opam/4.04.1/lib/ctypes/ctypes-foreign-base.cma: loaded
/home/opam/.opam/4.04.1/lib/ctypes/ctypes-foreign-unthreaded.cma: loaded
/home/opam/.opam/4.04.1/lib/iocaml-kernel: added to search path
/home/opam/.opam/4.04.1/lib/iocaml-kernel/iocaml_lib.cma: loaded
/home/opam/.opam/4.04.1/lib/cairo2: added to search path
/home/opam/.opam/4.04.1/lib/cairo2/cairo2.cma: loaded
/home/opam/.opam/4.04.1/lib/ocaml/dynlink.cma: loaded
/home/opam/.opam/4.04.1/lib/ocaml/camlp4: added to search path
/home/opam/.opam/4.04.1/lib/archimedes: added to search path
/home/opam/.opam/4.04.1/lib/archimedes/archimedes_internals.cma: loaded
/home/opam/.opam/4.04.1/lib/archimedes/archimedes_toploop.cma: loaded
Module Archimedes loaded and aliased as A.
/home/opam/.opam/4.04.1/lib/archimedes/archimedes_cairo.cma: loaded
module Archimedes_iocaml : sig  end

In [2]:
let rng = Gsl_rng.make Gsl_rng.MT19937 ;; (* Mersenne Twister *)

(* Generate positive examples *)
let positive_xys =
    Array.init 100 (fun _ -> Gsl_randist.bivariate_gaussian rng ~sigma_x:0.4 ~sigma_y:0.9 ~rho:0.4)
    |> Array.map (fun (x, y) -> (x +. 0.5, y -. 0.1))

(* Generate negative examples *)
let negative_xys =
    Array.init 100 (fun _ -> Gsl_randist.bivariate_gaussian rng ~sigma_x:0.6 ~sigma_y:1.2 ~rho:0.3)
    |> Array.map (fun (x, y) -> (x -. 0.8, y +. 0.4))


Out[2]:
val rng : Gsl_rng.t = <abstr>
Out[2]:
val positive_xys : (float * float) array =
  [|(0.473961135502044439, -0.012970389488107234);
    (-0.129328309963855026, -0.739066782463754901);
    (0.611770960689301702, 1.38175245248746337);
    (0.980861134355586861, 0.937928884197932);
    (0.258338774923684233, 0.505326681173164594);
    (0.530330810877653769, -1.12646726780869266);
    (0.282150823513374727, -2.27302751526679669);
    (0.236318926335476742, -0.897626465429733567);
    (0.455881933556723351, -0.171951266431705496);
    (0.571727361648575516, 0.701616091983125467);
    (1.34100419921166791, 0.64234675547029263);
    (0.598966555643246, -1.08041273183328324);
    (0.357898429129681839, -0.778884235460391761);
    (0.285528739028345968, -0.14313917328830944);
    (1.04643816525960576, 1.07729791628968341);
    (0.617138063373849466, -0.446805964531099109);
    (0.629671383070630641, -0.509584733471038165);
    (0.062533109659662578, -0.487873129355432811);
    (0.129077658202316037, -0.986368054243515813);
    (0.53755241526169, -0.748993863646964297);
    (0.369963569131545, 0.288196207630560641);
    (-0.0887110300005968799, -0.933050235545091811);
    (0.0823404977969722784, -0.714686134868933509);
    (0.708189174666825627, -0.668926307523105224);
    (0.469604680872181768, -0.608915674307867194);
    (-0.289065549286953383, -0.756860191881582267);
    (-0.0382640518591267353, -0.545880337049717168);
    (1.03955480822620872, -0.233648049674908709);
    (0.00946592583518840458, -0.660452964104741302);
    (0.924834614687084611, 1.49948758121106396);
    (1.2626736748700873, 0.416089311625301939);
    (0.234961603937378027, -0.55357425896105783);
    (0.147334008748843448, -0.531410369472795296);
    (0.180928913767983868, -2.48511982611502713);
    (1.01441841522073872, 0.839268121084365881);
    (0.553557227737101143, -0.0815842076204309485);
    (0.583320809080576, -0.211572796295703136);
    (0.711708261625688676, 0.680316756233119713);
    (-0.0633408790224968543, 0.69402329064987589);
    (0.446997135602668738, -0.471275017372961846);
    (1.29396476041964559, 1.5901929401396937);
    (0.496455551299589515, 1.37588425257003433);
    (0.353704863145022697, -0.834926308957887442);
    (-0.046317533417266743, -1.11119947579511757);
    (0.00308374983022552396, 1.16081600497826409);
    (1.27508950625587847, -0.147276253141520513);
    (0.679182704640949542, 1.49285281784326629);
    (0.923949787693643332, 1.42241449991284319);
    (0.194857422459842944, -0.684254607093642275);
    (0.391320136015836817, -0.448826291907177599);
    (0.94521864126129973, 1.33472210175327444);
    (-0.0317099829599218896, -0.804423911931779312);
    (0.0603185423826655409, -0.533928733365225394);
    (0.399969920060608719, 0.469024504732757697);
    (0.113636464474634546, -0.00233720977701182275);
    (0.508724219082457241, -0.0213333810275188746);
    (0.394533758135437895, 0.923010811717366);
    (0.565981915728158924, -0.02919120456867344);
    (-0.0279871610038990859, 0.0445841363358924214);
    (0.698197090605748527, -1.22070791238031373);
    (-0.0389269018413421763, -0.780838897991659397);
    (0.164051579631987576, -0.124794728286781592);
    (0.363346531294175823, -0.288646996490602548);
    (0.828159986949559501, -0.250801850239709911);
    (0.730859193644997518, -1.03089647196014411);
    (0.155446928978762, 1.00582342620532317);
    (0.888962594558333263, 0.594951553443829106);
    (0.454339417632481757, 1.11678669469800407);
    (0.482643792072409816, 0.450628768018508818);
    (0.0545992800825143565, -0.986339059616768377);
    (0.942504692248497311, 1.38439167688769849);
    (0.591558375532874714, -0.704695783368601392);
    (0.849892940958360654, 0.853315343435313678);
    (0.148682446935871571, -1.07897642875081656);
    (0.705587733319345, 0.0145606336397267055);
    (0.777038672484796189, 1.38349402495956553);
    (0.538882543384064938, 0.263097648289149166);
    (0.370341366023230756, 2.53705659152089069);
    (0.370175769566094104, 1.26868398700331908);
    (0.807285922379946896, 1.62524767156112571);
    (0.341125609979042, 0.307587791509831066);
    (0.0328611721874462748, -1.38667041495164112);
    (0.440549593987298704, 0.118180409114684831);
    (0.750133722031769, 0.50050014492264594);
    (0.892309910618154234, -1.68288204693397025);
    (-0.0506901277377306281, -1.10200426538372498);
    (0.190575243336379763, -2.16167295130545822);
    (0.437303284723833, -0.98239855986423863);
    (0.668531458647550592, -0.329629438827123811);
    (0.282722731348090894, -0.96992592260808741);
    (0.677650172226174852, 0.211983440074905743);
    (0.26405138238090442, -0.638106404313114495);
    (-0.0135411180816353838, -1.93934410045007155);
    (0.885205270558985902, -0.532933333037403867);
    (1.27932017896223504, 0.604332918834191157);
    (0.880131910297824449, 1.19544616519153735);
    (0.737600051952451707, 1.66246487933367515);
    (0.708086980058328486, 0.17982709842132269);
    (0.547398572504997127, 0.0921349234590988775);
    (0.975386479253431693, ...); ...|]
Out[2]:
val negative_xys : (float * float) array =
  [|(-0.740868370664090836, 1.08918425571426525);
    (-1.37740650541737208, -0.485504714798280612);
    (-0.727393570168965886, 0.973982719532094143);
    (-0.784392023838062502, 2.71865137646865263);
    (-0.00955136985225313317, 3.33509446978725954);
    (0.342220316945413217, 0.660543080466411303);
    (-0.722361712483384766, -0.405427078724945);
    (0.146598097170695274, -0.498120376495847172);
    (-0.811723235783548325, -1.19667575390282099);
    (0.187228179687351837, 0.554514773594020793);
    (-1.08040064453677243, 1.92746757785456646);
    (-0.960946588169249383, -1.22096520776165463);
    (-0.667954961891730647, -0.720769814236824069);
    (-2.00113756159833311, -0.485262655724939429);
    (-1.21146979136696231, 0.360150961615395726);
    (-0.634255919303336468, 3.17229196538087832);
    (-1.33674506013146921, 0.4805521728609084);
    (-1.33312617691822299, 0.505288952406999781);
    (-1.6594723790983108, -0.941153163348022681);
    (-0.249223427511776396, 2.70715972489944479);
    (-0.775344470857552537, -0.00120787886107376341);
    (-0.189609198210250463, 2.94370329761667193);
    (0.721660020111321243, 1.77956379242342511);
    (-1.78521056457920713, -1.33750465224122239);
    (-0.557315943511985323, 0.872985817512321294);
    (-0.757975532460247292, 0.0405957616149069112);
    (-1.33817689711448762, 1.11568061665587637);
    (-0.836689722177289652, 1.8841192066470418);
    (-0.654631259856296888, 1.39647126046376613);
    (-0.86346557126659329, -0.0910318425724927);
    (-0.518656999966152332, 1.22442556308140538);
    (-0.941028758289328304, -0.41088379172099565);
    (-1.05641006057921416, -0.366757059263887131);
    (-2.23327576825649743, 0.414783837397973365);
    (-1.38859544879319197, 0.267815086269219127);
    (-0.492719979049636103, 0.063386471913106146);
    (-0.980687442806899456, 0.851438649329049446);
    (-0.29702015769045742, 0.166681222937227924);
    (-1.63907257929957328, -0.0565330719584493657);
    (-0.490925265494679741, 1.16331487742982631);
    (-1.63496451860481873, 3.13443599075426205);
    (-0.120754520895889783, 0.0501060407052121293);
    (0.240032331255176423, 0.285675157380761047);
    (-0.523988160121144908, 2.58682544385597391);
    (-1.54474193065101106, 1.68590501496547773);
    (-0.443196300373611962, 1.00548398802551464);
    (-0.489459632517519172, 0.496876969945031055);
    (-1.44111971396590155, -0.186343245495870891);
    (-1.32892594096160321, -2.68270569637777);
    (-1.38013369491686611, -1.04838290094287379);
    (-0.901627745243222, 0.317298001066819713);
    (0.432346133617774075, 1.09186205540876369);
    (-0.219591127784481532, 0.594845319000972816);
    (-0.271641696766069862, 0.850889591351813079);
    (-0.457400016413641641, 1.40058472787502497);
    (-0.77004261137454888, -0.353254860768216439);
    (-1.5056941333342726, -0.0895840604245670691);
    (-0.987592659369071701, 0.106433126412444234);
    (-0.344577128003331901, 0.687943573679068443);
    (-0.624555068841469, 2.06576777149002533);
    (-0.175808419258325355, -0.782828886022083);
    (-0.0727215102553447901, 1.94569478900874859);
    (-0.60876021705804273, -0.704151326371501);
    (-0.145674665220178245, 1.49141846866865269);
    (-0.579910311002452827, 0.900085691196372939);
    (-0.542655921436758781, 0.878879077226674799);
    (-0.161738320643657674, 1.60062903146681057);
    (-1.7372615730932881, -0.421201694391357351);
    (-0.504693374045440124, 0.79011882648778986);
    (-0.861881373610274726, 1.4984566510075723);
    (-0.758003349149310113, -0.0792762144926012491);
    (-0.0787496034405590351, 0.0374380094453739);
    (-0.493744837553645954, -0.928657750422277162);
    (0.345643639045405848, 2.17284656341119531);
    (0.174126999491181933, 0.594639782445616216);
    (-0.958896406337629514, -0.790865809995023317);
    (-0.726052113743039906, 1.88648162039296752);
    (-1.03320527805371865, -1.65557870595439205);
    (-0.369871721812796816, 1.99016949622315575);
    (0.45965813435060765, 1.2130660874092829);
    (-0.145253007538399981, 1.24907488180302173);
    (-1.83042335467916573, 2.32743856545617733);
    (-1.53606397191815747, 1.59413555497581871);
    (-0.55263679872512772, 0.294991656826287418);
    (0.28839130760372389, 0.459311956062244287);
    (0.0410018726895357, -0.717890382898274226);
    (-1.41658469346820803, -0.867370187707129481);
    (-0.777570165755067766, 0.321388825031423297);
    (-0.809014867560326478, -0.568961242300300762);
    (-1.20908381014768351, 1.58111022498808085);
    (-0.305624749688111153, 0.45413482335216504);
    (-0.156223435123368226, 0.201350668222346069);
    (-1.62262825850505443, -0.982998090428432447);
    (0.0972583159823341425, 1.53404135504567307);
    (-0.875086553604119466, 2.70407606701966197);
    (-1.05647820742728804, 1.25480353287890711);
    (-0.613153231518847264, 0.147014290729266306);
    (-0.879576461450714109, 0.737111160657093478);
    (-0.969196686163470544, 0.0838907959962045258);
    (-0.23389844644062141, ...); ...|]

In [3]:
let vp = A.init ["iocaml"] in
A.Axes.box vp ;

A.set_color vp A.Color.red ;
A.Array.xy_pairs vp positive_xys ;

A.set_color vp A.Color.blue ;
A.Array.xy_pairs vp negative_xys ;

A.close vp


Out[3]:
- : unit = ()

In [4]:
let oc = open_out "datasets/bivariate_gaussian_2d.csv" in
let ppf = Format.formatter_of_out_channel oc in
Array.iter
  (fun (x, y) -> Format.fprintf ppf "%g,%g,0@." x y)
  negative_xys ;
Array.iter
  (fun (x, y) -> Format.fprintf ppf "%g,%g,1@." x y)
  positive_xys ;
close_out


Out[4]:
- : out_channel -> unit = <fun>

In [ ]: