Just draw something on the screen!

Nostalgia for times past sometimes leads to frustration and anger. Yes, times are harder for would-be pixel artists, but nothing a little work can't solve. Let's do fractals in OCaml!

Suppose you've already installed the wonderful findlib (vielen Dank, Herr Stolpmann!). Suppose, furthermore, you've succeded in compiling OcamlSDL. This might look like a heavy prerequisite to fulfill before showing pretty graphics, but it's a job you have to do only once. Then, something like plotting Newton fractals requires less than ninety lines of code, and the result is this:

I find natural to abstract iteration patterns into higher-order functions. In this case, centering and scaling the pixel plane over the complex plane (the so-called screen-space transformation) is tedious but repetitive code that could be reused. Furthermore, the details of striding, color conversion and such are the same from program to program, where the only thing that varies is the actual calculation performed on each point:

let iter_surface ?(mag=1.0) f g surface =
  let info   = Sdlvideo.surface_info surface
  and pixels = Sdlvideo.pixel_data_32 surface in
  let size   = min info.Sdlvideo.w info.Sdlvideo.h in
  let scale  = mag /. float size
  and cx, cy = info.Sdlvideo.w / 2, info.Sdlvideo.h / 2 in
  for i = 0 to info.Sdlvideo.h - 1 do
    let b = i * info.Sdlvideo.pitch / 4
    and y = float (cy - i) *. scale in
    for j = 0 to info.Sdlvideo.w - 1 do
      let x = float (j - cx) *. scale in
      let c = f x y in
      pixels.{b + j} <- Sdlvideo.map_RGB surface c
    g i

The magnification mag specifies how many real units the shortest dimension spans. The outer loop iterates with i for every scanline, whose base address into the pixels array is b. The inner loop iterates with j the index to successive pixels, and from both i and j the coordinates x, y are computed. With those the function f is called, and the result c (an RGB triple of type Sdlvideo.color) is mapped and assigned to the pixel. At the end of every scanline, the function g is called so that the main program can display progress.

The simplest SDL event loop waits for the user to quit the application:

let rec event_loop () =
  match Sdlevent.wait_event () with
  | Sdlevent.QUIT -> ()
  | _ -> event_loop ()

Anything more complicated is easy to accomodate. Now, there are a myriad schemes for color rendering of mathematical data, but using the color wheel is very common, especially since the complex argument maps naturally to a hue value. Converting HSV colors to RGB is somewhat involved, but the following code is entirely reusable:

let quomod d x =
  let f, e = modf (float d *. (x -. floor x)) in truncate e, f

let byte_of_frac x =
  let mask x = lnot (x asr 30) land 0xff in
  let n = truncate (ldexp x 8) in
  (n land (mask n))  lor  mask (n - 256)

let hsv_to_rgb (h, s, v) =
  let epsilon = 1e-3 in
  let w = byte_of_frac v in 
  if abs_float s < epsilon then (w, w, w) else
  let (e, f) = quomod 6 h in
  let x = byte_of_frac (v *. (1. -. s))
  and y = byte_of_frac (v *. (1. -. s *. f))
  and z = byte_of_frac (v *. (1. -. s *. (1. -. f))) in
  match e with
  | 0 -> (w, z, x)
  | 1 -> (y, w, x)
  | 2 -> (x, w, z)
  | 3 -> (x, y, w)
  | 4 -> (z, x, w)
  | 5 -> (w, x, y)
  | _ -> assert false

quomod scales the fractional part of x by the integer d, and splits the result into an entier part e and a fractional part f. This clasifies the hue into a sextant on which the fractional part linearly interpolates. To map a real number in the interval [0… 1] into [0… 256), byte_of_frac scales the fraction by 28 and clips the result to a byte by constructing the appropriate bit-mask.

The code so far doesn't incorporate any specific functionality, and could be shipped off to a standard module to use again and again. The particular application that I had in mind, Newton fractals, are the plot on the complex plane of the basins of attraction to the complex roots of a polynomial, as found by applying the Newton method.

Polynomials and their derivatives are especially easy to evaluate at a point in one pass using Horner's method, when represented as the list of their coefficients with the independent term at the head, a so-called dense representation of polynomials:

open Complex

let eval p x =
  List.fold_left (fun (y, dy) p ->
    (add p (mul y x), add y (mul dy x)))
    (zero, zero) p

With that, the Newton iteration is very simple:

let nr_root ?(max_iter=32) ?(epsilon=1e-6) p x =
  let rec go (x, i as res) =
    if i = max_iter then res else
    let y, dy = eval p x in
    if norm y < epsilon then res else
    let dx = div y dy in
    go (sub x dx, succ i)
  in go (x, 0)

The root finder returns the number of iterations required, up to a maximum and/or specified precision, together with the closest estimate to the root. This allows me to "color" the fractal according to the time needed to converge to a root. With the chosen representation, the polynomial zn - 1 (which represents, by definition, the n-th roots of unity) is straightforward to construct:

let pow n x =
  let rec go i l =
    if i = 0 then l else go (pred i) (zero :: l)
  in go n [x]

let nth_roots n = (neg one) :: pow (n - 1) one

These twenty lines are all the mathematical machinery required. It only remains to put all the pieces together:

let () =
  let max_iter = 128 in
  Sdl.init [`EVERYTHING];
  let screen = Sdlvideo.set_video_mode ~w:640 ~h:480 ~bpp:32 [`SWSURFACE] in
  Sdlwm.set_caption ~title:"nrfractal" ~icon:"";
  let p = nth_roots 3 in
  iter_surface ~mag:2.0 (fun x y ->
    let (z, n) = nr_root ~max_iter p { re = x; im = y } in
    let hue = Complex.arg z *. 0.159154943091895346 +. 0.5
    and vlu = float (max_iter - n) /. float max_iter in
    hsv_to_rgb (hue, 1., vlu) )
    (fun i -> if i land 7 = 0 then Sdlvideo.flip screen)
  Sdlvideo.flip screen;
  event_loop ();
  Sdl.quit ()

The iteration limit max_iter controls the fractal detail. I first initialize the SDL library and create a big enough window, with a descriptive title. Then, I build a polynomial p representing the third roots of unity, and invoke iter_surface with a function that finds p's root starting from the complex value corresponding to the coordinates of the point to be plotted. I convert the complex angle of the returned approximation z to a hue value (by dividing by 2π and normalizing to the range [0… 1]); analogously, the number of iterations determines the vividness, or value of the color. This is converted to an RGB triple and returned. Every eight scanlines the screen is refreshed with the contents of the backing buffer. Once every point is plotted, the event loop is entered until the user decides to quit.

So, not counting the generic, reusable code, less than 40 lines of code suffice to just draw something on the screen. Have fun!


Aperiodic Tilings

When you thought that your obsession had run its course and left you in peace, finally, along comes something new and shiny to once more lead you astray. I've written half a dozen posts about sorting networks and genetic algorithms; I thought I had put that topic to rest. What am I to do when I find the Tilings Encyclopedia later the same week? How could I resist this aperiodic fest of self-similar tilings?

The nice thing about the Encyclopedia is not that the tilings are extensively cross-referenced and annotated with the relevant bibliographies; that's all very useful, I don't doubt, for its intended audience; but what I immediately liked was that the tilings are presented as rewriting rules. That should make it easy to replicate them, I thought; I wasn't very wrong, but I find that a functional program is not finished until its structure is strong and regular enough to let it stand on its own legs. After refactoring and restructuring and rewriting the code for a week or so, I had a robust, general program that let me add new tilings, write the tilings in different formats and so on; my last post deals with the details of the back-end mechanism.

Look at the page for the Amman-Beenker rhomb triangle tiling, for instance (go ahead, I'll wait for you). The tiling uses three primitives: two oriented triangles and a parallelogram. Each is "split" or rewritten into a configuration of the same tiles. The process by which the plane is tiled by rewriting is called inflation; the rate of expansion of a primitive tile into a configuration of tiles at each step is called the inflation coefficient.

In typical OCaml fashion, the tyranny of early bound top-level definitions means that I have to sweat the small details first, so that the intended objective is reached in a grand finale by a simple function that synthesizes all the bottom-up development. That's is at odds with writing an engaging literate program, I'm afraid, but it has to be done. The first ingredient, then, in my recipe for pretty pictures is something that lets me put the pieces where they should go. A little bit of linear algebra, in short.

I'll use the most direct representation possible of affine transformations, an affine matrix T such that a vector v gets mapped to vT. Only, I'll be sloppy and identify points on the plane (x, y) ∈ R2 with homogeneous vectors (x, y, 1) ∈ A2. I always use row vectors and multiply them on the left, which means that the last column of the matrix T is fixed to (0, 0, 1)T, so I don't need to represent it. To simplify things, I flatten the matrix in column-major order:

type xform = float array

With that, the symmetries of the dihedral group D2 are simple constant matrices:

let identity = [|  1.;  0.;  0.;  0.;  1.;  0. |]
and fliph    = [|  1.;  0.;  0.;  0.; -1.;  0. |]
and flipv    = [| -1.;  0.;  0.;  0.;  1.;  0. |]
and reflect  = [| -1.;  0.;  0.;  0.; -1.;  0. |]
and qrot     = [|  0.; -1.;  0.;  1.;  0.;  0. |]

The affine transforms are equally direct:

let scale s = [| s; 0.; 0.; 0.; s; 0. |]
and translate (x, y : point) = [| 1.; 0.; x; 0.; 1.; y |]
and skew (x, y : point) = [| 1.; x; 0.; y; 1.; 0. |]
and rotate a =
  let s, c = sin a, cos a in
  [| c; -. s; 0.; s; c; 0. |]

Note that I use homogeneous scaling. The composition of two transforms is simply the product of the corresponding matrices. I denote the composition with the % operator, but perhaps a better choice would have been an infix symbol that highlighted the non-commutative nature of transformations:

let ( % ) (t : xform) (u : xform) : xform =
    t.(0) *. u.(0) +. t.(3) *. u.(1);
    t.(1) *. u.(0) +. t.(4) *. u.(1);
    t.(2) *. u.(0) +. t.(5) *. u.(1) +. u.(2);
    t.(0) *. u.(3) +. t.(3) *. u.(4);
    t.(1) *. u.(3) +. t.(4) *. u.(4);
    t.(2) *. u.(3) +. t.(5) *. u.(4) +. u.(5);

To apply a transformation to a point is as simple as vector-matrix multiplication:

let apply (t : xform) (x, y : point) : point =
  (x *. t.(0) +. y *. t.(1) +. t.(2),
   x *. t.(3) +. y *. t.(4) +. t.(5))

The underlying symmetries of the tilings I'll reproduce are polygonal (pentagonal, octagonal, decagonal and so on), so I find easier to work in degrees:

let pi = 3.141592653589793238462643383276
let deg x = x *. 0.017453292519943295769236907684
let rad x = x *. 57.295779513082320876798154814169

The gist of a tiling is captured in the following signature:

module type TILING = sig
  type t
  val name  : string
  val shape : t -> point list
  val color : t -> rgbcolor
  val seed  : (t * xform) list
  val rules : (t * xform) -> (t * xform) list

The type t abstracts away the tiles used. name is the full name of the substitution, for purposes of meaningfully labeling the output. shape t returns the polygon corresponding to the tile t as a list of points, and color t returns an RGB triple for that tile. seed is a list of placed tiles with which the inflation process will start. Finally, the meat of the tiling is encoded in rules, that for each tile t returns the expansion as a list of placed tiles. By convention, I'll scale the resulting expansion so that the transformations are local to each tile; in other words, I'll avoid the need to keep track of the overall scale. With that, the core algorithm is captured in the following function:

let iterate f l n =
  let rec go l n =
    if n == 0 then l else
    go (List.concat (List.map f l)) (pred n)
  in go l n

This rather generic function repeatedly expands a tiling l by applying n times the rules f. Interpreted under the list monad, iterate is nothing more than functional iteration of the monadic function f; but clearly bringing the entire machinery of the non-deterministic monad for just this is overkill.

With this, I have all the necessary pieces to actually implement the drawing algorithm. In keeping with the theme of the previous post, I'll output EPS code to a temporary file which I'll then open with Preview. The Generator is parameterized by a TILING:

module Generator (S : TILING) = struct

But, there's a snag to solve: as I've set up things, the final size of the tiling will be that of the seed, and I use, in general, shapes with sides of length 1. This will make the final tiling the size of a pixel! Precisely this is the drive behind using "abstract" back-ends: I can draw the tiling once to get its "real" size, rescale it, and then draw it for real:

  module Tiling (G : GRAP) = struct
    module M = Monad(G.M)
    open M.Op

    let draw = M.mapm_ (fun (t, x) ->
      let vs = List.map (apply x) (S.shape t) in
         G.color (S.color t)
      >> G.poly ~fill:true vs
      >> G.gray 0.
      >> G.poly ~fill:false vs)

Remember that a GRAP back-end contains the monad M that implements the back-end. Drawing a tiling involves taking each placed tile (t, x), transforming the polygon S.shape t by the placement transform x, and drawing it. Again, this is completely generic on the "drawing" method. I take advantage of this genericity first to find the bounding box of the tiling:

  let bounds tiling =
    let module T = Tiling(Bounds) in
    Bounds.bounds (T.draw tiling)

Then, I draw the tiling simply by setting an appropriate line width and using the correct back-end:

  let show width height tiling =
    let module T = Tiling(Eps) in
    Eps.show width height (T.M.seq (Eps.weight 0.5) (T.draw tiling))

This pair of functions is the pay-off of so much refactoring and abstraction! Finally, generate ties all together by finding the bounding box, scaling the tiling and outputting it:

  let transform x' = List.map (fun (t, x) -> (t, x % x'))

  let generate dim levels =
    let tiling = iterate S.rules S.seed levels in
    let bbox   = bounds tiling in
    let width  = bbox.Bounds.right -. bbox.Bounds.left
    and height = bbox.Bounds.bottom -. bbox.Bounds.top in
    let size   = dim /. width in
    let xform  = translate (-. bbox.Bounds.left, -. bbox.Bounds.top) % scale size in
    show (width *. size) (height *. size) (transform xform tiling)

This function has a rather arbitrary design decision: whether to scale to the bounding box of the seed, or to that of the final tiling. If the expansion were confined to the boundary of the parent tile, it would make no difference (except for the work involved!); not all tilings, however, are so tidy. One of the most famous aperiodic tilings, Penrose's, overstep the boundaries of the parent tile.

The job is done, except for the actual tilings. First, the Amman-Beenker rhomb-triangle tiling:

module AmmanBeenkerRhombTriangle : TILING = struct
  type t = Tu | Td | Pa

  let name = "Amman-Beenker"

  let rho  = sqrt 2.
  and rho' = sqrt 0.5

  let shape = function
  | Tu -> [0., 0.; rho', rho'; rho, 0.; 0., 0.]
  | Td -> [0., 0.; rho, 0.; rho', -. rho'; 0., 0.]
  | Pa -> [0., 0.; 1., 0.; 1. +. rho', -. rho'; rho', -. rho'; 0., 0.]

  let color = function
  | Tu -> (0.976, 0.4, 0.063)
  | Td -> (0.973, 0.796, 0.4)
  | Pa -> (0.804, 0.796, 0.855)

  let seed = 
    let xform = rotate (deg 45.) in
    [Tu, xform; Td, xform]

  let rules (t, x) =
    let x' = scale (rho -. 1.) % x in match t with
  | Tu -> [
    Tu, rotate (deg (-135.)) % translate (1., 1.) % x';
    Pa, rotate (deg (-90.)) % translate (1. +. rho', 1. +. rho') % x';
    Tu, rotate (deg 135.) % translate (2. +. rho', rho') % x';
    Pa, rotate (deg 0.) % translate (1. +. rho', rho') % x';
    Td, rotate (deg 180.) % translate (1. +. rho, 0.) % x'; 
  | Td -> [
    Td, rotate (deg 135.) % translate (1., -1.) % x';
    Pa, rotate (deg 135.) % translate (1. +. rho', -1. -. rho') % x';
    Td, rotate (deg (-135.)) % translate (2. +. rho', -. rho') % x';
    Pa, rotate (deg 45.) % translate (1. +. rho', -. rho') % x';
    Tu, rotate (deg 180.) % translate (1. +. rho, 0.) % x'; 
  | Pa -> [
    Pa, x';
    Td, rotate (deg 0.) % translate (1., 0.) % x';
    Tu, rotate (deg 135.) % translate (2. +. rho, -1.) % x';
    Pa, rotate (deg 0.) % translate (1. +. rho, -1.) % x';
    Td, rotate (deg 180.) % translate (1. +. rho +. rho', -1. -. rho') % x';
    Tu, rotate (deg (-45.)) % translate (rho', -. rho') % x';
    Pa, rotate (deg (-90.)) % translate (1. +. rho, 0.) % x';

There's not much to say about this code, except that it encodes precisely the data presented in its Encyclopedia page. Let me just add that I found encoding the transformations quite error-prone; but then again I'm not a visual thinker. The result is:

let () =
  let module G = Generator(AmmanBeenkerRhombTriangle) in
  G.generate 256. 4

This tile could make excellent backgrounds; note that the weaving of triangles and parallelograms creates a somewhat disturbing optical illusion. Another beautiful tiling is the Robinson triangle tiling:

module RobinsonTriangle : TILING = struct
  type t = Su | Sd | Au | Ad

  let name = "Robinson.Triangle"

  let phi  = 0.25 *. (sqrt 5. +. 1.)
  let phi' = 0.25 *. (sqrt 5. -. 1.)
  and eta  = sqrt (0.125 *. (5. +. sqrt 5.))
  and eta' = sqrt (0.125 *. (5. -. sqrt 5.))

  let shape = function
  | Su -> [0., 0.; 2. *. phi , 0.; phi ,    eta'; 0., 0.]
  | Sd -> [0., 0.; 2. *. phi , 0.; phi , -. eta'; 0., 0.]
  | Au -> [0., 0.; 2. *. phi', 0.; phi',    eta ; 0., 0.]
  | Ad -> [0., 0.; 2. *. phi', 0.; phi', -. eta ; 0., 0.]

  let color = function
  | Su -> (0., 0., 0.376)
  | Sd -> (0.808, 0.796, 0.89)
  | Au -> (1., 0.8, 0.4)
  | Ad -> (1., 0.4, 0.)

  let seed =
    let t = rotate (deg 36.) in
    [ Su, t; Sd, t ]

  let rules (t, x) =
    let x' = scale (0.5 /. phi) % x in match t with
  | Su -> [
    Su, rotate (deg (-144.)) % translate (0.5 +. phi, eta) % x';
    Ad, rotate (deg ( -36.)) % translate (0.5 +. phi, eta) % x';
    Sd, reflect % translate (1. +. 2. *. phi, 0.) % x';
  | Sd -> [
    Sd, rotate (deg 144.) % translate (0.5 +. phi, -. eta) % x';
    Au, rotate (deg  36.) % translate (0.5 +. phi, -. eta) % x';
    Su, reflect % translate (1. +. 2. *. phi, 0.) % x';
  | Au -> [
    Au, rotate (deg 108.) % translate (1., 0.) % x';
    Su, rotate (deg (-108.)) % translate (0.5, 2. *. eta *. phi) % x';
  | Ad -> [
    Ad, rotate (deg (-108.)) % translate (1., 0.) % x';
    Sd, rotate (deg 108.) % translate (0.5, -2. *. eta *. phi) % x';

The seed brings together to obtuse triangles to form a parallelogram; there is, however, no way to "cut" the tile into a rectangular tile with matching borders, so it won't work as a background:

let () =
  let module G = Generator(RobinsonTriangle) in
  G.generate 256. 6

Finally, the classic Penrose rhomb tiling:

module PenroseRhomb : TILING = struct
  type t = R0 | R1

  let name = "Penrose.Rhomb"

  let phi  = 0.25 *. (sqrt 5. +. 1.)
  let phi' = 0.25 *. (sqrt 5. -. 1.)
  and eta  = sqrt (0.125 *. (5. +. sqrt 5.))
  and eta' = sqrt (0.125 *. (5. -. sqrt 5.))

  let shape = function
  | R0 -> [0., 0.; eta', -. phi ; 0., -2. *. phi ; -. eta', -. phi ; 0., 0.]
  | R1 -> [0., 0.; eta , -. phi'; 0., -2. *. phi'; -. eta , -. phi'; 0., 0.]

  let color = function
  | R0 -> (0.973, 0.796, 0.4)
  | R1 -> (0.08, 0.027, 0.4)

  let seed = [
    R0, identity;
    R0, rotate (deg 72.);
    R0, rotate (deg 144.);
    R0, rotate (deg 216.);
    R0, rotate (deg 288.);

  let rules (t, x) =
    let x' = scale (2. *. phi') % x in match t with
  | R0 -> [
    R0, reflect % translate (0., -2. *. phi) % x';
    R1, rotate (deg 36.) % translate (eta', -. phi) % x';
    R0, rotate (deg 144.) % translate (0., -1. -. 2. *. phi) % x';
    R0, rotate (deg (-144.)) % translate (0., -1. -. 2. *. phi) % x';
    R1, rotate (deg (-36.)) % translate (-. eta', -. phi) % x';
  | R1 -> [
    R1, rotate (deg 108.) % translate (-. eta', phi' -. 0.5) % x';
    R1, rotate (deg (-108.)) % translate (eta', phi' -. 0.5) % x';
    R0, rotate (deg 108.) % translate (0., -1.) % x';
    R0, rotate (deg (-108.)) % translate (0., -1.) % x';

As you can see, the tiles overflow their parents. I used a "sun" seed to generate a pentagonally-symmetrical result:

let () =
  let module G = Generator(PenroseRhomb) in
  G.generate 256. 4

I've uploaded the complete program, with the code in the last post and in this, to the OCamlCore Forge. There are a number of really nice tilings I'll maybe some day implement noted in comments at the end of the file. Or, find the ones you like and add a snippet to the forge. Have fun!


Monads are Plug-ins

As Haskell gains popularity, a dozen articles or more a month about them make the front page at the Programming reddit. Most deal with the necessity of using and understanding monads to control side-effects in Haskell. This is only natural and to be expected since, in Haskell, they're a language design decision, and as such as inescapable as laziness and as structural as the predefined operator precedence.

My background is in OCaml (gratuitous biographical aside: I found a notebook with OCaml code I wrote in '96), and so I view the IO and attendant monads with some disdain. Let me explain myself: my view is that using them to control side-effects is a rather pedestrian application of monads.

There's another, much higher-level view of them, and that is monads as categorical constructions. Category theory, by its very nature as a mathematical framework, is a language in which to express relations and operations between things. It's the mathematical equivalent of the GoF's patterns. So, reading an article like this one left me with the slightly uneasy feeling of having simultaneously not understood much of it (because I don't know Haskell, and because the article has no context to speak of) and knowing that an isomorphism between the IO monad and anything else cannot allow you to "weave in and out of using IO": freeness demands that the "other" side of the isomorphism contain, encode or fake enough of IO so as to be undistinguishable from it (the type of IO' as presented in the article lent me evidence of this). This is where category theory comes handy: understanding by pattern-matching.

The practical upshot of this is that it enables a third view of monads, Moggi's, as capsules of meaning that are, if not composable, certainly pluggable. First in Notions of computation and monads, then in Monads and Effects (whence IO and friends), Moggi shows that monads are the right tool for the (hard) job of making your code independent of, or at least parameterizable by, semantics. Monads are plug-ins.

In what way are monads plug-ins? Well, a plug-in must have two things: specific behavior that you can invoke to augment your program in unforeseen ways even after you've written it, and a standard interface by which the program can call it. Monads have a rather minimalist interface:

module type MONAD = sig
  type 'a m
  val return : 'a -> 'a m
  val bind   : 'a m -> ('a -> 'b m) -> 'b m

First comes a type, α m: this is the "protocol" by which your program and the plug-in communicate without one having to "know too much" (or more than the standardized interface allows) about the other. It lets the monad say, in effect: "for me to be arbitrary but compatible, you'll have to accept that I do things my way". The monad gets to have its "private space" in which to do its job, without fear that the program will mess up its stuff.

Then comes the method return, that the program uses to pass values down to the monad. It lets the code say, "here, have this value for your own use". Finally, there's the operation bind that lets the program and the monad maintain a conversation. With it, the program can say to the monad "oh, when you are done with anything, here's another thing I need you to do", to which the monad responds "OK, I'll take that and give you back the result". This "wiring" aspect to the bind is emphasized in it being usually called >>=, an infix operator.

This is all nice, operational and anthropomorphizing, but hardly illuminating. The fact that the monad functions as a protocol is formalized in the monad laws which every concrete, "plug-in" monad must abide to:

  1. bind (return a) k  ≡  k a
  2. bind m return  ≡  m
  3. bind (bind m k) h  ≡  bind m (fun x -> bind (k x) h)

The first law states that return on the left of bind does nothing. The second law states that return on the right of bind does nothing. The third law means that a bind of binds associates freely. But that's exactly what a monoid is: a "thing" with an associative operation that has a neutral, "do-nothing" element on the left and right of it.

Consider by way of example the concatenation of strings, denoted by ^. There is a special string, "" or the empty string, that "doesn't change" a string when catenated to it: for all strings s, "" ^ s is the same as s, and so is s ^ "". Also, it doesn't matter which string you concatenate before and which after; it's the order in which you concatenate that matters. In symbols, s ^ (t ^ u) is the same as (s ^ t) ^ u.

You might think, what have monoids to do with monads? The answer is that monads "behave" algebraically as monoids. You might think next, aren't monads monoids, then? Well, no. The crucial difference between a monoid and a monad is that, packed inside the return and the bind operations of a monad there is meaning tucked in. It could send output to a console. It could update a memory location. It could try simultaneously different possible computations and return any, or all of them. It could record transactional data. The key is that the monad laws are a contract that allow you, no matter how complicated the "stuff" the monad does underneath, operate with it in a structured, predictable way. Arbitrary behavior under a fixed interface. A plug-in, in short.

If you followed me so far you might have a nagging doubt by now. How do you "escape" the monad, once you use it? That is, return serves to put plain old values in a "wrapper" the monad can use but, how can you get back what the monad did with it? Where's the function with type α m → α? The bad news is that there isn't any… in general. That is, there is no guaranteed way to get back a value from an arbitrary monad. However, many useful monads do provide an escape to "retrieve" the useful work the monad did.

At this point I should show a concrete monad. A classical example (one that I'll eventually need to use) is the state monad. It allows to "plug-in" statefulness to an otherwise "pure" or "stateless" function:

module State (S : sig type t end) = struct
  type 'a m = St of (S.t -> 'a * S.t)
  let return a = St (fun s -> (a, s))
  let bind (St m) f = St (fun s ->
    let (x, s') = m s in
    let (St m') = f x in
    m' s')

  let get = St (fun s -> (s, s))
  let put = fun s -> St (fun _ -> ((), s))

  let eval (St m) = fun s -> fst (m s)
  let run  (St m) = fun s -> snd (m s)

The innards are perhaps not very illuminating on a first reading, so let me show its type:

module State : functor (S : sig type t end) -> sig
  type 'a m = St of (S.t -> 'a * S.t)
  val return : 'a -> 'a m
  val bind   : 'a m -> ('a -> 'b m) -> 'b m
  val get    : S.t m
  val put    : S.t -> unit m
  val eval   : 'a m -> S.t -> 'a
  val run    : 'a m -> S.t -> S.t

This monad is multi-parametric, in the sense that not only the values your function computes are arbitrary (the α in the monad's type), but the states you might want to track are too. Suppose you want to tally the arithmetic operations a function uses to calculate a value. The state can be as simple as a single integer, or as complex as a tuple recording separate counts for additions and subtractions, multiplications, divisions, square roots and so on. Whichever the case, the key to this monad is that an impure function can be split into a pure part and a state transformer part that takes an input state and returns an output state together with the value of the function. With that, it's not difficult to see that if return is to be a neutral element, it must pass its input state unchanged. In other words, the effect of a value is nil. In yet other words, a value is always pure. Also, bind must keep track of intermediate states between effects, and thread them appropriately.

Immutable state is no state at all but just another value, so this monad comes with a pair of operations get and put that allow your program to retrieve the state at some point of your function, and modify it. Also, after your computation terminates, you should be able to retrieve not only its final value with eval but also to access the final state with run. This variant assumes you'll want one or the other; it is possible to get both at once without having to re-compute the function.

Note that get is a monadic value, not a function. This might not make sense unless you look into the monad and see how it is constructed. For a less operational view, think of get as representing, by itself, whatever the state is at that point in the program. Since it's a monadic value, the only useful thing to do with it is to bind it to a function:

module St = State(struct type t = int end)

let incr = St.bind St.get (fun s -> St.put (succ s))

With that, it's simple to write a monadic addition operator that counts how many times it is invoked:

let ( +! ) mx my =
  St.bind mx   (fun x ->
  St.bind my   (fun y ->
  St.bind incr (fun _ -> St.return (x + y))))

And using it is equally straightforward:

let test () =
  let msum l = List.fold_right (fun x m -> St.return x +! m) l (St.return 0)
  in St.run (msum [1;2;3;4]) 0

This is all quite tedious to repeat each time I need a monad. Fortunately, most of the operations you can build out of monadic bind and monadic return are structural, and so generic on the particular monad. Enter the Monad functor:

module Monad (M : MONAD) = struct
  include M

  let seq m f = bind m (fun _ -> f)

  let join m = bind m (fun m -> m)

  let fmap f m = bind m (fun x -> return (f x))

  let liftm = fmap

  let liftm2 f m m' =
    bind m  (fun x ->
    bind m' (fun y ->
    return (f x y)))

  let liftm3 f m m' m'' =
    bind m   (fun x ->
    bind m'  (fun y ->
    bind m'' (fun z ->
    return (f x y z))))

  let mapm f l =
    List.fold_right (liftm2 (fun x xs -> f x :: xs)) l (return [])

  let sequence l = mapm (fun x -> x) l

  let mapm_ f l =
    List.fold_right (fun x -> seq (f x)) l (return ())

  let sequence_ l = mapm_ (fun x -> x) l

  module Op = struct
    let ( >>= ) = bind
    let ( >>  ) = seq

Again, looking at the types is more illuminating than squinting at the implementation:

module Monad : functor (M : MONAD) -> sig
  type 'a m = 'a M.m
  val return    : 'a -> 'a m
  val bind      : 'a m -> ('a -> 'b m) -> 'b m
  val seq       : 'a m -> 'b m -> 'b m
  val join      : 'a m m -> 'a m
  val fmap      : ('a -> 'b) -> 'a m -> 'b m
  val liftm     : ('a -> 'b) -> 'a m -> 'b m
  val liftm2    : ('a -> 'b -> 'c) -> 'a m -> 'b m -> 'c m
  val liftm3    : ('a -> 'b -> 'c -> 'd) -> 'a m -> 'b m -> 'c m -> 'd m
  val mapm      : ('a -> 'b) -> 'a m list -> 'b list m
  val sequence  : 'a m list -> 'a list m
  val mapm_     : ('a -> 'b m) -> 'a list -> unit m
  val sequence_ : 'a m list -> unit m
  module Op : sig
    val ( >>= ) : 'a m -> ('a -> 'b m) -> 'b m
    val ( >> )  : 'a m -> 'b m -> 'b m

seq allows you to invoke the monad on the left not for its value but for its effect. join lets you "squash down" a monadic monad to the underlying monad; the fact that it is generic and independent of the underlying semantics means that the monad itself, viewed as a categorical functor, is idempotent, that is, it's useless to monadize a monad. fmap "maps" a function inside a monad, which effectively makes a functor out of a monad. liftmn is an indexed family of mappers for functions of one, two… arguments, I've stopped at three but you could go on as you need.

A list of monads can be composed together to turn them "inside out" into a monad of lists. That is what mapm and sequence do, commuting the monad and the list, with or without transforming the list's elements first. Also, a list of monads can be evaluated in sequence purely by their side effects, that is where the mapm_ and sequence_ come in handy (the names of these functions are exactly the same as that of their Haskell counterparts).

Lastly, the two very common operations return and seq have very convenient operators associated with them. With all this, I can write the following:

module StM = Monad(St)
open StM.Op

let count2 op x y = incr >> StM.liftm2 op x y

let ( +! ) = count2 ( + )
and ( -! ) = count2 ( - )
(* and so on *)

This is the big payoff monads offer: functions can be generic on the structure of the monad, and so independent of its semantics. Monads are plug-ins.

Let me try with another very different monad, a very simple Out:

module Out = struct
  type 'a m = Out of (out_channel -> 'a)
  let return a = Out (fun _ -> a)
  let bind (Out m) f = Out (fun out ->
      let x = m out in
      let (Out m') = f x in
      m' out)

  let fmt fmt =
    let ship s = Out (fun out ->
      output_string out s; output_char out '\n')
    in Printf.kprintf ship fmt

  let write name (Out m) =
    let out = open_out_bin name in
    try let x = m out in close_out out; x
    with e -> close_out out; raise e

This monad interleaves formatted writing operations, fmt, as a side effect of whatever the code using it does. What is special of this monad is that the "escape" function, write ships out all the accumulated shows to a named output file, safely closing it even in the event of an exception.

With this, I'm ready to show you how to actually consume plug-ins from your program. Suppose you want to have a simple graphics DSL. I have a program with exactly such a need, and as a MacOS X user, the easiest way I found to satisfy it was to generate EPS and to use /usr/bin/open to handle the data for me. But I didn't want to encapsulate (!) my drawing code inside a PostScript interface, but needed it to be as "abstract" as possible (my original back-end was a Mathematica one). I settled on a simple hybrid:

type point = float * float

type rgbcolor = float * float * float

module type GRAP = sig
  module M : MONAD
  type t = unit M.m
  val weight : float -> t
  val gray   : float -> t
  val color  : rgbcolor -> t
  val save   : t -> t
  val dot    : point -> t
  val line   : point -> point -> t
  val rect   : ?fill:bool -> point -> point -> t
  val poly   : ?fill: bool -> point list -> t

The module is not parameterized by a monad, but it does include a monad that will provide the "glue" to whatever back-end I want to write. As you can see, it is rather primitive but includes operations for drawing in color. It is worth mentioning that this is not a so-called retained interface, but an immediate one, and so save runs the drawing operations on a "sub-context" that is restored when all the commands are shipped out. Also, the type that carries the drawing operations is an alias for the unit monadic value, as drawing doesn't really have a value. My first implementation was the EPS back-end:

let mm = ( *. ) (72. /. 25.4)

module Eps = struct
  module M = Monad(Out)
  type t = unit M.m
  open Out
  open M.Op

In this case, all drawing is to be output to a file, so the monad that implements the plug-in driver is Out. The code uses fmt extensively to format the PostScript commands. For instance, an EPS file must specify a bounding box against which all drawing will be clipped to:

  let iso_time () =
    let tm = Unix.gmtime (Unix.time ()) in
    Printf.sprintf "%04d%02d%02dT%02d%02d%02d"
      (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday
      tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec

  let eps margin width height drawing =
    let app_name = Filename.basename Sys.executable_name in
       fmt "%%!PS-Adobe-3.0 EPSF-3.0"
    >> fmt "%%%%BoundingBox: 0 0 %d %d"
      (truncate (ceil (width  +. 2. *. margin)))
      (truncate (ceil (height +. 2. *. margin)))
    >> fmt "%%%%Creator: %s" app_name
    >> fmt "%%%%CreationDate: %s" (iso_time ())
    >> fmt "%%%%DocumentData: Clean7Bit"
    >> fmt "%%%%EndComments"
    >> drawing
    >> fmt "showpage"
    >> fmt "%%%%EOF"

The monad is used purely for its side-effects, namely writing to a file, so I don't bind, I always seq. After the drawing is complete, I actually want to see it on screen; for that, I write a temporary EPS file that I pass to /usr/bin/open, which in turn opens Preview, which converts the file to PDF and shows it:

  let show ?(margin=0.) width height f =
    let name = Filename.temp_file "grap" ".eps" in
    write name (eps margin width height f);
    ignore (Unix.system (Printf.sprintf "/usr/bin/open %s" name))

Indirect, but a showcase of the power of UNIX at work. The drawing operations themselves are pretty straightforward:

  let weight w        = fmt "%g setlinewidth" w
  and gray   w        = fmt "%g setgray" w
  and color (r, g, b) = fmt "%g %g %g setrgbcolor" r g b

  let save f = fmt "gsave" >> f >> fmt "grestore"

  let moveto (x, y) = fmt "%g %g moveto" x y
  let lineto (x, y) = fmt "%g %g lineto" x y
  let path f = fmt "newpath" >> f >> fmt "closepath"
  let draw   = fmt "stroke"
  let paint  = fmt "fill"

As is save, path is a higher-order function that constructs a complete path with the drawing commands. This comes handy:

  let dot (x, y) =
    path (fmt "%g %g currentlinewidth 1.5 mul 0 360 arc" x y) >> paint

  let line p q =
    fmt "newpath" >> moveto p >> lineto q >> draw

  let rect ?(fill=false) (x, y) (x', y') =
    let w, h = x' -. x, y' -. y in
    fmt "%g %g %g %g rect%s"
      x y w h (if fill then "fill" else "stroke")

Polygons are built out of a sequence of points. The first one is the starting point and must be passed to moveto, the rest are to be arguments of linetos. A mapping would do, except for the fact that the code is in monadic style, and must use mapm_:

  let polyline closing l = match l with
  | []      -> return ()
  | p :: ps -> path (moveto p >> M.mapm_ lineto ps) >> closing

  let poly ?(fill=false) = polyline (if fill then paint else draw)

Note also that I avoid invoking graphics primitives if the polygon is empty. With this, a simple example works as expected:

let () =
  let module M = struct
    open Eps
    open Eps.M.Op
    let draw p = show 100. 100. (poly p >> (M.mapm_ dot (List.tl p)))
  end in M.draw (List.map (fun i ->
    let a = float i *. 2. *. pi /. 10. in
    let c, s = cos a, sin a in
    (50. +. 20. *. c, 50. +. 20. *. s)
  ) (iota 11))

Success! A decagon with fat vertices. The only problem with an EPS back-end is that it is difficult to get the optimal width and height for an image. In other words, it would be ideal to have some auto-scaling. For that, I have to know the size of a drawing before turning it into PostScript code. Enter the Bounds back-end:

module Bounds = struct
  type bounds = { top : float; left : float; bottom : float; right : float; }

  let empty =
    { top = infinity; left = infinity;
      bottom = neg_infinity; right = neg_infinity; }

I keep track of the bounding box of the commands seen by the interface, as a rectangle given by two pairs of coordinates. The initial, empty bounds are such that I can accumulate a point simply by a series of minimum and maximum operations:

  let add b (x, y) =
    { top    = min y b.top;
      left   = min x b.left;
      bottom = max y b.bottom;
      right  = max x b.right; }

Now the commands will have to preserve the bounding box seen so far. This is where the State monad comes in:

  module St = State(struct type t = bounds end)

  module M = Monad(St)
  type t = unit M.m
  open M.Op

A command that doesn't paint to the output area will be a nop. A command that adds a point p updates the bounds with that point and changes the state accordingly (cf with incr above):

  let nop      = M.return ()
  and draw   p = St.get >>= fun b -> St.put (add b p)

Again, I'm not interested in the final value of the drawing, only in the final bounding box, that is, the state after a series of commands are applied to the empty bounding box:

  let bounds f = St.run f empty

The commands themselves are trivial: those that merely alter the graphics state are nops, those that draw accumulate points on the bounding box:

  let weight       _ = nop
  and gray         _ = nop
  and color        _ = nop
  and save         f = f
  and dot          p = draw p
  and line       p q = draw p >> draw q
  and rect ?fill p q = draw p >> draw q
  and poly ?fill   l = M.mapm_ draw l

Now the previous code runs almost unmodified, except that the call to Eps.draw is replaced by a call to Bounds.bounds:

let b =
  let module M = struct
    open Bounds
    open Bounds.M.Op
    let draw p = bounds (poly p >> (M.mapm_ dot (List.tl p)))
  end in M.draw (List.map (fun i ->
    let a = float i *. 2. *. pi /. 10. in
    let c, s = cos a, sin a in
    (50. +. 20. *. c, 50. +. 20. *. s)
  ) (iota 11))

Which results in:

val b : Bounds.bounds =
  {Bounds.top = 30.9788696740969272; Bounds.left = 30.;
   Bounds.bottom = 69.0211303259030728; Bounds.right = 70.}

This allows me to scale up the drawing to fit the boundaries of the EPS file, or to adjust the size of the EPS file so that no space is wasted. The important thing is that the code that contains the drawing commands is left unchanged. Monads are plug-ins.

Next, I'll show you pretty pictures:


A Family Portrait

I succeeded in finding a minimal 8-element sorting network with 19 comparators grouped in 6 stages. For that, I needed a change in my fitness evaluation function to reflect an insight I had while preparing the last post. My original fitness evaluator was monotone first on size and then on depth, so that fitness(s, d) < fitness(s′, d′)   ⇔   s < s′  ∨  s = s′d < d′. It is, I found experimentally, reward more shallower than shorter networks, so that fitness(s, d) < fitness(s′, d′)   ⇔   d < d′  ∨  d = d′s < s′. For that, note that 0 < ds, so that 0 < d/s ≤ 1, or 0 ≤ 1 - d/s < 1, and so d + 1 - d/s is monotone as required. To convert a decreasing "penalty" into an increasing fitness, I use the mapping I showed the last time, xx/(x - 1):

let target_fitness (size, depth) =
  let size, depth = float size, float depth in
  1. +. size /. ((size -. 1.) *. depth)

(the entire program, minus comments, is available as a paste in OCaml Forge). This means that, sometimes, in order to decrease depth a larger network would be chosen as the winner of the tournament, but that is, apparently, the key to a successful search. With that, here are some minimal networks of order up to 8:

  1. Minimal network of width 3, size 3 and depth 3 (ganet -w 3 -s 2 -x 0.8 -m 0.5)
  2. Minimal network of width 4, size 5 and depth 3 (ganet -w 4 -s 1 -x 0.8 -m 0.05)
  3. Minimal network of width 5, size 9 and depth 5 (ganet -w 5 -s 1 -x 0.8 -m 0.05)
  4. Minimal network of width 6, size 12 and depth 5 (ganet -w 6 -s 1 -x 0.8 -m 0.05)
  5. Minimal network of width 7, size 16 and depth 6 (ganet -w 7 -p 100 -s 1 -x 0.8 -m 0.05)
  6. Minimal network of width 8, size 19 and depth 6 (ganet -w 8 -p 640 -s 128 -x 0.9 -m 0.09)


Sorting out Creationism

I told you there's no end to tweaking a Genetic Algorithm. As it is, the program is not very powerful; it cannot find minimal networks of width 7:

% ./ganet -width 7 -pool 100 -seed 11
Searching for a 7-network of size 16 and depth 6 (fitness = 34.00000)
      1 act =  3.00%, avg = 8.66667, max = 10.00000 (39/24)
      2 act =  4.00%, avg = 9.25000, max = 11.00000 (38/23)
      3 act =  6.00%, avg = 9.00000, max = 13.00000 (36/21)
     10 act = 21.00%, avg = 8.95238, max = 14.00000 (35/25)
     21 act = 48.00%, avg = 10.60417, max = 16.00000 (33/21)
     32 act = 53.00%, avg = 11.16981, max = 17.00000 (32/20)
     45 act = 53.00%, avg = 14.47170, max = 18.00000 (31/20)
     46 act = 54.00%, avg = 14.68519, max = 20.00000 (29/18)
     53 act = 61.00%, avg = 15.31148, max = 21.00000 (28/20)
     62 act = 65.00%, avg = 17.21538, max = 23.00000 (26/16)
     77 act = 70.00%, avg = 20.44286, max = 24.00000 (25/14)
     78 act = 69.00%, avg = 21.08696, max = 26.00000 (23/14)
     84 act = 66.00%, avg = 22.22727, max = 27.00000 (22/11)
     91 act = 77.00%, avg = 23.89610, max = 28.00000 (21/10)
     92 act = 74.00%, avg = 23.98649, max = 29.00000 (20/11)
     97 act = 75.00%, avg = 25.33333, max = 30.00000 (19/10)
    111 act = 82.00%, avg = 27.89024, max = 31.00000 (18/10)
    137 act = 84.00%, avg = 30.30952, max = 32.00000 (17/10)

Solution in 13781 rounds with fitness 32.000000
Found a 7-network of size 17 and depth 9

There are a number of things that can be changed, beyond the command-line parameters. Mind you, these are important, especially the pool size: too large a pool, and evaluation will spend all your patience; too little, and the population gets stuck on a local maximum. Beyond that, it's code intervention time.

The first weak spot is the fitness evaluation function. My original intent was to give absolute priority to network size, and then to network depth; in other words, fitness(s, d) < fitness(s′, d′)   ⇔   s < s′  ∨  s = s′d < d′. This is restrictive, because a reduction in depth may trigger a further reduction in size. A more graded fitness measure rewards reductions both in size and depth, even if that means that your GA spends a little more optimizing the latter.

let target_fitness (size, depth) =
  let size, depth = float size, float depth in
  let overhead = size +. depth /. size in
  overhead /. (overhead -. 1.)

The idea is to compute a penalty score comprised of a whole point per size, and a fraction of a size per level, so that a worst-case O(n²) network is not as bad as a great (n + 1)-level sorter, preserving monotonicity. Since in this program, a fitter individual has a greater score, and individuals that sort, I use inversion to change the overhead into a fitness value. As you can see, this value is the nearer to one the larger the network is:

# Array.map (fun (s,_,d) -> target_fitness (s,d)) known_best_bounds ;;
- : float array =
[|nan; nan; 2.; 1.33333333333333326; 1.21739130434782616;
  1.11688311688311681; 1.08759124087591252; 1.06504065040650397;
  1.05459770114942519; 1.04118616144975284; 1.03540903540903551;
  1.02921535893155269; 1.02617449664429539; 1.02262443438914019;
  1.01992966002344665; 1.01812884428617667; 1.01690617075232459|]

(the first two positions make no sense, and are only sentinels). With that change, the GA progresses a bit further:

% ./ganet -width 7 -pool 100 -seed 11
Searching for a 7-network of size 16 and depth 6 (fitness = 1.06504)
      1 act =  3.00%, avg = 1.02506, max = 1.02590 (39/24)
      2 act =  4.00%, avg = 1.02545, max = 1.02659 (38/23)
⟨…22 lines deleted…⟩
    160 act = 73.00%, avg = 1.04722, max = 1.05751 (18/ 7)
    189 act = 85.00%, avg = 1.05599, max = 1.06093 (17/ 7)

Solution in 47744 rounds with fitness 1.060932
Found a 7-network of size 17 and depth 7

But not enough to reach the target fitness. Some other idea is needed, and it is to actually respect the definition of Microbial Tournament laid out by Inman Harvey. His method matches up individuals a pair at a time, not the entire population at once. This requires more iterations, but each one involves a single mating, so the efficiency of doing pair-at-a-time tournaments is mostly the same. First, I need a way to draw a pair of individuals at random, uniformly form the population. The twist is that I must not pit an individual against itself:

let random_disjoint_pair n =
  let i = Random.int n in
  let j = Random.int (pred n) in
  if i > j then (i, j) else (i, succ j)

To draw a random disjoint pair 0 ≤ ij < n with uniform probabilities, it's best to treat the problem as if it were an urn drawing without replacement. In contrast to the generation of ordered pairs, this makes it easy to select both members of such a non-diagonal pair with exactly uniform probabilities: select 0 ≤ i < n with uniform probability 1/n. This leaves n - 1 possibilities for j: draw it from the set 0 ≤ j < n - 1 with uniform probability 1/(n - 1). If ji, bump j up by one to simulate the "hole" left behind by the drawing of i. With that, the tournament proceeds not in block, but pair by pair:

let tournament fitness p =
  let iter = ref 0
  and last = ref 0. in
  Sys.catch_break true;
  begin try while p.best.score < fitness do
    (* Only this line changes! *)
    microbial_tournament p (random_disjoint_pair !pool_size);
    incr iter;
    if 1. <= p.best.score && !last < p.best.score then begin
      let (s, d) = count_genes p.best.genes in
      last := p.best.score;
      let sum, act = Array.fold_left (fun (s, n as p) g ->
        if g.score < 1. then p else (s +. g.score, succ n))
        (0., 0) p.pool in
      Printf.fprintf stderr "%7d act = %5.2f%%, avg = %7.5f, max = %7.5f (%2d/%2d)\n"
        (100. *. float act /. float !pool_size)
        (sum /. float act)
        s d;
      flush stderr
  done with Sys.Break -> prerr_endline "Interrupted.\n" end;

That's it, just one line. Let's try it:

% ./ganet -width 7 -pool 100 -seed 11
Searching for a 7-network of size 16 and depth 6 (fitness = 1.06504)
      1 act =  3.00%, avg = 1.02506, max = 1.02590 (39/24)
    174 act =  7.00%, avg = 1.02528, max = 1.02727 (37/25)
⟨…30 lines deleted…⟩
  11019 act = 82.00%, avg = 1.05993, max = 1.06478 (16/ 7)
 171081 act = 86.00%, avg = 1.06386, max = 1.06504 (16/ 6)
Solution in 171081 rounds with fitness 1.065041
Found a 7-network of size 16 and depth 6

Success! It takes a while to shave off the last level, but it does. Note how the percent of active individuals (that is, those that actually sort) climb up as the fitness of the best network approaches the target. The downside of this is that the fact that the GA found a network at all is somewhat of a fluke; smaller or larger pools still make finding a minimal network a hit-and-miss proposition. The last level is the hardest, which is an indication that there is a local minimum that is hard to break out of. For instance, for a smaller pool I had to also change the seed in order to find a solution:

% ./ganet -width 7 -pool 49 -seed 12
Searching for a 7-network of size 16 and depth 6 (fitness = 1.06504)
      1 act =  8.16%, avg = 1.02612, max = 1.02799 (36/26)
     55 act = 14.29%, avg = 1.02624, max = 1.02804 (36/24)
⟨…27 lines deleted…⟩
   6852 act = 87.76%, avg = 1.06048, max = 1.06478 (16/ 7)
 106973 act = 87.76%, avg = 1.06390, max = 1.06504 (16/ 6)
Solution in 106973 rounds with fitness 1.065041
Found a 7-network of size 16 and depth 6

The time to hone into a 16-step solution was quicker, but the time needed to reduce the depth took another order of magnitude like before. A smaller yet pool shows an interesting fact: if the entire population is made of proper sorters, it will be difficult to find in the gene pool a variation that would lead to a smaller sorter. Note how the fraction of active networks remains high throughout:

% ./ganet -width 7 -pool 10 -seed 1
Searching for a 7-network of size 16 and depth 6 (fitness = 1.06504)
     46 act =  10.00%, avg = 1.02519, max = 1.02519 (40/28)
     57 act =  30.00%, avg = 1.02519, max = 1.02520 (40/27)
⟨…22 lines deleted…⟩
   1416 act = 100.00%, avg = 1.04621, max = 1.04651 (22/11)
   1551 act = 100.00%, avg = 1.04672, max = 1.04872 (21/11)
   1936 act =  80.00%, avg = 1.04874, max = 1.05115 (20/11)
   2376 act = 100.00%, avg = 1.05139, max = 1.05382 (19/11)
   2458 act =  90.00%, avg = 1.05384, max = 1.05398 (19/10)
   2569 act =  80.00%, avg = 1.05402, max = 1.05714 (18/ 9)
   2585 act = 100.00%, avg = 1.05528, max = 1.05751 (18/ 7)
 110511 act = 100.00%, avg = 1.05749, max = 1.06071 (17/ 8)
 257925 act =  70.00%, avg = 1.06126, max = 1.06452 (16/ 8)
 673211 act = 100.00%, avg = 1.06167, max = 1.06478 (16/ 7)
3081061 act =  70.00%, avg = 1.06370, max = 1.06504 (16/ 6)
Solution in 3081061 rounds with fitness 1.065041
Found a 7-network of size 16 and depth 6

Which indicates that the smaller the pool the larger should be the mutation probability, but this entails a danger of the algorithm devolving into a pure random search. Notwithstanding that, the genetic probabilities crossover_rate and mutation_rate should be run-time-settable parameters. In fact, even the selection of a random seed is harder than it should be: most of the time I want to explore different pool sizes but I'd rather not think of random seeds to give the program.

Now, with all those changes selecting values for the parameters is a random search of itself; however, the value act is a good indicator of the population health, ironically enough:

% ./ganet -w 7 -p 100 -s 0 -m 0.03 -x 0.8
Random seed: 774730236
Searching for a 7-network of size 16 and depth 6 (fitness = 1.06504)
      1 act =   2%, avg = 1.02430, max = 1.02460 (41/27)
     20 act =   3%, avg = 1.02506, max = 1.02657 (38/24)
⟨…30 lines deleted…⟩
   5570 act =  66%, avg = 1.05069, max = 1.06050 (17/ 9)
   6333 act =  66%, avg = 1.05344, max = 1.06071 (17/ 8)
  16773 act =  65%, avg = 1.05829, max = 1.06093 (17/ 7)
 143494 act =  68%, avg = 1.05764, max = 1.06115 (17/ 6)
 146242 act =  66%, avg = 1.05825, max = 1.06504 (16/ 6)
Solution in 146242 rounds with fitness 1.065041
Found a 7-network of size 16 and depth 6

(this is with a modified version that makes all parameters settable through command line options). It is evident that, with the high churn rate implied by a mutation rate of 3% even with a high rate of copying of 80%, the search proceeds rather quickly by minimizing depth before length. In a sense, it is necessary to intervene heavily in the design of a GA experiment if it is to have even a modest chance of success.

Sorting out Evolution

As a belated end to my exploration of sorting networks (1, 2, 3, 4, 5), I experimented with genetic algorithms to find minimal sorting networks for n elements. And when I write experimented, I mean a couple of weeks of tweaking parameters and code exploring ways for my program to run faster, and to find better and larger networks. Before you continue reading, let me warn you that I wasn't entirely successful: this program works fine for small networks (n ≤ 8), but I couldn't extend this approach to larger, more interesting ones. As it is, though, I had plenty of fun.

The problem of finding minimal sorting networks of a given width is both old and hard; Knuth's examples show uncertainty about minimality for sorters as small as for 16 elements. Hugues Juillé shows that Genetic Algorithms can find minimal networks, but he regrettably keeps the how of it to himself. That said, the table he presents is a valuable starting point as it constitutes an objective target for my program to aim towards:

let known_best_bounds = [|
(* size, depth L.B., depth U.B.) *)
(*  0 *)  0, 0, 0;
(*  1 *)  0, 0, 0;
(*  2 *)  1, 1, 1;
(*  3 *)  3, 3, 3;
(*  4 *)  5, 3, 3;
(*  5 *)  9, 5, 5;
(*  6 *) 12, 5, 5;
(*  7 *) 16, 6, 6;
(*  8 *) 19, 6, 6;
(*  9 *) 25, 7, 7;
(* 10 *) 29, 7, 7;
(* 11 *) 35, 7, 8;
(* 12 *) 39, 7, 8;
(* 13 *) 45, 7, 9;
(* 14 *) 51, 7, 9;
(* 15 *) 56, 7, 9;
(* 16 *) 60, 7, 9;

The network size is the über-parameter of the simulation that determines all other tweaks and knobs; since I want to run the program to look for networks of various widths without having to recompile, I make it a reference:

let network_width = ref 7

Part of the difficulty of effectively applying GAs to solving hard combinatorial problems is deciding on a good genomic encoding of the problem's parameters. Such an encoding must be readily amenable to the two basic operations in a GA search, namely crossover and mutation. I settled on representing directly a sorting network as an array of pairs (i, j) with 0 ≤ ij < network_width, each representing an exchange box between wires i and j. If i = j, the "exchange" box is the null operation; this will allow for encoding sorting networks of actual varying size on a fixed-length genome.

type genome = (int * int) array

I use an array not only for easy picking of the individual "genes" (the exchange boxes), but so that each genome is actually mutable; this simplifies the bookkeeping needed to complete a tournament (of which more, later). Even so, many operations on a genome as a whole are mostly traversals, with some operation applied to each gene; I found it convenient to abstract the traversal as a generic fold:

let fold_genome f e (g : genome) =
  let bounds = Array.make !network_width 0 in
  fst (Array.fold_left (fun (e, depth) (i, j) ->
    if i == j then (e, depth) else
    let d = max depth (1 + max bounds.(i) bounds.(j)) in
    bounds.(i) <- d; bounds.(j) <- d;
    (f (d > depth) e (i, j), d)) (e, 0) g)

I had presented this algorithm before in "Picturing Networks"; the gist of it is that bounds keeps tab of the "stage" or depth a given sorter must be scheduled at. For instance, the basic measure of a genome is the number of productive genes it has; in other words, how many exchange boxes are actually attached to a different pair of wires. The other basic metric of a network is its depth. This fold handily lets me find both quantities at once:

let count_genes =
  fold_genome (fun is_sequential (size, depth) _ ->
    succ size, if is_sequential then succ depth else depth)
    (0, 0)

Another rather trivial application of this fold is to print a genome in a way that highlights each sequential stage:

let string_of_genome (g : genome) =
  let buffer = Buffer.create 16 in
  ignore (fold_genome (fun is_sequential any (i, j) ->
    if any then
      Buffer.add_char buffer (if is_sequential then ';' else ',');
    Buffer.add_string buffer (string_of_int i);
    Buffer.add_char   buffer ':';
    Buffer.add_string buffer (string_of_int j);
    true) false g);
  Buffer.contents buffer

Genomes must randomly sample the space of possibilities; a truly (that is, uniformly) random genome must draw from a space of random genes. To construct a random pair 0 ≤ i < j < n with uniform probabilities, the most expedient way is to use rejection:

let rec random_ordered_pair n =
  let i, j = Random.int n, Random.int n in
  if i <= j then i, j else random_ordered_pair n

There is an exact method using the inverse of the cumulative distribution function, but it involves taking square roots, and this method is actually faster (this was an investigation of its own). With that, a random gene is an appropriately parameterized random exchange box:

let new_gene () = random_ordered_pair !network_width

Once a random exchange box is in place in a network, the mutation operator must controllably change it into another box. One alternative is to replace it with a completely different one; a more parsimonious approach is to detach a wire and reattach it at random. Two things must be ensured, though: first, the change must preserve the basic property that the exchange box be a nondecreasing pair of numbers; second, it must be uniform in the space of possible changes so defined. Each wire can be chosen with equal probabilities, but then which of the two terminals gets reattached depends on the placement of the exchange box relative to the selected wire:

let mutate_gene (i, j) =
  let k = Random.int !network_width in
  if k < j then (k, j) else (j, k)

My preoccupation with ensuring that all choices are uniform is not just theoretic; any skew in the search has readily observable consequences. I had an error in the choice of random genes which prevented the program to explore further than networks of width 6; once I corrected it, I could test wider networks. With this, there's the problem of selecting a random genome, which is trivial except for the choice of genome length. How many exchange boxes should a genome have? Too small a number, and the network would never sort; too large and it will take forever to minimize and, what's worse, the probability that a crossover would improve the offspring would plummet (in other words, the search space would be just too large). I found a number both reasonable and completely arbitrary:

let genome_length = !network_width * !network_width

Reasonable because it's easy to sort in quadratic time (witness Bubble Sort); arbitrary since it only depends on the network width. The next completely arbitrary but vital choice is in deciding how to compare networks. The fitness of a genome is the sole measure that determines in which direction the search proceeds; even more, the choice of a fitness function is what makes a Genetic Algorithm go boom or bust. In this case, the complication of selecting a good fitness evaluation resides in that I'm trying to minimize two quantities at once: size and depth of the sorting network. There's much literature on multiobjective optimization that I should have read; what's really important is that the combination of individual objectives must be "monotone", or Pareto dominant if it is to succeed; this is a complicated way to say that the fitness must be a total order on objective tuples, in this case pairs of (size, depth):

let target_fitness (size, depth) =
  let min_size, _, min_depth = known_best_bounds.(!network_width) in
  if size == min_size
  then float (genome_length - size + 1 - (depth - min_depth))
  else float (genome_length - size)

I aim for known_best_bounds for the given network_width. The nearer the metrics of the genome are to those numbers, the fitter it will be deemed. Note that, for n ≥ 2, target_fitness (n, 1) - target_fitness (n, n) < target_fitness (n-1, n-1) - target_fitness (n, n); hence the fitness function is monotone in size and in depth, in that order, as required. Now, how to determine if a network is fit or not? The least I can expect of a sorting network is for it to actually sort its input. Trying all network_width! (that's a factorial) possible inputs is unfeasible; fortunately, Knuth showed that it suffices to test all 2network_width binary sequences of length network_width. The Zero-One Principle says that a network sorting the latter is a sufficient condition for it sorting the former. What's more, the 0-1 principle implies that a sorting network can be modeled by a binary circuit. Each exchange box is then a binary function with table:


That is, f(in0, in1) = (in0in1, in0in1).

Aside: I find endlessly confusing the fact that the maximum ↑ and minimum ↓ operators are respectively the join ∨ and meet ∧ of the integer lattice, but the arrows go backwards. Who to blame? Who came with the logical connectives?

An exchange box, hence, needs to select bits and apply the corresponding functions according to a mask. To sort a binary word, it suffices to build the masks for each exchange box, and with that apply the sorting function f defined above:

let sort a word =
  Array.fold_left (fun word (i, j) ->
    (* ij *)
      lor  (word lsr (j - i) land     (1 lsl i))
      land (word lsl (j - i) lor lnot (1 lsl j))
  ) word a

Note that I actually sort in reverse order, that is, according to f′(in0, in1) = (in0in1, in0in1). This is so that testing for sortedness is now as simple as checking that the result is a block of contiguous ones, from the least significant bit upwards, using the old trick that relies on the fact that, in two's complement, a block of k contiguous 1's represents exactly 2k - 1:

let is_sorted word = word land (word + 1) == 0

But obviously the fitness of a sorting network is not just determined by its ability to sort, but by the number of exchange boxes it employs to do so. To evaluate the first criterion, generate all 2network_width binary words and test that all are correctly sorted. If they aren't, the computed fitness is, rather arbitrarily, a fraction based on the first sequence the network didn't sort. This is so that the fitness is a total order on the space of genomes:

exception Unsorted of int

let evaluate_fitness (g : genome) =
  let tests = 1 lsl !network_width in
    for i = 0 to pred tests do
      if not (is_sorted (sort g i)) then raise (Unsorted i)
    target_fitness (count_genes g)
  with Unsorted i -> float i /. float tests

So much for genes and genomes; an individual is a genome and a measure of its fitness:

type individual = { mutable score : float; genes : genome; }

Its score is as mutable as its genome is. A new individual is born with a menagerie of random genes:

let new_individual () =
  let g = Array.init genome_length (fun _ -> new_gene ())
  in { score = evaluate_fitness g; genes = g; }

Analogously, a population is a collection of individuals of which there is a unique best champion:

type population = { mutable best : individual; pool : individual array; }

And now for another arbitrary parameter, namely the size of the population, or rather gene pool. As with the genome size, it is a function of the network_width to be minimized:

let pool_size = ref (100 * !network_width * !network_width)

I've experimented with a pool size cubic in the network_width, but I settled on a quadratic dependence, appropriately scaled. This is too big for small widths, but appropriate for larger ones. In any case, I found it convenient to try different values for it from run to run, so it is a reference. A a population is an array of genomes of this size:

let find_best pool =
  Array.fold_left (fun w g -> if w.score > g.score then w else g) pool.(0) pool

let new_population () =
  let p = Array.init !pool_size (fun _ -> new_individual ())
  in { best = find_best p; pool = p; }

Now would the knobs to tweak end? I'm almost there; but these are fundamental to the performance of the program, and can be the focus of endless tweaking. The GA probabilities:

let crossover_rate = 0.6
and mutation_rate = 0.01

determine the rate of application of the GA operators to the genomes in what's called a tournament, the Darwinian show-down that makes the core of GAs. I used the so-called microbial tournament as it is really simple and easy to implement. The losing genome will have some of its genes replaced with the corresponding ones from the winner, and sometimes mutated according to these numbers:

let microbial_tournament p (i, j) =
  let g1, g2 = p.pool.(i), p.pool.(j) in
  let winner, loser =
    if g1.score < g2.score || g1.score = g2.score && g2 == p.best
    then g2, g1
    else g1, g2 in
  assert (p.best != loser);
  (* Recombination *)
  for i = 0 to genome_length - 1 do
    if Random.float 1. < crossover_rate then
      loser.genes.(i) <- winner.genes.(i);
  (* Mutation *)
  for i = 0 to genome_length - 1 do
    if Random.float 1. < mutation_rate then
      loser.genes.(i) <- mutate_gene loser.genes.(i)
  loser.score <- evaluate_fitness loser.genes;
  if p.best.score <= loser.score then p.best <- loser

In a microbial tournament between two individuals i and j in the population p, both are evaluated and deemed a winner and loser. The latter is never the best individual in the population, by definition. Genes from the winner are copied to the loser's genome, according to crossover_rate; then the loser genes are mutated according to mutation_rate. The key in this type of tournaments is that the loser individual is replaced by the offspring resulting from this process, which is evaluated, and if found better than the best so far, promoted.

Now this process is repeated for pairs of individuals drawn at random from the population. It is equivalent, but faster, to partition the pool into halves, perform a random pairing between both, and do a tournament between each pair:

let shuffle a =
  for j = 1 to Array.length a - 1 do
    let k = Random.int (j + 1) in
    let t = a.(j) in
    a.(j) <- a.(k);
    a.(k) <- t

let full_microbial_tournament p =
  shuffle p.pool;
  let i = ref 0 in
  while !i < !pool_size - 1 do
    microbial_tournament p (!i, !i + 1);
    i := !i + 2

Now I apply the full_microbial_tournament iteratively until the best individual in the population reaches the target fitness. Whenever there's a fitness bump the individual promoted is printed out, together with some statistics. In the event of a console break, the loop is terminated cleanly. In any case, the result of the function is the number of iterations spent in the loop:

let tournament fitness p =
  let iter = ref 0
  and last = ref 0. in
  Sys.catch_break true;
  begin try while p.best.score < fitness do
    full_microbial_tournament p;
    incr iter;
    if 1. <= p.best.score && !last < p.best.score then begin
      let (s, d) = count_genes p.best.genes in
      last := p.best.score;
      let sum, act = Array.fold_left (fun (s, n as p) g ->
        if g.score < 1. then p else (s +. g.score, succ n))
        (0., 0) p.pool in
      Printf.fprintf stderr "%7d act = %5.2f%%, avg = %7.5f, max = %7.5f (%2d/%2d)\n"
        (100. *. float act /. float !pool_size)
        (sum /. float act)
        s d;
      flush stderr
  done with Sys.Break -> prerr_endline "Interrupted.\n" end;

A driver function selects the target fitness given by the best known bounds for the chosen network_width, prints out some starting information and builds an initial population and runs the iterative tournament with it:

let test () =
  let (size, _, depth) = known_best_bounds.(!network_width) in
  let fitness = target_fitness (size, depth) in
  Printf.fprintf stderr "Searching for a %d-network of size %d and depth %d (fitness = %7.5f)\n"
    !network_width size depth fitness;
  flush stderr;
  let pool = new_population () in
  let iter = tournament fitness pool in
  let n, d = count_genes pool.best.genes in
  Printf.fprintf stderr "Solution in %d rounds with fitness %f\n"
    iter pool.best.score;
  Printf.fprintf stderr "Found a %d-network of size %d and depth %d\n"
    !network_width n d;
  print_endline (string_of_genome pool.best.genes)

The results are printed out, and the best individual found is displayed. The main function makes running the GA from the command line convenient:

let main () =
  let usage = "usage - ganet [-seed <n>] [-pool <n>] -width <n>" in
  let spec = Arg.align [
    "-seed",  Arg.Int (fun n -> Random.init n), " random seed";
    "-pool",  Arg.Set_int pool_size,            " population pool";
    "-width", Arg.Set_int network_width,        " network width";
  ] in
  Arg.parse spec (fun _ -> raise (Arg.Bad "extra argument")) usage;
  if !network_width < 2 || !network_width > 16 then begin
    prerr_endline "network width must be between 2 and 16";
    exit 2
  if !pool_size < !network_width then begin
    prerr_endline "population pool must be as large as the width";
    exit 2
  test ()

let () = main ()

As with any experiment, repeatability must be designed in; programs that use random numbers should take the random seed as a parameter. You should get the same results from this run:

% ./ganet -seed 127 -pool 100 -width 5
Searching for a 5-network of size 9 and depth 5 (fitness = 41.00000)
      1 act = 84.00%, avg = 16.03571, max = 24.00000 (25/21)
      4 act = 93.00%, avg = 18.39785, max = 25.00000 (24/18)
      5 act = 88.00%, avg = 19.15909, max = 26.00000 (23/18)
      6 act = 86.00%, avg = 19.54651, max = 27.00000 (22/18)
     12 act = 79.00%, avg = 23.13924, max = 28.00000 (21/18)
     14 act = 82.00%, avg = 23.80488, max = 29.00000 (20/16)
     15 act = 85.00%, avg = 24.15294, max = 30.00000 (19/17)
     17 act = 86.00%, avg = 25.38372, max = 33.00000 (16/15)
     29 act = 73.00%, avg = 29.27397, max = 35.00000 (14/10)
     33 act = 80.00%, avg = 30.32500, max = 36.00000 (13/ 9)
     41 act = 70.00%, avg = 32.14286, max = 38.00000 (11/ 9)
     45 act = 67.00%, avg = 33.16418, max = 39.00000 (10/ 6)
     67 act = 69.00%, avg = 36.24638, max = 40.00000 ( 9/ 6)
     70 act = 76.00%, avg = 36.64474, max = 41.00000 ( 9/ 5)
Solution in 70 rounds with fitness 41.000000
Found a 5-network of size 9 and depth 5

As I disclaimed at the beginning, this program finds minimal sorting networks for 8 elements, but gets stuck searching for networks for 9 elements. I'd love to hear any improvements you find.