////////////////////////////////////////////////////////////////////
// A simple DBN encoding Conway's cellular automaton, the "Game of Life",
// on a grid with some additional rules. The agent is rewarded for
// generating patterns that keep the most cells alive.
//
// Author: Scott Sanner (ssanner [at] gmail.com)
////////////////////////////////////////////////////////////////////
domain game_of_life_stoch {

    // Language features this domain relies on:
    //   reward-deterministic : the reward is a deterministic function of the state
    //   intermediate-nodes   : count-neighbors is an interm-fluent
    //   integer-valued       : count-neighbors has range int
    //   constrained-state    : a state-action-constraints section is present
    // The number of concurrent set() actions is bounded per instance via
    // max-nondef-actions, so concurrency is not declared here.
    requirements = {
        reward-deterministic,
        intermediate-nodes,
        integer-valued,
        constrained-state
    };

    types {
        x_pos : object;
        y_pos : object;
    };

    pvariables {

        // Probability and cell topology non-fluents (unchanging).
        //
        // PROB_REGENERATE is the probability used by every Bernoulli draw in
        // the alive' CPF below. NOTE: at the default of 0.5 the values
        // PROB_REGENERATE and 1.0 - PROB_REGENERATE coincide, so all branches
        // of alive' become the same coin flip; non-fluents blocks should
        // override it with a value other than 0.5.
        PROB_REGENERATE : { non-fluent, real, default = 0.5 };

        // NEIGHBOR(?x,?y,?x2,?y2) marks cell (?x2,?y2) as contributing to the
        // neighbor count of cell (?x,?y).
        NEIGHBOR(x_pos,y_pos,x_pos,y_pos) : { non-fluent, bool, default = false };

        // State, intermediate and action fluents
        alive(x_pos,y_pos)           : { state-fluent,  bool, default = false };
        count-neighbors(x_pos,y_pos) : { interm-fluent, int, level = 1 };
        set(x_pos,y_pos)             : { action-fluent, bool, default = false };
    };

    cpfs {

        // Conway's game of life rules (from Wikipedia):
        // 1. Any live cell with fewer than two live neighbors dies, as if caused by under-population.
        // 2. Any live cell with more than three live neighbors dies, as if by overcrowding.
        // 3. Any live cell with two or three live neighbors lives on to the next generation.
        // 4. Any dead cell with exactly three live neighbors becomes a live cell, as if by reproduction.
        //
        // Scott's additional rules for RDDL:
        // 1. To make stochastic: we'll make the above rules hold with PROB_REGENERATE certainty
        // 2. To add complexity: if all cells at same x-value are dead, a cell randomly regenerates
        // 3. For interactivity: we allow an agent to explicitly set different cells (concurrently)

        // It's useful to compute the alive-neighbor count for each cell and
        // store it in an intermediate variable. The sum counts the cells
        // (?x2,?y2) that are both declared NEIGHBORs of (?x,?y) and alive.
        count-neighbors(?x,?y) = KronDelta(sum_{?x2 : x_pos, ?y2 : y_pos} [NEIGHBOR(?x,?y,?x2,?y2) ^ alive(?x2,?y2)]);

        // Stochastically determine whether cell (?x,?y) is alive in next state.
        // Branch order matters: the all-dead-column test is evaluated first,
        // so in that case the outcome does not depend on set(?x,?y).
        alive'(?x,?y) =
            if (forall_{?y2 : y_pos} ~alive(?x,?y2))
                // Additional rule 2: every cell with this x-value is dead
                then Bernoulli(PROB_REGENERATE)
            else if ([alive(?x,?y) ^ (count-neighbors(?x,?y) >= 2)
                                   ^ (count-neighbors(?x,?y) <= 3)]
                     | [~alive(?x,?y) ^ (count-neighbors(?x,?y) == 3)]
                     | set(?x,?y))
                // Conway survival (rule 3), birth (rule 4), or an explicit set
                then Bernoulli(PROB_REGENERATE)
                // Otherwise the cell should be dead; it is alive only with
                // the complementary probability
                else Bernoulli(1.0 - PROB_REGENERATE);
    };

    // Reward is number of alive cells
    reward = sum_{?x : x_pos, ?y : y_pos} alive(?x,?y);

    state-action-constraints {

        // Assertion: ensure PROB_REGENERATE is a valid probability
        (PROB_REGENERATE >= 0.0) ^ (PROB_REGENERATE <= 1.0);

        // Precondition: let's say we should not set a cell if it is already alive
        forall_{?x : x_pos, ?y : y_pos} [alive(?x,?y) => ~set(?x,?y)];
    };
}
non-fluents game2x2_stoch {

    domain = game_of_life_stoch;

    // A 2x2 grid: four cells in total.
    objects {
        x_pos : {x1,x2};
        y_pos : {y1,y2};
    };

    non-fluents {

        // Probability used by the Bernoulli draws in the domain's alive' CPF
        PROB_REGENERATE = 0.9;

        // Topology, one source cell per line: each cell lists the other
        // three cells of the grid as its neighbors.
        NEIGHBOR(x1,y1,x1,y2); NEIGHBOR(x1,y1,x2,y1); NEIGHBOR(x1,y1,x2,y2);
        NEIGHBOR(x1,y2,x1,y1); NEIGHBOR(x1,y2,x2,y1); NEIGHBOR(x1,y2,x2,y2);
        NEIGHBOR(x2,y1,x1,y1); NEIGHBOR(x2,y1,x1,y2); NEIGHBOR(x2,y1,x2,y2);
        NEIGHBOR(x2,y2,x1,y1); NEIGHBOR(x2,y2,x1,y2); NEIGHBOR(x2,y2,x2,y1);
    };
}
non-fluents game3x3_stoch {

    domain = game_of_life_stoch;

    // A 3x3 grid: nine cells in total.
    objects {
        x_pos : {x1,x2,x3};
        y_pos : {y1,y2,y3};
    };

    non-fluents {

        // Set explicitly, matching game2x2_stoch. Without this line the
        // domain default of 0.5 applies, at which PROB_REGENERATE and
        // 1.0 - PROB_REGENERATE coincide: every branch of the alive' CPF
        // is then the same coin flip and neither the game-of-life rules
        // nor the set() action influence the next state.
        PROB_REGENERATE = 0.9;

        // Topology, grouped by source cell. Corner cells have 3 neighbors,
        // edge cells 5, and the center cell 8.

        // (x1,y1) - corner
        NEIGHBOR(x1,y1,x1,y2);
        NEIGHBOR(x1,y1,x2,y1);
        NEIGHBOR(x1,y1,x2,y2);

        // (x1,y2) - edge
        NEIGHBOR(x1,y2,x1,y1);
        NEIGHBOR(x1,y2,x2,y1);
        NEIGHBOR(x1,y2,x2,y2);
        NEIGHBOR(x1,y2,x2,y3);
        NEIGHBOR(x1,y2,x1,y3);

        // (x1,y3) - corner
        NEIGHBOR(x1,y3,x1,y2);
        NEIGHBOR(x1,y3,x2,y2);
        NEIGHBOR(x1,y3,x2,y3);

        // (x2,y1) - edge
        NEIGHBOR(x2,y1,x1,y1);
        NEIGHBOR(x2,y1,x1,y2);
        NEIGHBOR(x2,y1,x2,y2);
        NEIGHBOR(x2,y1,x3,y2);
        NEIGHBOR(x2,y1,x3,y1);

        // (x2,y2) - center
        NEIGHBOR(x2,y2,x1,y1);
        NEIGHBOR(x2,y2,x1,y2);
        NEIGHBOR(x2,y2,x1,y3);
        NEIGHBOR(x2,y2,x2,y1);
        NEIGHBOR(x2,y2,x2,y3);
        NEIGHBOR(x2,y2,x3,y1);
        NEIGHBOR(x2,y2,x3,y2);
        NEIGHBOR(x2,y2,x3,y3);

        // (x2,y3) - edge
        NEIGHBOR(x2,y3,x1,y3);
        NEIGHBOR(x2,y3,x1,y2);
        NEIGHBOR(x2,y3,x2,y2);
        NEIGHBOR(x2,y3,x3,y2);
        NEIGHBOR(x2,y3,x3,y3);

        // (x3,y1) - corner
        NEIGHBOR(x3,y1,x2,y1);
        NEIGHBOR(x3,y1,x2,y2);
        NEIGHBOR(x3,y1,x3,y2);

        // (x3,y2) - edge
        NEIGHBOR(x3,y2,x3,y1);
        NEIGHBOR(x3,y2,x2,y1);
        NEIGHBOR(x3,y2,x2,y2);
        NEIGHBOR(x3,y2,x2,y3);
        NEIGHBOR(x3,y2,x3,y3);

        // (x3,y3) - corner
        NEIGHBOR(x3,y3,x2,y3);
        NEIGHBOR(x3,y3,x2,y2);
        NEIGHBOR(x3,y3,x3,y2);
    };
}
instance is1_stoch {

    // Domain and grid topology this instance is built on
    domain      = game_of_life_stoch;
    non-fluents = game3x3_stoch;

    // Cells alive at the start; every other cell takes the state-fluent
    // default declared in the domain.
    init-state {
        alive(x1,y1) = true;
        alive(x2,y2) = true;
    };

    // At most one action fluent may take a non-default value per step
    max-nondef-actions = 1;

    // Number of decision steps and the per-step discount factor
    horizon  = 20;
    discount = 0.9;
}