domain game_of_life_mdp {
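    // Conway's Game of Life cast as a boolean MDP, in a lisp-style prefix
    // RDDL syntax. A cell survives with 2-3 live neighbors and is born with
    // exactly 3; every transition is flipped with probability NOISE-PROB,
    // and the set action forces a cell toward life at a cost of 1 reward.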
    requirements = {
        reward-deterministic
    };
    types {
        y_pos : object;
        x_pos : object;
    };
    pvariables {
        // Agent action: force cell (?x, ?y) to be alive on the next step.
        set(x_pos, y_pos) : {action-fluent, bool, default = false};
        // Probability that a cell's next state is corrupted by noise.
        NOISE-PROB(x_pos, y_pos) : {non-fluent, real, default = 0.1};
        // Grid topology: true iff the second cell is adjacent to the first.
        NEIGHBOR(x_pos, y_pos, x_pos, y_pos) : {non-fluent, bool, default = false};
        // State: whether cell (?x, ?y) is currently alive.
        alive(x_pos, y_pos) : {state-fluent, bool, default = false};
    };
    cpfs {
        // Intended next state: alive if the cell survives (2 or 3 live
        // neighbors), is born (exactly 3 live neighbors), or is set by the
        // agent; the intended outcome is then flipped with probability
        // NOISE-PROB(?x, ?y) via the Bernoulli draws below.
        (alive' ?x ?y) =
            (if (| (^ (alive ?x ?y)
                      (>= (sum ( (?x2 : x_pos) (?y2 : y_pos) ) (^ (NEIGHBOR ?x ?y ?x2 ?y2) (alive ?x2 ?y2))) 2)
                      (<= (sum ( (?x2 : x_pos) (?y2 : y_pos) ) (^ (NEIGHBOR ?x ?y ?x2 ?y2) (alive ?x2 ?y2))) 3))
                   (^ (~ (alive ?x ?y))
                      (== (sum ( (?x2 : x_pos) (?y2 : y_pos) ) (^ (NEIGHBOR ?x ?y ?x2 ?y2) (alive ?x2 ?y2))) 3))
                   (set ?x ?y))
             then (Bernoulli (- 1.0 (NOISE-PROB ?x ?y)))
             else (Bernoulli (NOISE-PROB ?x ?y)));
    };
    // +1 for every live cell, -1 for every set action taken this step.
    reward = (sum ( (?x : x_pos) (?y : y_pos) ) (- (alive ?x ?y) (set ?x ?y)));
    state-action-constraints {
        // NOISE-PROB must be a valid probability.
        (forall ( (?x : x_pos) (?y : y_pos) )
                (^ (>= (NOISE-PROB ?x ?y) 0.0) (<= (NOISE-PROB ?x ?y) 1.0)));
    };
}
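
// ---------------------------------------------------------------------------
// A minimal companion instance, for illustration only. Everything below is
// hypothetical and not part of the domain above: the 1x3 strip, the object
// and instance names, and the horizon/discount values are made up, and
// ground fluents are assumed to follow the same prefix style as the
// expressions above, e.g. (NEIGHBOR x1 y1 x2 y1).
// ---------------------------------------------------------------------------
non-fluents nf_game_of_life_demo {
    domain = game_of_life_mdp;
    objects {
        x_pos : {x1, x2, x3};
        y_pos : {y1};
    };
    non-fluents {
        // Complete adjacency for a 1x3 strip, listed in both directions.
        (NEIGHBOR x1 y1 x2 y1);
        (NEIGHBOR x2 y1 x1 y1);
        (NEIGHBOR x2 y1 x3 y1);
        (NEIGHBOR x3 y1 x2 y1);
    };
}

instance game_of_life_demo {
    domain = game_of_life_mdp;
    non-fluents = nf_game_of_life_demo;
    init-state {
        // Start with only the middle cell alive.
        (alive x2 y1);
    };
    max-nondef-actions = 1;
    horizon = 20;
    discount = 1.0;
}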