////////////////////////////////////////////////////////////////////////
// A simple 2-slice DBN (variables are not parameterized) exhibiting
// use of observation variables conditioned on next state.
//
// Author: Scott Sanner (ssanner [at] gmail.com)
////////////////////////////////////////////////////////////////////////
domain prop_dbn_po {

    requirements = {
        reward-deterministic, // Reward is a deterministic function
        partially-observed    // Uses observation nodes
    };

    pvariables {
        // Boolean state fluents; each one is false unless the instance says otherwise
        p : { state-fluent, bool, default = false };
        q : { state-fluent, bool, default = false };
        r : { state-fluent, bool, default = false };

        // Boolean observation fluents (one per state fluent, see cpfs below)
        o0 : { observ-fluent, bool };
        o1 : { observ-fluent, bool };
        o2 : { observ-fluent, bool };

        // The single boolean action fluent; it only influences q'
        a : { action-fluent, bool, default = false };
    };

    cpfs {
        // NOTE: a next-state fluent is written as the fluent name followed by
        // the plain ASCII apostrophe ('), never a typographic quote.

        // Some standard Bernoulli conditional probability tables
        // p' is true w.p. .9 when both p and r hold, otherwise w.p. .3
        p' = if (p ^ r) then Bernoulli(.9) else Bernoulli(.3);

        // q' is true w.p. .9 when both q and r hold; otherwise the action
        // decides: w.p. .3 if a is taken, w.p. .8 if it is not
        q' = if (q ^ r) then Bernoulli(.9)
             else if (a) then Bernoulli(.3) else Bernoulli(.8);

        // KronDelta is like a DiracDelta, but for discrete data (boolean or int)
        // r' is deterministic: r persists while q is false, otherwise r' = (r <=> q)
        r' = if (~q) then KronDelta(r) else KronDelta(r <=> q);

        // Observations reflect next state (deterministic copies).
        // Mind the pairing: o0 <- r', o1 <- p', o2 <- q'
        o0 = KronDelta( r' );
        o1 = KronDelta( p' );
        o2 = KronDelta( q' );
    };

    // A boolean functions as a 0/1 integer when a numerical value is needed,
    // so the reward is the number of state fluents that are currently true
    reward = p + q + r;
}
instance inst_dbn_po {

    // Domain definition this instance is built on
    domain = prop_dbn_po;

    // Starting values of the state fluents, all spelled out as explicit
    // assignments. Shorthand also exists: a bare 'p;' means p = true and
    // '~q;' means q = false.
    init-state {
        p = true;
        q = false; // matches the declared default, so this line is optional
        r = true;
    };

    // Limit on how many action fluents may be non-default in a single step
    max-nondef-actions = 1;

    // Number of decision steps and the per-step discount factor
    horizon = 20;
    discount = 0.9;
}