////////////////////////////////////////////////////////////////////
// A simple continuous MDP for a single Mars Rover. Note that this
// models continuous time explicitly since there is only one rover.
//
// The goal here is to take as many high-value pictures (each within the
// allowed error box around its picture point) as possible within the
// time constraints.
//
// Motivated by:
//
// Bresina, J. L.; Dearden, R.; Meuleau, N.; Ramakrishnan, S.;
// Smith, D. E.; and Washington, R. 2002. Planning under continuous
// time and resource uncertainty: A challenge for AI. UAI 2002.
//
// Author: Scott Sanner (ssanner@gmail.com)
////////////////////////////////////////////////////////////////////
domain simple_mars_rover {

    requirements = {
        concurrent,           // x and y directions move independently and simultaneously
        reward-deterministic, // this domain does not use a stochastic reward
        intermediate-nodes,   // declared, although no interm-fluent is defined below
        constrained-state     // this domain uses state(-action) constraints
    };

    types {
        picture-point : object;
    };

    pvariables {

        // Problem constants
        MAX_TIME : { non-fluent, real, default = 12.0 };

        // Rover constants: movement noise variance per unit of distance moved
        MOVE_VARIANCE_MULT : { non-fluent, real, default = 0.5 };

        // Each picture occurs in a different place and awards a different value.
        // PICT_ERROR_ALLOW is the half-width of the box around the picture
        // point inside which a snapshot earns PICT_VALUE.
        PICT_XPOS(picture-point)        : { non-fluent, real, default = 0.0 };
        PICT_YPOS(picture-point)        : { non-fluent, real, default = 0.0 };
        PICT_VALUE(picture-point)       : { non-fluent, real, default = 1.0 };
        PICT_ERROR_ALLOW(picture-point) : { non-fluent, real, default = 0.5 };

        // Rover coordinates and elapsed time
        xPos : { state-fluent, real, default = 0.0 };
        yPos : { state-fluent, real, default = 0.0 };
        time : { state-fluent, real, default = 0.0 };

        // Rover actions -- constraints enforce that a rover cannot snap a
        // picture and move simultaneously
        xMove       : { action-fluent, real, default = 0.0 };
        yMove       : { action-fluent, real, default = 0.0 };
        snapPicture : { action-fluent, bool, default = false };
    };

    cpfs {

        // Update rover coordinates based on movement, we assume surface
        // of Mars has no coordinate constraints.  Can add if needed.
        //
        // The noise variance scales with the *magnitude* of the move:
        // the second argument of Normal is a variance and must not be
        // negative, so a move in the negative direction must not flip
        // its sign.  RDDL has no abs() here, hence the if/else idiom.
        xPos' = xPos + xMove
                + Normal(0.0, MOVE_VARIANCE_MULT *
                              [if (xMove > 0) then xMove else -xMove]);
        yPos' = yPos + yMove
                + Normal(0.0, MOVE_VARIANCE_MULT *
                              [if (yMove > 0) then yMove else -yMove]);

        // We assume taking a picture requires 1/4 hour and movement takes
        // the Manhattan distance time (assuming units are meters and speed
        // is 1 m/hour).  Euclidean distance would be more plausible, but
        // we need to add elementary functions like sqrt into RDDL for this.
        // Even an absolute value would simplify things here as well.
        time' = if (snapPicture)
                then DiracDelta(time + 0.25)
                else DiracDelta(time +
                        [if (xMove > 0) then xMove else -xMove] +
                        [if (yMove > 0) then yMove else -yMove]);
    };

    // We get a reward for any picture taken within picture box error bounds
    // and the time limit.  The check uses the current (pre-transition)
    // position and time; overlapping boxes each contribute their value.
    reward = if (snapPicture ^ (time <= MAX_TIME))
             then sum_{?p : picture-point} [
                    if ((xPos >= PICT_XPOS(?p) - PICT_ERROR_ALLOW(?p))
                      ^ (xPos <= PICT_XPOS(?p) + PICT_ERROR_ALLOW(?p))
                      ^ (yPos >= PICT_YPOS(?p) - PICT_ERROR_ALLOW(?p))
                      ^ (yPos <= PICT_YPOS(?p) + PICT_ERROR_ALLOW(?p)))
                    then PICT_VALUE(?p)
                    else 0.0 ]
             else 0.0;

    state-action-constraints {

        // Cannot snap a picture and move at the same time
        snapPicture => ((xMove == 0.0) ^ (yMove == 0.0));
    };
}
non-fluents pics3 {

    domain = simple_mars_rover;

    // The three candidate picture sites
    objects {
        picture-point : {p1, p2, p3};
    };

    non-fluents {

        // Global settings: time budget and movement-noise multiplier
        MAX_TIME           = 12.0;
        MOVE_VARIANCE_MULT = 0.1;

        // Site x coordinates
        PICT_XPOS(p1) = 1.0;
        PICT_XPOS(p2) = 1.0;
        PICT_XPOS(p3) = 2.0;

        // Site y coordinates
        PICT_YPOS(p1) = -1.0;
        PICT_YPOS(p2) = 1.0;
        PICT_YPOS(p3) = -1.0;

        // Reward for a successful snapshot at each site
        PICT_VALUE(p1) = 5.0;
        PICT_VALUE(p2) = 10.0;
        PICT_VALUE(p3) = 7.0;

        // Allowed positional error around each site; p2 (the most
        // valuable site) is fairly tight given the movement noise
        PICT_ERROR_ALLOW(p1) = 0.5;
        PICT_ERROR_ALLOW(p2) = 0.2;
        PICT_ERROR_ALLOW(p3) = 0.3;
    };
}
instance inst_simple_mars_rover {

    domain      = simple_mars_rover;
    non-fluents = pics3;

    // The rover starts at the origin with the clock at zero
    init-state {
        time = 0.0;
        xPos = 0.0;
        yPos = 0.0;
    };

    // No cap on simultaneous non-default actions: the domain's
    // state-action constraints already rule out the invalid combinations
    max-nondef-actions = pos-inf;

    // Ten decision steps, undiscounted
    horizon  = 10;
    discount = 1.0;
}