////////////////////////////////////////////////////////////////////
// A simple continuous MDP for multiagent Mars Rover.
//
// The goal here is to take as many high-value pictures (within their
// designated radii) as possible within the time constraints.
//
// This version is in RDDL 2.0 format.
//
// Motivated by:
//
// Bresina, J. L.; Dearden, R.; Meuleau, N.; Ramkrishnan, S.;
// Smith, D. E.; and Washington, R. 2002. Planning under continuous
// time and resource uncertainty: A challenge for AI. UAI 2002.
//
// Author: Scott Sanner (ssanner@gmail.com)
//
////////////////////////////////////////////////////////////////////
domain multiagent_mars_rover {
types {
robot : object;
picture-point : object;
dim-2d : {@x, @y}; // Define explicit labels for vector dimensions so can sum over
point-2d : <@x : real, @y : real>;
};
pvariables {
// Rover constants
MOVE_VARIANCE : { non-fluent, real, default = 0.5 };
MAX_MOVEMENT : { non-fluent, real, default = 40.0 };
// Each picture occurs in a different place and awards a different value
PICT_POS(picture-point) : { non-fluent, point-2d, default = <@x:0.0, @y:0.0> };
PICT_VALUE(picture-point) : { non-fluent, real, default = 1 };
PICT_RADIUS(picture-point) : { non-fluent, real, default = 5 };
// Picture reward received?
pic-taken(picture-point) : { state-fluent, bool, default = false };
// Rover coordinates
pos(robot) : { state-fluent, point-2d, default = <@x:0.0, @y:0.0> };
// Rover actions — for each robot, we can move and we can snap a picture
// (constrained below to be mutually exclusive).
move(robot) : { action-fluent, point-2d, default = <@x:0.0, @y:0.0> };
snapPicture(robot) : { action-fluent, bool, default = false };
// Derived fluents — deterministic computations from state and action,
// unlike stochastic intermediate variables, these can
// be used in global action preconditions (below).
pictureRewardPossible(robot, picture-point) : { derived-fluent, bool };
};
cpfs {
// Update rover coordinates based on movement, we assume surface
// of Mars has no coordinate constraints… can add if needed.
// Note that variance is a function of how far we’ve moved.
pos'(?r) = < @x: pos(?r).@x + move(?r).@x + Normal(0.0, MOVE_VARIANCE * abs[move(?r).@x]) ,
@y: pos(?r).@y + move(?r).@y + Normal(0.0, MOVE_VARIANCE * abs[move(?r).@y]) >;
// NOTE: if we wanted deterministic movement, we could just use *vector* operations
// pos'(?r) = pos(?r) + move(?r);
// Derived (helper) fluent to determine if a picture has not been taken yet and a
// robot is within the required radius to take a picture of it
pictureRewardPossible(?r,?p) = ~pic-taken(?p) ^
(sqrt[ sum_{?d : dim-2d} pow[ PICT_POS(?p).?d – pos(?r).?d, 2 ] ] < PICT_RADIUS(?p));
// A picture is taken if (a) it is already taken or (b) it can be taken *and* a robot has snapped a pic
pic-taken'(?p) = pic-taken(?p) | exists_{?r : robot} (pictureRewardPossible(?r,?p) ^ snapPicture(?r));
};
// We get a reward for any picture taken within picture box error bounds and we
// get penalized for all movements and pictures taken.
reward =
(sum_{?p : picture-point}
if (exists_{?r : robot} snapPicture(?r) ^ pictureRewardPossible(?r,?p))
then PICT_VALUE(?p)
else 0.0 )
- (sum_{?r : robot} snapPicture(?r)) // booleans are treated as 0-1 variables for arithmetic
- (sum_{?r : robot} sqrt[ sum_{?d : dim-2d} pow[ pos'(?r).?d - pos(?r).?d, 2 ] ]); // Euclidean distance moved
// These cannot mention actions -- they are simply assertions to
// verify that only legal states are reached. When starting from
// a legal initial state, only legal states should be reached,
// violation of this indicates an error in the domain design.
state-invariants {
// Error should be a positive quantity
forall_{?p : picture-point} PICT_RADIUS(?p) > 0;
// This will be eventually violated by a random walk, but just to give another state-invariant example
forall_{?r : robot} (pos(?r).@x >= -50) ^ (pos(?r).@x <= 50) ^ (pos(?r).@y >= -50) ^ (pos(?r).@y <= 50);
};
// Global action preconditions... cannot execute a joint action
// if it violates any of these constraints.
action-preconditions {
// Cannot snap a picture and move at the same time
// Since move is a vector type, an action is considered a non-default value assignment
// (if we want a boolean for action execution, it can always be added as a vector element)
forall_{?r : robot} ~[snapPicture(?r) ^ (move(?r) ~= move.default)];
};
}
// Specify three picture-points
non-fluents nf_multiagent_mars_rover {
domain = multiagent_mars_rover;
objects {
picture-point : {p1, p2, p3, p4, p5};
};
non-fluents {
MOVE_VARIANCE = 1.0;
PICT_POS(p1) = <@x:5.0, @y:-5.0>;
PICT_VALUE(p1) = 25;
PICT_RADIUS(p1) = 5; // Fairly tight constraints given noise
PICT_POS(p2) = <@x:5.0, @y:0.0>;
PICT_VALUE(p2) = 70;
PICT_RADIUS(p2) = 15;
PICT_POS(p3) = <@x:-10.0, @y:-10.0>;
PICT_VALUE(p3) = 90;
PICT_RADIUS(p3) = 10;
PICT_POS(p4) = <@x:-15.0, @y:15.0>;
PICT_VALUE(p4) = 120;
PICT_RADIUS(p4) = 7;
PICT_POS(p5) = <@x:15.0, @y:-15.0>;
PICT_VALUE(p5) = 100;
PICT_RADIUS(p5) = 3;
};
}
// Specify three robots using the the three picture points defined above
instance inst_multiagent_mars_rover {
domain = multiagent_mars_rover;
non-fluents = nf_multiagent_mars_rover;
objects {
robot : {r1, r2, r3};
};
init-state {
pos(r1) = <@x:5.0, @y:-5.0>;
pos(r2) = <@x:5.0, @y:5.0>;
pos(r3) = <@x:-10.0, @y:-10.0>;
};
// State-action constraints above are sufficient
max-nondef-actions = pos-inf;
horizon = 80;
discount = 1.0;
}