////////////////////////////////////////////////////////////////////
// Skill Teaching Domain
//
// Author: Tom Walsh (thomasjwalsh [at] gmail.com)
// Special thanks to Derek Green and Paul Cohen at
// University of Arizona for help with the design.
//
// In the SkillTeaching MDP domain, the agent is trying to teach a series
// of skills to a student through the use of hints and multiple choice
// questions. The student has a proficiency level for each skill, which
// indicates their ability to answer questions about that skill. Positive
// reward is given for high proficiency on a skill and negative reward
// for low proficiency. Each skill also has a weight indicating how much
// it is worth.
//
// Many of the skills are connected in that some are
// "pre-conditions" of others. If all of a skill's pre-conditions
// are learned, the student has some probability of answering
// questions about it correctly. Each pre-condition that is at high
// proficiency adds to that probability, and knowing all of them can
// yield a probability higher than the sum of the individual
// contributions. Hints only work if all the pre-conditions are
// known, and can only bring the student to medium proficiency.
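//
// For example, with the default parameters below, a skill with two
// pre-conditions is answered correctly with probability
// 2 * PROB_PER_PRE = 0.2 while some pre-condition is still unmastered,
// but with probability PROB_ALL_PRE = 0.8 once every pre-condition is
// at high proficiency.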
//
// Student proficiency increases when questions about a skill are
// answered correctly, decreases when they are answered incorrectly,
// and sometimes decreases by chance.
//
// To model the teacher-student interaction, every other step in the
// domain is the student's turn, where they answer a question.
//
// The planning problems here are:
// 1) Whether or not to teach all the pre-requisites of a skill before
// teaching it.
// 2) Which skill to focus on next.
// 3) When to give hints and when to use multiple choice problems.
//
////////////////////////////////////////////////////////////////////

domain skill_teaching_mdp {

requirements = {
reward-deterministic
};

types {
skill : object;
};

pvariables {

//how valuable is this skill?
SKILL_WEIGHT(skill) : { non-fluent, real, default = 1.0 };

//some skills are pre-reqs for others. Your ability to achieve a higher-level skill depends on how
//many of the pre-reqs you have mastered
PRE_REQ(skill, skill) : { non-fluent, bool, default = false };

//probability of getting a question right if you have all the pre-reqs
PROB_ALL_PRE(skill) : { non-fluent, real, default = 0.8 };
//if you don't have all the pre-reqs, probability mass is summed using these individual pieces
PROB_PER_PRE(skill) : { non-fluent, real, default = 0.1 };

//probability of answering correctly from medium proficiency if you have all the pre-reqs
PROB_ALL_PRE_MED(skill) : { non-fluent, real, default = 1.0 };
//if you don't have all the pre-reqs, probability mass is summed using these individual pieces
PROB_PER_PRE_MED(skill) : { non-fluent, real, default = 0.3 };

//probability of answering correctly at high proficiency
PROB_HIGH(skill) : { non-fluent, real, default = 0.9 };

//probability of losing high proficiency by chance
LOSE_PROB(skill) : { non-fluent, real, default = 0.02 };

//proficiency values accumulate, so med and high can be on at the same time; med stays on while high is on
proficiencyMed(skill) : { state-fluent, bool, default = false };
proficiencyHigh(skill) : { state-fluent, bool, default = false };

updateTurn(skill) : {state-fluent, bool, default = false};

answeredRight(skill): {state-fluent, bool, default = false};
hintedRight(skill): {state-fluent, bool, default = false};
hintDelayVar(skill) : {state-fluent, bool, default = false};

//two actions: giveHint can get you directly to proficiencyMed, but only if all the pre-reqs are at high proficiency
askProb(skill) : {action-fluent, bool, default = false};
giveHint(skill) : {action-fluent, bool, default = false};
};

cpfs {

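// updateTurn(?s) turns on for the skill just acted upon and off again
// on the following step; proficiency updates are applied while it is on.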
updateTurn'(?s) =
KronDelta( [forall_{?s2: skill} ~updateTurn(?s2)] ^ (askProb(?s) | giveHint(?s)) );

//without intermediate nodes, we need to keep "on" all proficiency levels that have been attained

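// answeredRight(?s) samples whether the student answers correctly when
// askProb(?s) is chosen; branches are ordered from highest current
// proficiency downward.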
answeredRight'(?s) =
if ([forall_{?s2: skill} ~updateTurn(?s2)] ^ askProb(?s) ^ proficiencyHigh(?s))
then Bernoulli(PROB_HIGH(?s))
else if ([forall_{?s2: skill} ~updateTurn(?s2)] ^ askProb(?s) ^ proficiencyMed(?s) ^ forall_{?s3: skill}[PRE_REQ(?s3, ?s) => proficiencyHigh(?s3)])
then Bernoulli(PROB_ALL_PRE_MED(?s))
else if ([forall_{?s2: skill} ~updateTurn(?s2)] ^ askProb(?s) ^ proficiencyMed(?s))
then Bernoulli(sum_{?s2: skill}[PRE_REQ(?s2, ?s) * PROB_PER_PRE_MED(?s)])
else if ([forall_{?s2: skill} ~updateTurn(?s2)] ^ askProb(?s) ^ forall_{?s3: skill}[PRE_REQ(?s3, ?s) => proficiencyHigh(?s3)])
then Bernoulli(PROB_ALL_PRE(?s))
else if ([forall_{?s2: skill} ~updateTurn(?s2)] ^ askProb(?s))
then Bernoulli(sum_{?s2: skill}[PRE_REQ(?s2, ?s) * PROB_PER_PRE(?s)])
else
KronDelta( false );

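// hintedRight(?s) records that a hint landed (all pre-reqs at high
// proficiency); hintDelayVar(?s) records that any hint was given, so
// medium proficiency is not lost on the following update turn.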
hintedRight'(?s) =
KronDelta( [forall_{?s3: skill} ~updateTurn(?s3)] ^ giveHint(?s) ^ forall_{?s2: skill}[PRE_REQ(?s2, ?s) => proficiencyHigh(?s2)] );

hintDelayVar'(?s) =
KronDelta( [forall_{?s2: skill} ~updateTurn(?s2)] ^ giveHint(?s) );

//proficiencyMed can be reached through a hint if all preconditions are known or by a problem answered correctly
proficiencyMed'(?s) =
if (~updateTurn(?s) ^ proficiencyMed(?s))
then KronDelta( true )
else if (updateTurn(?s) ^ hintedRight(?s))
then KronDelta( true )
else if (updateTurn(?s) ^ answeredRight(?s))
then KronDelta( true )
else if (proficiencyHigh(?s)) //may come down
then KronDelta( true )
else if (proficiencyMed(?s) ^ updateTurn(?s) ^ hintDelayVar(?s))
then KronDelta( true ) //can't lose it on a hint
else
KronDelta( false );

//high proficiency is reached by answering a question correctly while at proficiencyMed,
//but you can lose it too if you get questions wrong
proficiencyHigh'(?s) =
if (forall_{?s2: skill}[~updateTurn(?s2)]) //student turn
then KronDelta( proficiencyHigh(?s) )
else if (~updateTurn(?s) ^ proficiencyHigh(?s))
then Bernoulli(1.0 - LOSE_PROB(?s))
else if (proficiencyMed(?s) ^ updateTurn(?s) ^ answeredRight(?s))
then KronDelta( true )
else if (proficiencyHigh(?s) ^ updateTurn(?s) ^ (hintDelayVar(?s) | answeredRight(?s))) //can't lose it on a hint
then KronDelta( true )
else KronDelta( false );

};

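// Reward: a positive term for every skill at high proficiency and a
// penalty for every skill not yet at medium proficiency, both scaled
// by SKILL_WEIGHT.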
reward = [sum_{?s : skill} [SKILL_WEIGHT(?s) * proficiencyHigh(?s)]] + [sum_{?s : skill} -[SKILL_WEIGHT(?s) * ~proficiencyMed(?s)]];

}
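
////////////////////////////////////////////////////////////////////
// A minimal example instance sketch, assuming the standard RDDL
// non-fluents/instance syntax; the skill names, pre-req structure,
// weight, horizon, and discount below are purely illustrative.
// Skill s1 has s0 as its only pre-req and is worth twice as much.
////////////////////////////////////////////////////////////////////

non-fluents skill_teaching_example_nf {
domain = skill_teaching_mdp;
objects {
skill : {s0, s1};
};
non-fluents {
PRE_REQ(s0, s1);
SKILL_WEIGHT(s1) = 2.0;
};
}

instance skill_teaching_example_inst {
domain = skill_teaching_mdp;
non-fluents = skill_teaching_example_nf;
max-nondef-actions = 1;
horizon = 20;
discount = 1.0;
}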