import random
from game_state import GameState

"""
game_env.py

This file contains a class representing an Untitled Dragon Game environment. You should make use of this class in your
solver.

COMP3702 2021 Assignment 2 Support Code

Last updated by njc 02/09/21
"""

class GameEnv:
    """
    Instance of an Untitled Dragon Game environment. Stores the dimensions of the environment, initial player position,
    exit position, number of gems and position of each gem, time limit, cost target, the tile type of each grid
    position, and a list of all available actions.

    The grid is indexed top to bottom, left to right (i.e. the top left corner has coordinates (0, 0) and the bottom
    right corner has coordinates (n_rows-1, n_cols-1)).

    You may use and modify this class however you want. Note that evaluation on GradeScope will use an unmodified
    GameEnv instance as a simulator.
    """

    # input file symbols
    SOLID_TILE = 'X'
    LADDER_TILE = '='
    AIR_TILE = ' '
    LAVA_TILE = '*'
    SUPER_JUMP_TILE = 'J'
    SUPER_CHARGE_TILE = 'C'
    GEM_TILE = 'G'
    EXIT_TILE = 'E'
    PLAYER_TILE = 'P'
    VALID_TILES = {SOLID_TILE, LADDER_TILE, AIR_TILE, LAVA_TILE, SUPER_JUMP_TILE, SUPER_CHARGE_TILE, GEM_TILE,
                   EXIT_TILE, PLAYER_TILE}
    # tiles the player must be standing on (i.e. directly below the player) to walk or jump
    WALK_JUMP_ALLOWED_TILES = {SOLID_TILE, LADDER_TILE, SUPER_JUMP_TILE, SUPER_CHARGE_TILE}
    # tiles that must be directly below the player to glide or drop
    GLIDE_DROP_ALLOWED_TILES = {AIR_TILE, LADDER_TILE, LAVA_TILE}
    # tiles the player cannot move into
    COLLISION_TILES = {SOLID_TILE, SUPER_JUMP_TILE, SUPER_CHARGE_TILE}

    # action symbols (i.e. output file symbols)
    WALK_LEFT = 'wl'
    WALK_RIGHT = 'wr'
    JUMP = 'j'
    GLIDE_LEFT_1 = 'gl1'
    GLIDE_LEFT_2 = 'gl2'
    GLIDE_LEFT_3 = 'gl3'
    GLIDE_RIGHT_1 = 'gr1'
    GLIDE_RIGHT_2 = 'gr2'
    GLIDE_RIGHT_3 = 'gr3'
    DROP_1 = 'd1'
    DROP_2 = 'd2'
    DROP_3 = 'd3'
    ACTIONS = {WALK_LEFT, WALK_RIGHT, JUMP, GLIDE_LEFT_1, GLIDE_LEFT_2, GLIDE_LEFT_3,
               GLIDE_RIGHT_1, GLIDE_RIGHT_2, GLIDE_RIGHT_3, DROP_1, DROP_2, DROP_3}
    WALK_ACTIONS = {WALK_LEFT, WALK_RIGHT}
    GLIDE_ACTIONS = {GLIDE_LEFT_1, GLIDE_LEFT_2, GLIDE_LEFT_3, GLIDE_RIGHT_1, GLIDE_RIGHT_2, GLIDE_RIGHT_3}
    DROP_ACTIONS = {DROP_1, DROP_2, DROP_3}
    ACTION_COST = {WALK_LEFT: 1.0, WALK_RIGHT: 1.0, JUMP: 2.0, GLIDE_LEFT_1: 0.7, GLIDE_LEFT_2: 1.0, GLIDE_LEFT_3: 1.2,
                   GLIDE_RIGHT_1: 0.7, GLIDE_RIGHT_2: 1.0, GLIDE_RIGHT_3: 1.2, DROP_1: 0.3, DROP_2: 0.4, DROP_3: 0.5}

    # number of non-annotation header lines that precede the map rows in an input file
    _N_HEADER_LINES = 10
    # header lines holding a single float: line index -> (attribute name, description used in error messages)
    _SCALAR_HEADER_FIELDS = {
        1: ('offline_time', 'offline time'),
        2: ('online_time', 'online time'),
        3: ('reward_tgt', 'reward target'),
        7: ('ladder_fall_prob', 'ladder fall probability'),
        8: ('collision_penalty', 'collision penalty'),
        9: ('game_over_penalty', 'game over penalty'),
    }
    # header lines holding comma separated values: line index -> description used in error messages
    _LIST_HEADER_DESCRIPTIONS = {
        0: 'n_rows and n_cols',
        4: 'glide probabilities',
        5: 'super jump probabilities',
        6: 'super charge probabilities',
    }

    def __init__(self, filename):
        """
        Process the given input file and create a new game environment instance based on the input file.

        Lines starting with '#' are treated as annotations and skipped. The first 10 remaining lines are header
        values (dimensions, time limits, reward target, probabilities and penalties); every following non-blank line
        is one row of the map.

        :param filename: name of input file
        :raises AssertionError: if the input file is malformed
        """
        grid_data = []
        i = 0
        # context manager guarantees the file handle is released even if parsing fails
        with open(filename, 'r') as f:
            for line in f:
                stripped = line.strip()
                # skip annotations in input file (startswith is safe on blank lines, unlike indexing [0])
                if stripped.startswith('#'):
                    continue

                if i < self._N_HEADER_LINES:
                    self._parse_header_line(i, stripped)
                elif stripped:
                    grid_data.append(list(stripped))
                    if len(grid_data[-1]) != self.n_cols:
                        raise AssertionError(f'!!! Invalid input file – incorrect map row length (line {i}) !!!')

                i += 1

        # validate the row count before indexing into the grid, so a short map gives a clear error
        if len(grid_data) != self.n_rows:
            raise AssertionError('!!! Invalid input file – incorrect number of map rows !!!')

        # extract gem, exit and initial positions
        gem_positions = []
        self.init_row, self.init_col = None, None
        self.exit_row, self.exit_col = None, None
        for r in range(self.n_rows):
            for c in range(self.n_cols):
                tile = grid_data[r][c]
                if tile == self.PLAYER_TILE:
                    if self.init_row is not None or self.init_col is not None:
                        raise AssertionError('!!! Invalid input file – more than one initial player position !!!')
                    self.init_row, self.init_col = r, c
                    # assume player starts on air tile
                    grid_data[r][c] = self.AIR_TILE
                elif tile == self.EXIT_TILE:
                    if self.exit_row is not None or self.exit_col is not None:
                        raise AssertionError('!!! Invalid input file – more than one exit position !!!')
                    self.exit_row, self.exit_col = r, c
                    # assume exit is placed on air tile
                    grid_data[r][c] = self.AIR_TILE
                elif tile == self.GEM_TILE:
                    gem_positions.append((r, c))
                    # assume all gems are placed on air tiles
                    grid_data[r][c] = self.AIR_TILE
        self.n_gems = len(gem_positions)
        if self.init_row is None or self.init_col is None:
            raise AssertionError('!!! Invalid input file – No player initial position !!!')
        if self.exit_row is None or self.exit_col is None:
            raise AssertionError('!!! Invalid input file – No exit position !!!')

        self.gem_positions = gem_positions
        self.grid_data = grid_data

    def _parse_header_line(self, i, text):
        """
        Parse one header line of the input file and store the resulting value(s) on this instance.

        :param i: index of the line among the non-annotation lines of the file (0 to 9)
        :param text: the line's content with surrounding whitespace removed
        :raises AssertionError: if the line cannot be parsed (wrong number format or too few values)
        """
        if i in self._SCALAR_HEADER_FIELDS:
            attr, description = self._SCALAR_HEADER_FIELDS[i]
        else:
            attr, description = None, self._LIST_HEADER_DESCRIPTIONS[i]

        try:
            if attr is not None:
                setattr(self, attr, float(text))
            elif i == 0:
                # unpacking raises ValueError when there are not exactly two values
                self.n_rows, self.n_cols = tuple(int(x) for x in text.split(','))
            else:
                probs = [float(x) for x in text.split(',')]
                if i == 4:
                    # the same three probabilities apply to each glide type, shifted by the nominal glide distance
                    self.glide1_probs = {0: probs[0], 1: probs[1], 2: probs[2]}
                    self.glide2_probs = {1: probs[0], 2: probs[1], 3: probs[2]}
                    self.glide3_probs = {2: probs[0], 3: probs[1], 4: probs[2]}
                elif i == 5:
                    self.super_jump_probs = {2: probs[0], 3: probs[1], 4: probs[2], 5: probs[3]}
                else:
                    self.super_charge_probs = {2: probs[0], 3: probs[1], 4: probs[2], 5: probs[3]}
        except (ValueError, IndexError) as err:
            # IndexError covers a line with too few comma separated values
            raise AssertionError(f'!!! Invalid input file – {description} (line {i}) !!!') from err

    def get_init_state(self):
        """
        Get a state representation instance for the initial state.
        :return: initial state
        """
        return GameState(self.init_row, self.init_col, tuple(0 for _ in self.gem_positions))

    def __check_collision_or_terminal(self, row, col, reward, row_move_dir, col_move_dir):
        """
        Check whether moving onto (row, col) causes a collision or a game over.

        On collision (position outside the grid or on a collision tile) the collision penalty is subtracted from the
        reward and the position is moved back by one step against the movement direction. On lava the game over
        penalty is subtracted and the position is kept.

        :param row: row of the position just moved to
        :param col: column of the position just moved to
        :param reward: reward accumulated so far for the current action
        :param row_move_dir: row delta of the step that was just taken
        :param col_move_dir: column delta of the step that was just taken
        :return: (row, col, reward, collision [True/False], terminal [True/False])
        """
        terminal = False
        collision = False
        # check for collision condition
        if (not 0 <= row < self.n_rows) or (not 0 <= col < self.n_cols) or \
                self.grid_data[row][col] in self.COLLISION_TILES:
            reward -= self.collision_penalty
            row -= row_move_dir     # bounce back to previous position
            col -= col_move_dir     # bounce back to previous position
            collision = True
        # check for game over condition
        elif self.grid_data[row][col] == self.LAVA_TILE:
            reward -= self.game_over_penalty
            terminal = True

        return row, col, reward, collision, terminal

    def __check_collision_or_terminal_glide(self, row, col, reward, row_move_dir, col_move_dir):
        """
        Variant of the collision / game over check for glide and drop actions - checks the row above as well as the
        current row.

        perform_action() always calls this with row >= state.row + 1, so row - 1 does not wrap around to the last
        grid row for a state inside the grid.

        :param row: row of the position just moved to
        :param col: column of the position just moved to
        :param reward: reward accumulated so far for the current action
        :param row_move_dir: row delta of the step that was just taken
        :param col_move_dir: column delta of the step that was just taken
        :return: (row, col, reward, collision [True/False], terminal [True/False])
        """
        terminal = False
        collision = False
        # check for collision condition
        if (not 0 <= row < self.n_rows) or (not 0 <= col < self.n_cols) or \
                self.grid_data[row][col] in self.COLLISION_TILES or \
                self.grid_data[row - 1][col] in self.COLLISION_TILES:
            reward -= self.collision_penalty
            row -= row_move_dir     # bounce back to previous position
            col -= col_move_dir     # bounce back to previous position
            collision = True
        # check for game over condition
        elif self.grid_data[row][col] == self.LAVA_TILE or self.grid_data[row - 1][col] == self.LAVA_TILE:
            reward -= self.game_over_penalty
            terminal = True

        return row, col, reward, collision, terminal

    def __check_gem_collected_or_goal_reached(self, row, col, gem_status):
        """
        Update the gem status for a gem collected at (row, col), or detect that the goal has been reached.

        :param row: row of the player's final position
        :param col: column of the player's final position
        :param gem_status: tuple with one entry per gem (1 = collected, 0 = not collected)
        :return: (gem_status [tuple], is_terminal [True/False])
        """
        is_terminal = False
        position = (row, col)
        gem_idx = self.gem_positions.index(position) if position in self.gem_positions else None
        # check if a gem is collected (only do this for final position of charge)
        if gem_idx is not None and gem_status[gem_idx] == 0:
            updated = list(gem_status)
            updated[gem_idx] = 1
            gem_status = tuple(updated)
        # check for goal reached condition (only do this for final position of charge)
        elif row == self.exit_row and col == self.exit_col and \
                all(gs == 1 for gs in gem_status):
            is_terminal = True
        return gem_status, is_terminal

    @staticmethod
    def __sample_move_dist(probs):
        """
        Sample a move distance from a discrete distribution using the module level random generator.

        :param probs: dict mapping move distance to probability
        :return: sampled move distance (0 if the random draw is not below the total of all probabilities)
        """
        rn = random.random()
        cumulative_prob = 0
        move_dist = 0
        for dist, prob in probs.items():
            cumulative_prob += prob
            if rn < cumulative_prob:
                move_dist = dist
                break

        return move_dist

    def perform_action(self, state, action, seed=None):
        """
        Perform the given action on the given state, sample an outcome, and return whether the action was valid, and if
        so, the received reward, the resulting new state and whether the new state is terminal.
        :param state: current GameState
        :param action: an element of self.ACTIONS
        :param seed: random number generator seed (for consistent outcomes between runs)
        :return: (action_is_valid [True/False], received_reward [float], next_state [GameState],
                state_is_terminal [True/False])
        """
        reward = -1 * self.ACTION_COST[action]
        is_game_over = False

        # check if the given action is valid for the given state
        if action in {self.WALK_LEFT, self.WALK_RIGHT, self.JUMP}:
            # check walkable ground prerequisite if action is walk or jump
            if self.grid_data[state.row + 1][state.col] not in self.WALK_JUMP_ALLOWED_TILES:
                # prerequisite not satisfied
                return False, None, None, None
        else:
            # check permeable ground prerequisite if action is glide or drop
            if self.grid_data[state.row + 1][state.col] not in self.GLIDE_DROP_ALLOWED_TILES:
                # prerequisite not satisfied
                return False, None, None, None

        # handle each action type separately
        if action in self.WALK_ACTIONS:
            if self.grid_data[state.row + 1][state.col] == self.SUPER_CHARGE_TILE:
                # sample a random move distance
                random.seed(seed)
                move_dist = self.__sample_move_dist(self.super_charge_probs)

                # set movement direction
                move_dir = -1 if action == self.WALK_LEFT else 1

                next_row, next_col = state.row, state.col
                next_gem_status = state.gem_status

                # move up to the last adjoining supercharge tile
                while self.grid_data[next_row + 1][next_col + move_dir] == self.SUPER_CHARGE_TILE:
                    next_col += move_dir
                    # check for collision or game over
                    next_row, next_col, reward, collision, is_game_over = \
                        self.__check_collision_or_terminal(next_row, next_col, reward,
                                                           row_move_dir=0, col_move_dir=move_dir)
                    if collision or is_game_over:
                        break

                # move sampled move distance beyond the last adjoining supercharge tile
                # NOTE(review): this loop still runs after a collision / game over in the loop above - kept as-is
                # because this class is the reference simulator; confirm against the assignment spec
                for _ in range(move_dist):
                    next_col += move_dir
                    # check for collision or game over
                    next_row, next_col, reward, collision, is_game_over = \
                        self.__check_collision_or_terminal(next_row, next_col, reward,
                                                           row_move_dir=0, col_move_dir=move_dir)
                    if collision or is_game_over:
                        break

                # check if a gem is collected or goal is reached (only do this for final position of charge)
                next_gem_status, is_solved = self.__check_gem_collected_or_goal_reached(next_row, next_col,
                                                                                        next_gem_status)

                return True, reward, GameState(next_row, next_col, next_gem_status), is_game_over or is_solved

            else:
                # if on ladder, sample whether fall occurs
                random.seed(seed)
                if self.grid_data[state.row + 1][state.col] == self.LADDER_TILE and \
                        self.grid_data[state.row + 2][state.col] not in self.COLLISION_TILES and \
                        random.random() < self.ladder_fall_prob:
                    next_row, next_col = state.row + 2, state.col
                    row_move_dir = 1
                    col_move_dir = 0
                # not on ladder or no fall - set movement direction based on chosen action
                elif action == self.WALK_LEFT:
                    col_move_dir = -1
                    row_move_dir = 0
                    next_row, next_col = (state.row, state.col + col_move_dir)
                else:
                    col_move_dir = 1
                    row_move_dir = 0
                    next_row, next_col = (state.row, state.col + col_move_dir)

                next_gem_status = state.gem_status
                # check for collision or game over
                next_row, next_col, reward, collision, is_game_over = \
                    self.__check_collision_or_terminal(next_row, next_col, reward,
                                                       row_move_dir=row_move_dir, col_move_dir=col_move_dir)

                # check if a gem is collected or goal is reached
                next_gem_status, is_solved = self.__check_gem_collected_or_goal_reached(next_row, next_col,
                                                                                        next_gem_status)

                return True, reward, GameState(next_row, next_col, next_gem_status), is_game_over or is_solved

        elif action == self.JUMP:
            if self.grid_data[state.row + 1][state.col] == self.SUPER_JUMP_TILE:
                # sample a random move distance
                random.seed(seed)
                move_dist = self.__sample_move_dist(self.super_jump_probs)

                next_row, next_col = state.row, state.col
                next_gem_status = state.gem_status

                # move sampled distance upwards
                for _ in range(move_dist):
                    next_row -= 1
                    # check for collision or game over
                    next_row, next_col, reward, collision, is_game_over = \
                        self.__check_collision_or_terminal(next_row, next_col, reward,
                                                           row_move_dir=-1, col_move_dir=0)
                    if collision or is_game_over:
                        break

                # check if a gem is collected or goal is reached (only do this for final position of charge)
                next_gem_status, is_solved = self.__check_gem_collected_or_goal_reached(next_row, next_col,
                                                                                        next_gem_status)

                return True, reward, GameState(next_row, next_col, next_gem_status), is_game_over or is_solved

            else:
                next_row, next_col = state.row - 1, state.col
                next_gem_status = state.gem_status
                # check for collision or game over
                next_row, next_col, reward, collision, is_game_over = \
                    self.__check_collision_or_terminal(next_row, next_col, reward,
                                                       row_move_dir=-1, col_move_dir=0)

                # check if a gem is collected or goal is reached
                next_gem_status, is_solved = self.__check_gem_collected_or_goal_reached(next_row, next_col,
                                                                                        next_gem_status)

                return True, reward, GameState(next_row, next_col, next_gem_status), is_game_over or is_solved

        elif action in self.GLIDE_ACTIONS:
            # select probabilities to sample move distance
            if action in {self.GLIDE_LEFT_1, self.GLIDE_RIGHT_1}:
                probs = self.glide1_probs
            elif action in {self.GLIDE_LEFT_2, self.GLIDE_RIGHT_2}:
                probs = self.glide2_probs
            else:
                probs = self.glide3_probs

            # sample a random move distance
            random.seed(seed)
            move_dist = self.__sample_move_dist(probs)

            # set movement direction
            if action in {self.GLIDE_LEFT_1, self.GLIDE_LEFT_2, self.GLIDE_LEFT_3}:
                move_dir = -1
            else:
                move_dir = 1

            # move sampled distance in chosen direction (a glide always descends by one row)
            next_row, next_col = state.row + 1, state.col
            next_gem_status = state.gem_status
            for _ in range(move_dist):
                next_col += move_dir
                # check for collision or game over
                next_row, next_col, reward, collision, is_game_over = \
                    self.__check_collision_or_terminal_glide(next_row, next_col, reward,
                                                             row_move_dir=0, col_move_dir=move_dir)
                if collision or is_game_over:
                    break

            # check if a gem is collected or goal is reached (only do this for final position of charge)
            next_gem_status, is_solved = self.__check_gem_collected_or_goal_reached(next_row, next_col,
                                                                                    next_gem_status)

            return True, reward, GameState(next_row, next_col, next_gem_status), is_game_over or is_solved

        elif action in self.DROP_ACTIONS:
            move_dist = {self.DROP_1: 1, self.DROP_2: 2, self.DROP_3: 3}[action]

            # drop by chosen distance
            next_row, next_col = state.row, state.col
            next_gem_status = state.gem_status
            for _ in range(move_dist):
                next_row += 1
                # check for collision or game over
                next_row, next_col, reward, collision, is_game_over = \
                    self.__check_collision_or_terminal_glide(next_row, next_col, reward,
                                                             row_move_dir=1, col_move_dir=0)
                if collision or is_game_over:
                    break

            # check if a gem is collected or goal is reached (only do this for final position of charge)
            next_gem_status, is_solved = self.__check_gem_collected_or_goal_reached(next_row, next_col,
                                                                                    next_gem_status)

            return True, reward, GameState(next_row, next_col, next_gem_status), is_game_over or is_solved

        else:
            raise AssertionError('!!! Invalid action given to perform_action() !!!')

    def is_solved(self, state):
        """
        Check if the game has been solved (i.e. player at exit and all gems collected)
        :param state: current GameState
        :return: True if solved, False otherwise
        """
        all_gems_collected = all(g != 0 for g in state.gem_status)
        return state.row == self.exit_row and state.col == self.exit_col and all_gems_collected

    def is_game_over(self, state):
        """
        Check if a game over situation has occurred (i.e. player has entered on a lava tile)
        :param state: current GameState
        :return: True if game over, False otherwise
        :raises AssertionError: if the player is not strictly inside the outermost rows and columns of the grid
        """
        if not (0 < state.row < self.n_rows - 1 and 0 < state.col < self.n_cols - 1):
            raise AssertionError('!!! invalid player coordinates !!!')
        return self.grid_data[state.row][state.col] == self.LAVA_TILE

    def render(self, state):
        """
        Render the map's current state to terminal. Each grid position is drawn three characters wide.
        :param state: current GameState
        """
        for r in range(self.n_rows):
            line = ''
            for c in range(self.n_cols):
                tile = self.grid_data[r][c]
                if state.row == r and state.col == c:
                    # current tile is player
                    line += tile + 'P' + tile
                elif self.exit_row == r and self.exit_col == c:
                    # current tile is exit
                    line += tile + 'E' + tile
                elif (r, c) in self.gem_positions and \
                        state.gem_status[self.gem_positions.index((r, c))] == 0:
                    # current tile is an uncollected gem
                    line += tile + 'G' + tile
                elif tile in {self.SUPER_CHARGE_TILE, self.SUPER_JUMP_TILE}:
                    line += '[' + tile + ']'
                else:
                    line += tile * 3
            print(line)
        print('\n' * 2)