In: Computer Science
2.7 Implement an environment for an n×m rectangular room, where
each square has a 5% chance
of containing dirt, and n and m are chosen at random from the range
8 to 15, inclusive.
2.8 Design and implement a pure reflex agent for the environment of
previous problem 2.7, ignoring
the requirement of returning home, and measure its performance.
Explain why it is impossible
to have a reflex agent that returns home and shuts itself off.
Speculate on what the best possible
reflex agent could do. What prevents a reflex agent from doing very
well?
2.9 Design and implement several agents with internal state.
Measure their performance. How
close do they come to the ideal agent for this environment?
2.10 Calculate the size of the table for a table-lookup agent in
the domain of problem 2.7 .
Explain your calculation. You need not fill in the entries for the
table.
2.11 Experiment with changing the shape and dirt placement of the
room, and with adding
furniture. Measure your agents in these new environments. Discuss
how their performance
might be improved to handle more complex geographies.
Solution
2.7
Set ENV_SIZE and DIRT_CHANCE in main.py
code
main.py
from environment import VacuumEnvironment
from agents import RandomAgent, ReflexAgent, InternalAgent
ENV_SIZE = (12, 12)   # (rows, cols) of the room
                      # NOTE(review): problem 2.7 asks for n, m random in 8..15;
                      # here a fixed size is set by hand as the text instructs.
DIRT_CHANCE = 0.05    # probability that any given square starts dirty


def main():
    """Run one InternalAgent episode in a VacuumEnvironment until done."""
    env = VacuumEnvironment(ENV_SIZE, DIRT_CHANCE)
    agent = InternalAgent()
    # Show the initial room state (first two rows).
    print(env.room[0])
    print(env.room[1])
    observation = env.state()
    reward = 0
    done = False
    action = agent.act(observation, reward)
    turn = 1
    while not done:
        # action is an (id, label) pair: the id drives the environment,
        # the label is used for logging.
        observation, reward, done = env.step(action[0])
        print("Step {0}: Action - {1}".format(turn, action[1]))
        action = agent.act(observation, reward)
        # print("Reward {0} Total Reward {1}".format(reward, agent.reward))
        turn += 1
    # Show the final room state (first row).
    print(env.room[0])


if __name__ == "__main__":
    main()
Screenshot of main.py
---
2.8
Load ReflexAgent from agents.py
It is impossible for a reflex agent to return home because it only knows its immediate environment. The best a reflex agent could do is wander around cleaning dirt until it happens to reach home, at which point it turns off. It is prevented from doing well because it has no way to keep track of what it has already seen or done.
code
agents.py
import numpy as np
# (action_id, human-readable label) pairs shared by all agents: the id is
# what gets passed to the environment's step(), the label is for logging.
ACTIONS = ((0, "Go Forward"),
(1, "Turn Right"),
(2, "Turn Left"),
(3, "Suck Dirt"),
(4, "Turn Off"),
(-1, "Break"),)
class RandomAgent(object):
    """Baseline agent: picks a uniformly random action every turn."""

    def __init__(self):
        self.reward = 0  # cumulative reward received so far

    def act(self, observation, reward):
        """Accumulate reward and return a random (id, label) action pair.

        The observation is ignored entirely - this agent is the baseline
        the reflex and internal agents are measured against.
        """
        self.reward += reward
        action = ACTIONS[np.random.randint(len(ACTIONS))]
        return action
class ReflexAgent(object):
    """Pure reflex agent for problem 2.8: reacts only to the current percept.

    It keeps no memory of where it has been, which is exactly why it cannot
    deliberately return home or know when to shut itself off.
    """

    def __init__(self):
        self.reward = 0  # cumulative reward received so far

    def act(self, observation, reward):
        """Map the current percept directly to an (id, label) action."""
        self.reward += reward
        # If dirt then suck
        if observation['dirt'] == 1:
            return ACTIONS[3]
        # If obstacle then turn
        if observation['obstacle'] == 1:
            return ACTIONS[1]
        # Else randomly choose from the first 3 actions
        # (randomness stops the infinite loop of circling the edge)
        return ACTIONS[np.random.randint(3)]
class InternalAgent(object):
    """Model-based agent for problem 2.9: builds a map of the room as it moves.

    The map grows outward from the start square as the agent explores; the
    agent repeatedly heads for the nearest unknown square, sucking dirt on
    the way, and when no unknowns remain it heads home and turns off.
    """

    def __init__(self):
        self.reward = 0  # cumulative reward received so far
        # Known world map: -1-Unknown, 0-Empty, 1-Dirt, 2-Obstacle, 3-Home
        self.map = [[-1, -1], [-1, -1]]
        # Agent's position relative to the map, and the direction it faces
        self.x = 0
        self.y = 0
        self.facing = 0  # -1-Unknown, 0-Up, 1-Right, 2-Down, 3-Left

    def add_map(self):
        """Grow the map while the agent sits on an edge, so there is always
        at least one row/column of unexplored border around it."""
        side = self.is_on_edge()
        while side >= 0:
            if side == 0:  # Top: prepend a row; our row index shifts down
                self.map.insert(0, [-1] * len(self.map[0]))
                self.x += 1
            elif side == 1:  # Right: append a column
                for row in self.map:
                    row.append(-1)
            elif side == 2:  # Down: append a row
                self.map.append([-1] * len(self.map[0]))
            elif side == 3:  # Left: prepend a column; our col index shifts
                for row in self.map:
                    row.insert(0, -1)
                self.y += 1
            side = self.is_on_edge()

    def is_on_edge(self):
        """Return which map edge the agent is on (0-Top, 1-Right, 2-Down,
        3-Left), or -1 if it is strictly inside the map."""
        if self.x == 0:
            return 0
        elif self.y == len(self.map[0]) - 1:
            return 1
        elif self.x == len(self.map) - 1:
            return 2
        elif self.y == 0:
            return 3
        return -1

    def move_forward(self):
        """Advance the tracked position one square in the facing direction."""
        if self.facing == 0:
            self.x -= 1
        elif self.facing == 1:
            self.y += 1
        elif self.facing == 2:
            self.x += 1
        elif self.facing == 3:
            self.y -= 1

    def move_backwards(self):
        """Step the tracked position back one square.

        Used when the square just moved into turns out to hold an obstacle,
        so the map position matches the agent's real position again.
        """
        if self.facing == 0:
            self.x += 1
        elif self.facing == 1:
            self.y -= 1
        elif self.facing == 2:
            self.x -= 1
        elif self.facing == 3:
            self.y += 1

    def update_map(self, observation):
        """Record the current percept into the map.

        Obstacle percepts overwrite the square and back the agent out of it;
        corners flanked by two obstacles are marked unreachable as well.
        """
        if observation['dirt'] == 1:
            self.map[self.x][self.y] = 1
        elif observation['home'] == 1:
            self.map[self.x][self.y] = 3
        else:
            self.map[self.x][self.y] = 0
        if observation['obstacle'] == 1:
            self.map[self.x][self.y] = 2
            self.move_backwards()
        # Fill in borders: a corner square flanked by two obstacle squares
        # can never be entered, so treat it as an obstacle too.
        x_len = len(self.map) - 1
        y_len = len(self.map[0]) - 1
        if self.map[0][1] == 2 and self.map[1][0] == 2:
            self.map[0][0] = 2
        if self.map[0][y_len - 1] == 2 and self.map[1][y_len] == 2:
            self.map[0][y_len] = 2
        if self.map[x_len - 1][0] == 2 and self.map[x_len][1] == 2:
            self.map[x_len][0] = 2
        if self.map[x_len][y_len - 1] == 2 and self.map[x_len - 1][y_len] == 2:
            self.map[x_len][y_len] = 2

    def next_step(self, next_square):
        """Determine the next action needed to move towards next_square from
        the current position and facing.

        Each axis is considered in turn: if a non-obstacle step towards the
        target exists, either go forward (already facing it) or turn left
        (not yet facing it). If no case applies the agent turns off.
        """
        if next_square[0] < self.x and self.facing != 0 and self.map[self.x - 1][self.y] != 2:
            action = ACTIONS[2]
        elif next_square[0] < self.x and self.facing == 0 and self.map[self.x - 1][self.y] != 2:
            action = ACTIONS[0]
        elif next_square[0] > self.x and self.facing != 2 and self.map[self.x + 1][self.y] != 2:
            action = ACTIONS[2]
        elif next_square[0] > self.x and self.facing == 2 and self.map[self.x + 1][self.y] != 2:
            action = ACTIONS[0]
        elif next_square[1] > self.y and self.facing != 1 and self.map[self.x][self.y + 1] != 2:
            action = ACTIONS[2]
        elif next_square[1] > self.y and self.facing == 1 and self.map[self.x][self.y + 1] != 2:
            action = ACTIONS[0]
        elif next_square[1] < self.y and self.facing != 3 and self.map[self.x][self.y - 1] != 2:
            action = ACTIONS[2]
        elif next_square[1] < self.y and self.facing == 3 and self.map[self.x][self.y - 1] != 2:
            action = ACTIONS[0]
        else:
            action = ACTIONS[4]  # nowhere useful to go: turn off
        # Track the effect of the chosen action on our pose.
        if action[0] == 0:
            self.move_forward()
        if action[0] == 2:
            self.facing = (self.facing - 1) % 4
        return action

    def find_nearest(self, square_type):
        """Return the (row, col) of the closest square of square_type by
        squared Euclidean distance, or None if none exists in the map."""
        min_dist = None
        next_square = None
        for i, row in enumerate(self.map):
            for j, square in enumerate(row):
                if square == square_type:
                    dist = (self.x - i) ** 2 + (self.y - j) ** 2
                    if min_dist is None or dist < min_dist:
                        min_dist = dist
                        next_square = (i, j)
        return next_square

    def choose_action(self):
        """Pick the next action based on the internal map."""
        # If on a patch of dirt then suck it up
        if self.map[self.x][self.y] == 1:
            return ACTIONS[3]
        next_square = self.find_nearest(-1)
        # If no more unknowns then head home
        if next_square is None:
            next_square = self.find_nearest(3)
        return self.next_step(next_square)

    def act(self, observation, reward):
        """Accumulate reward, fold the percept into the map, grow the map
        if needed, and return the chosen (id, label) action."""
        self.reward += reward
        self.update_map(observation)
        self.add_map()
        return self.choose_action()
---
2.9
Load InternalAgent from agents.py
Performance works well, visiting unknown states until all possible unknown states have been checked before returning home. Path optimisation, mapping edges, and choice of way to turn could use some work.
---
2.10
For a 10x10 room each row would take r = 10·2 + 1 = 21 actions to clear (a suck-move pair for each of the 10 squares, plus a turn before the final move). For the entire board this would be b = r·10 − 2 = 208, because the final turn and move actions aren't needed. Each turn has t = (10·10)^2 = 10000 possible percept vectors (dirt or no dirt in each square). Lookup size = t^b = 10000^208 = 10^832 table values. See TableLookup10x10() in agents.py for the completed table.
---
2.11
Agent could be improved by first identifying the edges of the location and then optimising a path through the remaining squares. Could also make actions based on largest area of unknown squares, or always choose unknown squares over known ones when available and moving towards a point.
---
all the best