# Source code for textworld.gym.envs.textworld

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.


from typing import List, Optional, Dict, Any, Tuple

import gym

from textworld import EnvInfos
from textworld.gym.envs.textworld_batch import TextworldBatchGymEnv


class TextworldGymEnv(TextworldBatchGymEnv):
    """ Gym environment for playing one text-based game at a time.

    This class adapts the list-based interface of
    :py:class:`TextworldBatchGymEnv`: a single command is sent to the parent
    wrapped in a one-element list, and only the first element of each value
    the parent returns is handed back to the caller.
    """

    metadata = {'render.modes': ['human', 'ansi', 'text']}

    def __init__(self,
                 gamefiles: List[str],
                 request_infos: Optional[EnvInfos] = None,
                 max_episode_steps: Optional[int] = None,
                 action_space: Optional[gym.Space] = None,
                 observation_space: Optional[gym.Space] = None,
                 **kwargs) -> None:
        """ Environment for playing text-based games.

        Arguments:
            gamefiles:
                Paths of every game composing the pool (`*.ulx|*.z[1-8]`).
            request_infos:
                For customizing the information returned by this environment
                (see
                :py:class:`textworld.EnvInfos <textworld.envs.wrappers.filter.EnvInfos>`
                for the list of available information).

                .. warning:: Only supported for TextWorld games (i.e., that have a corresponding `*.json` file).
            max_episode_steps:
                Number of steps allocated to play each game. Once exhausted, the game is done.
            action_space:
                The action space to be used with OpenAI baselines.
                (see :py:class:`textworld.gym.spaces.Word <textworld.gym.spaces.text_spaces.Word>`).
            observation_space:
                The observation space to be used with OpenAI baselines
                (see :py:class:`textworld.gym.spaces.Word <textworld.gym.spaces.text_spaces.Word>`).
            **kwargs:
                Any extra keyword arguments are forwarded unchanged to the
                parent class constructor.
        """
        super().__init__(gamefiles=gamefiles,
                         request_infos=request_infos,
                         max_episode_steps=max_episode_steps,
                         action_space=action_space,
                         observation_space=observation_space,
                         **kwargs)

    def reset(self) -> Tuple[str, Dict[str, Any]]:
        """ Resets the text-based environment.

        Resetting this environment means starting the next game in the pool.

        Returns:
            A tuple (observation, infos) where

            * observation: text observed in the initial state;
            * infos: additional information as requested.
        """
        obs, infos = super().reset()
        # The parent returns list-valued results; keep only the first entry
        # of the observations and of every requested info.
        return obs[0], {k: v[0] for k, v in infos.items()}

    def step(self, command: str) -> Tuple[str, int, bool, Dict[str, Any]]:
        """ Runs a command in the text-based environment.

        Arguments:
            command: Text command to send to the game interpreter.

        Returns:
            A tuple (observation, score, done, infos) where

            * observation: text observed in the new state;
            * score: total number of points accumulated so far;
            * done: whether the game is finished or not;
            * infos: additional information as requested.
        """
        # The parent expects a list of commands, so wrap the single command
        # and unwrap the first element of each returned value.
        # NOTE(review): `score` is annotated as `int` on the assumption that
        # the parent reports integer scores -- confirm against the parent.
        obs, scores, dones, infos = super().step([command])
        return obs[0], scores[0], dones[0], {k: v[0] for k, v in infos.items()}