% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/accessors.R, R/accessors_reward.R,
%   R/accessors_trans_obs.R
\name{accessors}
\alias{accessors}
\alias{start_vector}
\alias{normalize_POMDP}
\alias{normalize_MDP}
\alias{reward_matrix}
\alias{reward_val}
\alias{transition_matrix}
\alias{transition_val}
\alias{observation_matrix}
\alias{observation_val}
\title{Access to Parts of the Model Description}
\usage{
start_vector(x)

normalize_POMDP(
  x,
  sparse = TRUE,
  trans_start = FALSE,
  trans_function = TRUE,
  trans_keyword = FALSE
)

normalize_MDP(
  x,
  sparse = TRUE,
  trans_start = FALSE,
  trans_function = TRUE,
  trans_keyword = FALSE
)

reward_matrix(
  x,
  action = NULL,
  start.state = NULL,
  end.state = NULL,
  observation = NULL,
  episode = NULL,
  epoch = NULL,
  sparse = FALSE
)

reward_val(
  x,
  action,
  start.state,
  end.state = NULL,
  observation = NULL,
  episode = NULL,
  epoch = NULL
)

transition_matrix(
  x,
  action = NULL,
  start.state = NULL,
  end.state = NULL,
  episode = NULL,
  epoch = NULL,
  sparse = FALSE,
  trans_keyword = TRUE
)

transition_val(x, action, start.state, end.state, episode = NULL, epoch = NULL)

observation_matrix(
  x,
  action = NULL,
  end.state = NULL,
  observation = NULL,
  episode = NULL,
  epoch = NULL,
  sparse = FALSE,
  trans_keyword = TRUE
)

observation_val(
  x,
  action,
  end.state,
  observation,
  episode = NULL,
  epoch = NULL
)
}
\arguments{
\item{x}{A \link{POMDP} or \link{MDP} object.}

\item{sparse}{logical; use sparse matrices when the density is below 50\% and keep the data.frame representation
for the reward field. \code{NULL} returns the
representation stored in the problem description, which saves the time for conversion.}

\item{trans_start}{logical; expand the start to a probability vector?}

\item{trans_function}{logical; convert functions into matrices?}

\item{trans_keyword}{logical; convert distribution keywords (uniform and identity)
in \code{transition_prob} or \code{observation_prob} to matrices?}

\item{action}{name or index of an action.}

\item{start.state, end.state}{name or index of the state.}

\item{observation}{name or index of observation.}

\item{episode, epoch}{Episode or epoch used for time-dependent POMDPs. Epochs are internally converted
to the episode using the model horizon.}
}
\value{
A list or a list of lists of matrices.
}
\description{
Functions to provide uniform access to different parts of the POMDP/MDP
problem description.
}
\details{
Several parts of the POMDP/MDP description can be defined in different ways. In particular,
the fields \code{transition_prob}, \code{observation_prob}, \code{reward}, and \code{start} can be defined using matrices, data frames,
keywords, or functions. See \link{POMDP} for details. The functions documented here provide unified access to the data in these fields
to make writing code easier.
\subsection{Transition Probabilities \eqn{T(s'|s,a)}}{

\code{transition_matrix()} accesses the transition model. The complete model
is a list with one element for each action. Each element contains a states x states matrix
with \eqn{s} (\code{start.state}) as rows and \eqn{s'} (\code{end.state}) as columns.
Matrices with a density below 50\% can be requested in sparse format
(as a \link[Matrix:dgCMatrix-class]{Matrix::dgCMatrix}).
}

\subsection{Observation Probabilities \eqn{O(o|s',a)}}{

\code{observation_matrix()} accesses the observation model. The complete model is a
list with one element for each action. Each element contains a states x observations matrix
with \eqn{s'} (\code{end.state}) as rows and \eqn{o} (\code{observation}) as columns.
Matrices with a density below 50\% can be requested in sparse format
(as a \link[Matrix:dgCMatrix-class]{Matrix::dgCMatrix}).
}

\subsection{Reward \eqn{R(s,s',o,a)}}{

\code{reward_matrix()} accesses the reward model.
The preferred representation is a data.frame with the
columns \code{action}, \code{start.state}, \code{end.state},
\code{observation}, and \code{value}. This is a sparse representation.
The dense representation is a list of lists of matrices.
The list levels are \eqn{a} (\code{action}) and \eqn{s} (\code{start.state}).
The matrices have rows representing \eqn{s'} (\code{end.state})
and columns representing \eqn{o} (\code{observation}).
The reward structure cannot be efficiently stored using a standard sparse matrix
since there might be a fixed cost for each action
resulting in no entries with 0.
}

\subsection{Initial Belief}{

\code{start_vector()} translates the initial probability vector description into a numeric vector.
}

\subsection{Convert the Complete POMDP Description into a Consistent Form}{

\code{normalize_POMDP()} returns a new POMDP definition where \code{transition_prob},
\code{observation_prob}, \code{reward}, and \code{start} are normalized.

Also, \code{states}, \code{actions}, and \code{observations} are ordered as given in the problem
definition to make safe access using numerical indices possible. Normalized POMDP descriptions can be
used in custom code that consistently expects a certain format.
}
}
\examples{
data("Tiger")

# List of |A| transition matrices. One per action in the form start.states x end.states
Tiger$transition_prob
transition_matrix(Tiger)
transition_val(Tiger, action = "listen", start.state = "tiger-left", end.state = "tiger-left")

# List of |A| observation matrices. One per action in the form states x observations
Tiger$observation_prob
observation_matrix(Tiger)
observation_val(Tiger, action = "listen", end.state = "tiger-left", observation = "tiger-left")

# List of lists of reward matrices. The first level is the action and the second level is the
#  start state; each matrix has the form end state x observation
Tiger$reward
reward_matrix(Tiger)
reward_matrix(Tiger, sparse = TRUE)
reward_matrix(Tiger, action = "open-right", start.state = "tiger-left", end.state = "tiger-left",
  observation = "tiger-left")

# Translate the initial belief vector
Tiger$start
start_vector(Tiger)

# Normalize the whole model
Tiger_norm <- normalize_POMDP(Tiger)
Tiger_norm$transition_prob

## Visualize transition matrix for action 'open-left'
plot_transition_graph(Tiger)

## Use a function for the Tiger transition model
trans <- function(action, end.state, start.state) {
  # every action other than listen has a uniform distribution over the two states
  if (action != 'listen') {
    return(1/2)
  }

  # listen has an identity matrix: probability 1 on the diagonal, 0 elsewhere
  if (end.state == start.state) 1 else 0
}

Tiger$transition_prob <- trans

# transition_matrix evaluates the function
transition_matrix(Tiger)
}
\seealso{
Other POMDP: 
\code{\link{MDP2POMDP}},
\code{\link{POMDP}()},
\code{\link{actions}()},
\code{\link{add_policy}()},
\code{\link{plot_belief_space}()},
\code{\link{projection}()},
\code{\link{reachable_and_absorbing}},
\code{\link{regret}()},
\code{\link{sample_belief_space}()},
\code{\link{simulate_POMDP}()},
\code{\link{solve_POMDP}()},
\code{\link{solve_SARSOP}()},
\code{\link{transition_graph}()},
\code{\link{update_belief}()},
\code{\link{value_function}()},
\code{\link{write_POMDP}()}

Other MDP: 
\code{\link{MDP}()},
\code{\link{MDP2POMDP}},
\code{\link{MDP_policy_functions}},
\code{\link{actions}()},
\code{\link{add_policy}()},
\code{\link{gridworld}},
\code{\link{reachable_and_absorbing}},
\code{\link{regret}()},
\code{\link{simulate_MDP}()},
\code{\link{solve_MDP}()},
\code{\link{transition_graph}()},
\code{\link{value_function}()}
}
\author{
Michael Hahsler
}
\concept{MDP}
\concept{POMDP}
