Exposing Stan user-defined functions using CmdStanR and Rcpp
2022-04-10
1 Introduction
This document showcases how to expose user-defined Stan functions using CmdStanR. To run the examples below, you need to have CmdStanR, Rcpp and RcppEigen packages installed and an installation of CmdStan. The minimum CmdStan version is 2.26.2, though I recommend running with 2.29.2 or newer. If you experience issues using this function or running the tutorial below or have a feature request, please report it on the issue tracker here.
2 Preparing the environment
We first need to load the function expose_cmdstanr_functions()
that will do all the hard work for us. The function is available on Github here. You can also copy and source the code below.
<- function(model_path, include_paths = NULL,
expose_cmdstanr_functions expose_to_global_env = FALSE) {
<- c("Rcpp", "RcppEigen", "cmdstanr")
required_pkgs <- required_pkgs %in% rownames(installed.packages())
found_pkgs if (!all(found_pkgs)) {
stop(
"The following required packages are missing: ",
paste0(required_packages[!found_pkgs], collapse = ", "),
"."
)
}if (cmdstanr::cmdstan_version() < "2.26.0") {
stop("Please install CmdStan version 2.26 or newer.", call. = FALSE)
}<- function(flag_name) {
get_cmdstan_flags <- cmdstanr::cmdstan_path()
cmdstan_path <- processx::run(
flags "make",
args = c(paste0("print-", flag_name)),
wd = cmdstan_path
$stdout
)<- gsub(
flags pattern = paste0(flag_name, " ="),
replacement = "", x = flags, fixed = TRUE
)<- gsub(
flags pattern = " stan/", replacement = paste0(" ", cmdstan_path, "/stan/"),
x = flags, fixed = TRUE
)<- gsub(
flags pattern = "-I lib/", replacement = paste0("-I ", cmdstan_path, "/lib/"),
x = flags, fixed = TRUE
)<- gsub(
flags pattern = "-I src", replacement = paste0("-I ", cmdstan_path, "/src"),
x = flags, fixed = TRUE
)gsub("\n", "", flags)
}<- tempfile(pattern = "model-", fileext = ".stan")
temp_stan_file <- paste0(tools::file_path_sans_ext(temp_stan_file), ".cpp")
temp_cpp_file file.copy(model_path, temp_stan_file, overwrite = TRUE)
if (isTRUE(.Platform$OS.type == "windows")) {
<- "./bin/stanc.exe"
stanc3 else {
} <- "./bin/stanc"
stanc3
}::run(
processx
stanc3,args = c(
temp_stan_file,"--standalone-functions",
paste0("--include-paths=", include_paths),
paste0("--o=",temp_cpp_file)
),wd = cmdstanr::cmdstan_path()
)<- paste(readLines(temp_cpp_file), collapse = "\n")
code <- paste(
code "// [[Rcpp::depends(RcppEigen)]]",
"#include <stan/math/prim/fun/Eigen.hpp>",
"#include <RcppCommon.h>
#include <boost/random/additive_combine.hpp>
#include <iostream>
namespace Rcpp {
SEXP wrap(boost::ecuyer1988 RNG);
SEXP wrap(boost::ecuyer1988& RNG);
SEXP wrap(std::ostream stream);
template <> boost::ecuyer1988 as(SEXP ptr_RNG);
template <> boost::ecuyer1988& as(SEXP ptr_RNG);
template <> std::ostream* as(SEXP ptr_stream);
namespace traits {
template <> class Exporter<boost::ecuyer1988&>;
template <> struct input_parameter<boost::ecuyer1988&>;
}
}
#include <Rcpp.h>
namespace Rcpp {
SEXP wrap(boost::ecuyer1988 RNG){
boost::ecuyer1988* ptr_RNG = &RNG;
Rcpp::XPtr<boost::ecuyer1988> Xptr_RNG(ptr_RNG);
return Xptr_RNG;
}
SEXP wrap(boost::ecuyer1988& RNG){
boost::ecuyer1988* ptr_RNG = &RNG;
Rcpp::XPtr<boost::ecuyer1988> Xptr_RNG(ptr_RNG);
return Xptr_RNG;
}
SEXP wrap(std::ostream stream) {
std::ostream* ptr_stream = &stream;
Rcpp::XPtr<std::ostream> Xptr_stream(ptr_stream);
return Xptr_stream;
}
template <> boost::ecuyer1988 as(SEXP ptr_RNG) {
Rcpp::XPtr<boost::ecuyer1988> ptr(ptr_RNG);
boost::ecuyer1988& RNG = *ptr;
return RNG;
}
template <> boost::ecuyer1988& as(SEXP ptr_RNG) {
Rcpp::XPtr<boost::ecuyer1988> ptr(ptr_RNG);
boost::ecuyer1988& RNG = *ptr;
return RNG;
}
template <> std::ostream* as(SEXP ptr_stream) {
Rcpp::XPtr<std::ostream> ptr(ptr_stream);
return ptr;
}
namespace traits {
template <> class Exporter<boost::ecuyer1988&> {
public:
Exporter( SEXP x ) : t(Rcpp::as<boost::ecuyer1988&>(x)) {}
inline boost::ecuyer1988& get() { return t ; }
private:
boost::ecuyer1988& t ;
} ;
template <>
struct input_parameter<boost::ecuyer1988&> {
typedef
typename Rcpp::ConstReferenceInputParameter<boost::ecuyer1988&> type ;
//typedef typename boost::ecuyer1988& type ;
};
}
}
RcppExport SEXP get_stream_() {
std::ostream* pstream(&Rcpp::Rcout);
Rcpp::XPtr<std::ostream> ptr(pstream, false);
return ptr;
}
RcppExport SEXP get_rng_(SEXP seed) {
int seed_ = Rcpp::as<int>(seed);
boost::ecuyer1988* rng = new boost::ecuyer1988(seed_);
Rcpp::XPtr<boost::ecuyer1988> ptr(rng, true);
return ptr;
}
",
"#include <RcppEigen.h>",
code,sep = "\n"
)<- gsub("// [[stan::function]]",
code "// [[Rcpp::export]]", code, fixed = TRUE)
<- gsub(
code "stan::math::accumulator<double>& lp_accum__, std::ostream* pstream__ = nullptr){",
"std::ostream* pstream__ = nullptr){\nstan::math::accumulator<double> lp_accum__;",
code,fixed = TRUE
)<- gsub("__ = nullptr", "__ = 0", code, fixed = TRUE)
code
<- function() {
get_stream return(.Call('get_stream_'))
}<- function(seed=0L) {
get_rng if (!identical(seed, 0L)) {
if (length(seed) != 1)
stop("Seed must be a length-1 integer vector.")
}return(.Call('get_rng_', seed))
}if (expose_to_global_env) {
= globalenv()
env else {
} = new.env()
env
}<- withr::with_makevars(
compiled c(
USE_CXX14 = 1,
PKG_CPPFLAGS = "",
PKG_CXXFLAGS = get_cmdstan_flags("CXXFLAGS"),
PKG_LIBS = paste0(
get_cmdstan_flags("LDLIBS"),
get_cmdstan_flags("LIBSUNDIALS"),
get_cmdstan_flags("TBB_TARGETS"),
get_cmdstan_flags("LDFLAGS_TBB")
)
),::sourceCpp(code = code, env = env)
Rcpp
)for (x in compiled$functions) {
<- get(x, envir = env)
FUN <- formals(FUN)
args $pstream__ <- get_stream()
argsif ("lp__" %in% names(args)) args$lp__ <- 0
if ("base_rng__" %in% names(args)) args$base_rng__ <- get_rng()
formals(FUN) <- args
assign(x, FUN, envir = env)
}assign("stan_rng__", get_rng, envir = env)
if (expose_to_global_env) {
invisible(NULL)
else {
} return(env)
} }
3 Simple example
Once we load the above expose_cmdstanr_functions()
function into the R environmentm we can try it out on a Stan function that applies the softmax()
to each row of an input matrix. We can write the function(s) in a file with a .stan
or .stanfunctions
suffix or a string inside an R script. Using separate files is recommended, as developing functions inside strings is not user friendly. To make it a bit easier for you to copy and run this tutorial on your own, this example is using Stan functions written in strings.
<- "
model_code functions {
matrix rows_softmax(matrix x) {
matrix[rows(x), cols(x)] y;
for(i in 1:rows(x)) {
y[i, :] = softmax(x[i, ]')';
}
return y;
}
}
"
We then use the write_stan_file()
utility function from cmdstanr
to store the model code in a file. If you prefer, you can also use the write()
function in base R or any other function that writes a string in a file. Make sure the file has a .stan
or .stanfunctions
extension.
<- cmdstanr::write_stan_file(code = model_code) stan_file
Finally, we supply the written stan_file
to expose_cmdstanr_functions()
:
<- expose_cmdstanr_functions(model_path = stan_file) udfs
The function returns a new environment with the Stan UDF in it. We can thus use udfs$rows_softmax()
directly in R. We create a matrix of random values
<- matrix(runif(25, 0, 2), nrow = 5)
input_matrix input_matrix
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.3372246 0.8277205 1.9614013 1.454146 0.7730535
## [2,] 1.3449728 1.4893030 1.7413037 1.377557 0.4452663
## [3,] 1.7938820 1.3789612 0.4099089 0.279518 0.7293895
## [4,] 1.7489683 1.2871712 1.0768524 1.586360 1.7205503
## [5,] 0.3748405 1.1560285 0.8086070 1.200402 0.5067908
pass the input matrix to the Stan UDF
<- udfs$rows_softmax(input_matrix)
res res
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.08124108 0.1326769 0.4122367 0.24822677 0.12561854
## [2,] 0.19679475 0.2273502 0.2925081 0.20331275 0.08003426
## [3,] 0.40390587 0.2667364 0.1012111 0.08883829 0.13930827
## [4,] 0.25235426 0.1590211 0.1288589 0.21448195 0.24528380
## [5,] 0.12267925 0.2679395 0.1893013 0.28009665 0.13998335
and finally validate that rows do actually sum to 1.
apply(res, 1, sum)
## [1] 1 1 1 1 1
And that is it! Well, at least if you do not plan on working with random number generators. If you do, continue with the next section.
4 RNG user-defined functions
When working with RNG functions, we need a way to specify the seed for the random number generator to make things reproducible. In the example we will use the gpareto_rng
function from Aki Vehtari’s case study. You can also find it in Sean Pinkney’s helpful Stan functions repo. We can proceed to expose the function:
<- "
gpareto_code functions {
real gpareto_rng(real ymin, real k, real sigma) {
if (sigma <= 0)
reject(\"sigma <= 0; found sigma = \", sigma);
if (fabs(k) > 1e-15)
return ymin + (uniform_rng(0, 1) ^ -k - 1) * sigma / k;
else
return ymin - sigma * log(uniform_rng(0, 1)); // limit k->0
}
}
"
<- cmdstanr::write_stan_file(code = gpareto_code)
stan_file_rng <- expose_cmdstanr_functions(model_path = stan_file_rng) udfs_rng
We define some inputs and generat a few values:
<- rexp(1)
ymin <- rexp(1,5)
k <- rexp(1)
sigma $gpareto_rng(ymin, k, sigma) udfs_rng
## [1] 1.285279
$gpareto_rng(ymin, k, sigma) udfs_rng
## [1] 1.293365
$gpareto_rng(ymin, k, sigma) udfs_rng
## [1] 1.42966
If we do not specify a seed, the random generator is created using the default seed.
If we examine what names are in the returned environment, we will find the gpareto_rng
function we expected. But, we will also find a function called stan_rng__
. This function is used to create a random number generator object with a specified seed. The function has a double underscores suffix in order to avoid any chance of shadowing with a potential Stan UDF function.
names(udfs_rng)
## [1] "stan_rng__" "gpareto_rng"
If we want to make our code reproducible, we generate an RNG object with a seed and then the RNG function and set the base_rng__
argument of the exposed function to our created RNG object. The generator object advances with each call to the Stan RNG user-defined function.
<- rexp(1)
ymin <- rexp(1,5)
k <- rexp(1)
sigma <- udfs_rng$stan_rng__(1)
seed <- c()
vals for(i in 1:10) {
<- c(vals, udfs_rng$gpareto_rng(ymin, k, sigma, base_rng__ = seed))
vals
} vals
## [1] 1.256861 1.270420 1.489992 1.866879 3.310240 2.306312 1.475671 2.727928
## [9] 2.161117 2.284900
If we then create a separate generator object with the same seed we will get the same stream of generated values.
<- udfs_rng$stan_rng__(1)
seed <- c()
vals for(i in 1:10) {
<- c(vals, udfs_rng$gpareto_rng(ymin, k, sigma, base_rng__ = seed))
vals
} vals
## [1] 1.256861 1.270420 1.489992 1.866879 3.310240 2.306312 1.475671 2.727928
## [9] 2.161117 2.284900
5 Exposing UDFs to the global namespace
If you want to expose the functions directly to the global R namespace, set the expose_to_global_env
argument to TRUE
.
expose_cmdstanr_functions(model_path = stan_file, expose_to_global_env = TRUE)
rows_softmax(input_matrix)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.08124108 0.1326769 0.4122367 0.24822677 0.12561854
## [2,] 0.19679475 0.2273502 0.2925081 0.20331275 0.08003426
## [3,] 0.40390587 0.2667364 0.1012111 0.08883829 0.13930827
## [4,] 0.25235426 0.1590211 0.1288589 0.21448195 0.24528380
## [5,] 0.12267925 0.2679395 0.1893013 0.28009665 0.13998335