#-----------------------------------------------------------------------------
# Copyright (c) 2013-2015, PyStan developers
#
# This file is licensed under Version 3.0 of the GNU General Public
# License. See LICENSE for a text of the license.
#-----------------------------------------------------------------------------
import hashlib
import io
import logging
import os
import pystan._api # stanc wrapper
from pystan._compat import string_types, PY2
from pystan.model import StanModel
logger = logging.getLogger('pystan')
def stanc(file=None, charset='utf-8', model_code=None, model_name="anon_model",
          include_paths=None, verbose=False, obfuscate_model_name=True,
          allow_undefined=False):
    """Translate Stan model specification into C++ code.

    Parameters
    ----------
    file : {string, file}, optional
        If filename, the string passed as an argument is expected to
        be a filename containing the Stan model specification.

        If file, the object passed must have a 'read' method (file-like
        object) that is called to fetch the Stan model specification.
    charset : string, 'utf-8' by default
        Charset used to decode the file named by `file`. On Python 3 it is
        also used to decode the model specification when it arrives as
        bytes (a bytes `model_code` or a file object opened in binary mode).
    model_code : string, optional
        A string containing the Stan model specification. Alternatively,
        the model may be provided with the parameter `file`.
    model_name: string, 'anon_model' by default
        A string naming the model. The name must be ASCII-encodable.
    include_paths: list of strings, optional
        Paths for #include files defined in Stan code. A single string is
        treated as a one-element list. Defaults to the current working
        directory.
    verbose : boolean, False by default
        Accepted for interface compatibility; it is not referenced by the
        body of this function.
    obfuscate_model_name : boolean, True by default
        If True, an underscore and the MD5 hex digest of the model code are
        appended to `model_name`, so the name depends on the code.
        If False the model name is used unchanged.
        Generally it is recommended that this parameter be left as True.
    allow_undefined : boolean, False by default
        If True, the C++ code can be written even if there are undefined
        functions.

    Returns
    -------
    stanc_ret : dict
        The dictionary produced by the stanc wrapper without its ``msg``
        entry (see the example below for its keys), extended with the keys
        ``model_name``, ``model_code`` and ``include_paths``. ``status`` is
        0 on success; a status of -1 is reported by raising ``ValueError``
        instead of being returned.

    Raises
    ------
    ValueError
        If both `file` and `model_code` are given, if neither is given, or
        if the Stan code cannot be parsed.

    Notes
    -----
    C++ reserved words and Stan reserved words may not be used for
    variable names; see the Stan User's Guide for a complete list.

    The `#include` method follows a C/C++ syntax `#include foo/my_gp_funs.stan`.
    The method needs to be at the start of the row, no whitespace is allowed.
    After the included file no whitespace or comments are allowed.
    `pystan.experimental` (PyStan 2.18) has a `fix_include`-function to clean
    the `#include` statements from the `model_code`.
    Example:
    `from pystan.experimental import fix_include`
    `model_code = fix_include(model_code)`

    See also
    --------
    StanModel : Class representing a compiled Stan model
    stan : Fit a model using Stan

    References
    ----------
    The Stan Development Team (2013) *Stan Modeling Language User's
    Guide and Reference Manual*.  <http://mc-stan.org/>.

    Examples
    --------
    >>> stanmodelcode = '''
    ... data {
    ...   int<lower=0> N;
    ...   real y[N];
    ... }
    ...
    ... parameters {
    ...   real mu;
    ... }
    ...
    ... model {
    ...   mu ~ normal(0, 10);
    ...   y ~ normal(mu, 1);
    ... }
    ... '''
    >>> r = stanc(model_code=stanmodelcode, model_name="normal1",
    ...           obfuscate_model_name=False)
    >>> sorted(r.keys())
    ['cppcode', 'include_paths', 'model_code', 'model_cppname', 'model_name', 'status']
    >>> r['model_name']
    'normal1'

    """
    if file and model_code:
        raise ValueError("Specify stan model with `file` or `model_code`, "
                         "not both.")
    if file is None and model_code is None:
        raise ValueError("Model file missing and empty model_code.")
    if file is not None:
        if isinstance(file, string_types):
            try:
                with io.open(file, 'rt', encoding=charset) as f:
                    model_code = f.read()
            except Exception:
                # Log for the user, then let the original error propagate.
                logger.critical("Unable to read file specified by `file`.")
                raise
        else:
            model_code = file.read()

    # A bytes model specification (binary file object or bytes `model_code`)
    # has no `.encode` on Python 3, so decode it with `charset` first. On
    # Python 2 `str` is the bytes type; its historical handling is kept.
    if not PY2 and isinstance(model_code, bytes):
        model_code = model_code.decode(charset)

    # bytes, going into C++ code
    model_code_bytes = model_code.encode('utf-8')

    if include_paths is None:
        include_paths = [os.path.abspath('.')]
    elif isinstance(include_paths, string_types):
        include_paths = [include_paths]
    # add trailing /
    include_paths = [os.path.join(path, "") for path in include_paths]
    include_paths_bytes = [path.encode('utf-8') for path in include_paths]

    if obfuscate_model_name:
        # Make the model name depend on the code.
        model_name = model_name + '_' + hashlib.md5(model_code_bytes).hexdigest()
    model_name_bytes = model_name.encode('ascii')

    if isinstance(file, string_types):
        # use only the filename, used only for debug printing
        filename_bytes = os.path.split(file)[-1].encode('utf-8')
    else:
        # use default 'unknown file name'
        filename_bytes = b'unknown file name'

    result = pystan._api.stanc(model_code_bytes, model_name_bytes,
                               allow_undefined, filename_bytes,
                               include_paths_bytes)

    if result['status'] == -1:  # EXCEPTION_RC is -1
        msg = result['msg']
        if PY2:
            # fix problem with unicode in error message in PY2
            msg = msg.encode('ascii', 'replace')
        error_msg = "Failed to parse Stan model '{}'. Error message:\n{}".format(model_name, msg)
        raise ValueError(error_msg)
    elif result['status'] == 0:  # SUCCESS_RC is 0
        logger.debug("Successfully parsed Stan model '%s'.", model_name)

    del result['msg']
    result.update({
        'model_name': model_name,
        'model_code': model_code,
        'include_paths': include_paths,
    })
    return result
def stan(file=None, model_name="anon_model", model_code=None, fit=None,
         data=None, pars=None, chains=4, iter=2000, warmup=None, thin=1,
         init="random", seed=None, algorithm=None, control=None, sample_file=None,
         diagnostic_file=None, verbose=False, boost_lib=None, eigen_lib=None,
         include_paths=None, n_jobs=-1, allow_undefined=False, **kwargs):
    """Fit a model using Stan.

    The `pystan.stan` function was deprecated in version 2.17 and will be
    removed in version 3.0. Compiling and using a Stan Program (e.g., for
    drawing samples) should be done in separate steps.

    Parameters
    ----------
    file : string {'filename', file-like object}
        Model code must be found via one of the following parameters: `file`
        or `model_code`.

        If `file` is a filename, the string passed as an argument is expected
        to be a filename containing the Stan model specification.

        If `file` is a file object, the object passed must have a 'read' method
        (file-like object) that is called to fetch the Stan model specification.
    model_code : string
        A string containing the Stan model specification. Alternatively,
        the model may be provided with the parameter `file`.
    model_name: string, optional
        A string naming the model. 'anon_model' by default. The value is
        passed on to `StanModel`.
    fit : StanFit instance
        An instance of StanFit derived from a previous fit, None by
        default. If `fit` is not None, the compiled model associated
        with a previous fit is reused and recompilation is avoided.
    data : dict
        A Python dictionary providing the data for the model. Variables
        for Stan are stored in the dictionary as expected. Variable
        names are the keys and the values are their associated values.
        Stan only accepts certain kinds of values; see Notes.
        An empty dictionary is used when `data` is None.
    pars : list of string, optional
        A list of strings indicating parameters of interest. By default
        all parameters specified in the model will be stored.
    chains : int, optional
        Positive integer specifying number of chains. 4 by default.
    iter : int, 2000 by default
        Positive integer specifying how many iterations for each chain
        including warmup.
    warmup : int, iter//2 by default
        Positive integer specifying number of warmup (aka burn-in) iterations.
        As `warmup` also specifies the number of iterations used for stepsize
        adaptation, warmup samples should not be used for inference.
    thin : int, optional
        Positive integer specifying the period for saving samples.
        Default is 1.
    init : {0, '0', 'random', function returning dict, list of dict}, optional
        Specifies how initial parameter values are chosen:
        - 0 or '0' initializes all to be zero on the unconstrained support.
        - 'random' generates random initial values. An optional parameter
          `init_r` controls the range of randomly generated initial values
          for parameters in terms of their unconstrained support;
        - list of size equal to the number of chains (`chains`), where the
          list contains a dict with initial parameter values;
        - function returning a dict with initial parameter values. The
          function may take an optional argument `chain_id`.
    seed : int or np.random.RandomState, optional
        The seed, a positive integer for random number generation. Only
        one seed is needed when multiple chains are used, as the other
        chain's seeds are generated from the first chain's to prevent
        dependency among random number streams. By default, seed is
        ``random.randint(0, MAX_UINT)``.
    algorithm : {"NUTS", "HMC", "Fixed_param"}, optional
        One of the algorithms that are implemented in Stan such as the No-U-Turn
        sampler (NUTS, Hoffman and Gelman 2011) and static HMC.
    sample_file : string, optional
        File name specifying where samples for *all* parameters and other
        saved quantities will be written. If not provided, no samples
        will be written. If the folder given is not writable, a temporary
        directory will be used. When there are multiple chains, an underscore
        and chain number are appended to the file name. By default do not
        write samples to file.
    diagnostic_file : string, optional
        File name specifying where diagnostic information should be written.
        By default no diagnostic information is recorded.
    boost_lib : string, optional
        The path to a version of the Boost C++ library to use instead of
        the one supplied with PyStan.
    eigen_lib : string, optional
        The path to a version of the Eigen C++ library to use instead of
        the one supplied with PyStan.
    include_paths : list of strings, optional
        Paths for #include files defined in Stan code.
    verbose : boolean, optional
        Indicates whether intermediate output should be piped to the console.
        This output may be useful for debugging. False by default.
    control : dict, optional
        A dictionary of parameters to control the sampler's behavior. Default
        values are used if control is not specified.  The following are
        adaptation parameters for sampling algorithms.

        These are parameters used in Stan with similar names:

        - `adapt_engaged` : bool
        - `adapt_gamma` : float, positive, default 0.05
        - `adapt_delta` : float, between 0 and 1, default 0.8
        - `adapt_kappa` : float, default 0.75
        - `adapt_t0`    : float, positive, default 10
        - `adapt_init_buffer` : int, positive, defaults to 75
        - `adapt_term_buffer` : int, positive, defaults to 50
        - `adapt_window` : int, positive, defaults to 25

        In addition, the algorithm HMC (called 'static HMC' in Stan) and NUTS
        share the following parameters:

        - `stepsize`: float, positive
        - `stepsize_jitter`: float, between 0 and 1
        - `metric` : str, {"unit_e", "diag_e", "dense_e"}

        In addition, depending on which algorithm is used, different parameters
        can be set as in Stan for sampling. For the algorithm HMC we can set

        - `int_time`: float, positive

        For algorithm NUTS, we can set

        - `max_treedepth` : int, positive

    n_jobs : int, optional
        Sample in parallel. If -1 all CPUs are used. If 1, no parallel
        computing code is used at all, which is useful for debugging.
    allow_undefined : boolean, False by default
        If True, the C++ code can be written even if there are undefined
        functions.

    Returns
    -------
    fit : StanFit instance

    Raises
    ------
    ValueError
        If a keyword argument other than those listed under "Other
        parameters" is supplied. This is checked before the model is built.

    Other parameters
    ----------------
    chain_id : int, optional
        `chain_id` can be a vector to specify the chain_id for all chains or
        an integer. For the former case, they should be unique. For the latter,
        the sequence of integers starting from the given `chain_id` are used
        for all chains.
    init_r : float, optional
        `init_r` is only valid if `init` == "random". In this case, the initial
        values are simulated from [-`init_r`, `init_r`] rather than using the
        default interval (see the manual of (Cmd)Stan).
    test_grad: bool, optional
        If `test_grad` is ``True``, Stan will not do any sampling. Instead,
        the gradient calculation is tested and printed out and the fitted
        StanFit4Model object is in test gradient mode. By default, it is
        ``False``.
    append_samples : bool, optional
    enable_random_init : optional
        Accepted and forwarded unchanged to the model's `sampling` method.
    refresh : int, optional
        Argument `refresh` can be used to control how to indicate the progress
        during sampling (i.e. show the progress every `refresh` iterations).
        By default, `refresh` is `max(iter/10, 1)`.
    obfuscate_model_name : boolean, optional
        `obfuscate_model_name` is only used if `fit` is None. True by default.
        If False the model name in the generated C++ code will not be made
        unique. Generally it is recommended that this parameter be left as
        True.

    Examples
    --------
    >>> from pystan import stan
    >>> import numpy as np
    >>> model_code = '''
    ... parameters {
    ...   real y[2];
    ... }
    ... model {
    ...   y[1] ~ normal(0, 1);
    ...   y[2] ~ double_exponential(0, 2);
    ... }'''
    >>> fit1 = stan(model_code=model_code, iter=10)
    >>> print(fit1)
    >>> excode = '''
    ... transformed data {
    ...     real y[20];
    ...     y[1] = 0.5796;  y[2]  = 0.2276;   y[3] = -0.2959;
    ...     y[4] = -0.3742; y[5]  = 0.3885;   y[6] = -2.1585;
    ...     y[7] = 0.7111;  y[8]  = 1.4424;   y[9] = 2.5430;
    ...     y[10] = 0.3746; y[11] = 0.4773;   y[12] = 0.1803;
    ...     y[13] = 0.5215; y[14] = -1.6044;  y[15] = -0.6703;
    ...     y[16] = 0.9459; y[17] = -0.382;   y[18] = 0.7619;
    ...     y[19] = 0.1006; y[20] = -1.7461;
    ... }
    ... parameters {
    ...     real mu;
    ...     real<lower=0, upper=10> sigma;
    ...     vector[2] z[3];
    ...     real<lower=0> alpha;
    ... }
    ... model {
    ...     y ~ normal(mu, sigma);
    ...     for (i in 1:3)
    ...         z[i] ~ normal(0, 1);
    ...     alpha ~ exponential(2);
    ... }'''
    >>>
    >>> def initfun1():
    ...     return dict(mu=1, sigma=4, z=np.random.normal(size=(3, 2)), alpha=1)
    >>> exfit0 = stan(model_code=excode, init=initfun1)
    >>> def initfun2(chain_id=1):
    ...     return dict(mu=1, sigma=4, z=np.random.normal(size=(3, 2)), alpha=1 + chain_id)
    >>> exfit1 = stan(model_code=excode, init=initfun2)

    """
    # `obfuscate_model_name` is consumed here; it is only passed to StanModel
    # and must not reach `sampling`.
    obfuscate_model_name = kwargs.pop("obfuscate_model_name", True)

    # Check the remaining keyword arguments up front so that a typo is
    # reported before the model is built rather than after.
    valid_args = {"chain_id", "init_r", "test_grad", "append_samples", "enable_random_init",
                  "refresh", "control"}
    for arg in kwargs:
        if arg not in valid_args:
            raise ValueError("Parameter `{}` is not recognized.".format(arg))

    logger.warning('DeprecationWarning: pystan.stan was deprecated in version 2.17 and will be removed in version 3.0. '
                   'Compile and use a Stan program in separate steps.')
    # NOTE: this is a thin wrapper for other functions. Error handling occurs
    # elsewhere.
    if data is None:
        data = {}
    if warmup is None:
        warmup = int(iter // 2)

    if fit is not None:
        # Reuse the model attached to the previous fit.
        m = fit.stanmodel
    else:
        m = StanModel(file=file, model_name=model_name, model_code=model_code,
                      boost_lib=boost_lib, eigen_lib=eigen_lib,
                      include_paths=include_paths,
                      obfuscate_model_name=obfuscate_model_name, verbose=verbose,
                      allow_undefined=allow_undefined)

    fit = m.sampling(data, pars=pars, chains=chains, iter=iter,
                     warmup=warmup, thin=thin, seed=seed, init=init,
                     sample_file=sample_file, diagnostic_file=diagnostic_file,
                     verbose=verbose, algorithm=algorithm, control=control,
                     n_jobs=n_jobs, **kwargs)
    return fit