Source code for pystan.api

#-----------------------------------------------------------------------------
# Copyright (c) 2013-2015, PyStan developers
#
# This file is licensed under Version 3.0 of the GNU General Public
# License. See LICENSE for a text of the license.
#-----------------------------------------------------------------------------

import hashlib
import io
import logging
import os

import pystan._api  # stanc wrapper
from pystan._compat import string_types, PY2
from pystan.model import StanModel

logger = logging.getLogger('pystan')


[docs]def stanc(file=None, charset='utf-8', model_code=None, model_name="anon_model", include_paths=None, verbose=False, obfuscate_model_name=True, allow_undefined=False): """Translate Stan model specification into C++ code. Parameters ---------- file : {string, file}, optional If filename, the string passed as an argument is expected to be a filename containing the Stan model specification. If file, the object passed must have a 'read' method (file-like object) that is called to fetch the Stan model specification. charset : string, 'utf-8' by default If bytes or files are provided, this charset is used to decode. model_code : string, optional A string containing the Stan model specification. Alternatively, the model may be provided with the parameter `file`. model_name: string, 'anon_model' by default A string naming the model. If none is provided 'anon_model' is the default. However, if `file` is a filename, then the filename will be used to provide a name. include_paths: list of strings, optional Paths for #include files defined in Stan code. verbose : boolean, False by default Indicates whether intermediate output should be piped to the console. This output may be useful for debugging. obfuscate_model_name : boolean, True by default If False the model name in the generated C++ code will not be made unique by the insertion of randomly generated characters. Generally it is recommended that this parameter be left as True. allow_undefined : boolean, False by default If True, the C++ code can be written even if there are undefined functions. Returns ------- stanc_ret : dict A dictionary with the following keys: model_name, model_code, cpp_code, and status. Status indicates the success of the translation from Stan code into C++ code (success = 0, error = -1). Notes ----- C++ reserved words and Stan reserved words may not be used for variable names; see the Stan User's Guide for a complete list. The `#include` method follows a C/C++ syntax `#include foo/my_gp_funs.stan`. The method needs to be at the start of the row, no whitespace is allowed. After the included file no whitespace or comments are allowed. `pystan.experimental`(PyStan 2.18) has a `fix_include`-function to clean the `#include` statements from the `model_code`. Example: `from pystan.experimental import fix_include` `model_code = fix_include(model_code)` See also -------- StanModel : Class representing a compiled Stan model stan : Fit a model using Stan References ---------- The Stan Development Team (2013) *Stan Modeling Language User's Guide and Reference Manual*. <http://mc-stan.org/>. Examples -------- >>> stanmodelcode = ''' ... data { ... int<lower=0> N; ... real y[N]; ... } ... ... parameters { ... real mu; ... } ... ... model { ... mu ~ normal(0, 10); ... y ~ normal(mu, 1); ... } ... ''' >>> r = stanc(model_code=stanmodelcode, model_name = "normal1") >>> sorted(r.keys()) ['cppcode', 'model_code', 'model_cppname', 'model_name', 'status'] >>> r['model_name'] 'normal1' """ if file and model_code: raise ValueError("Specify stan model with `file` or `model_code`, " "not both.") if file is None and model_code is None: raise ValueError("Model file missing and empty model_code.") if file is not None: if isinstance(file, string_types): try: with io.open(file, 'rt', encoding=charset) as f: model_code = f.read() except: logger.critical("Unable to read file specified by `file`.") raise else: model_code = file.read() # bytes, going into C++ code model_code_bytes = model_code.encode('utf-8') if include_paths is None: include_paths = [os.path.abspath('.')] elif isinstance(include_paths, string_types): include_paths = [include_paths] # add trailing / include_paths = [os.path.join(path, "") for path in include_paths] include_paths_bytes = [path.encode('utf-8') for path in include_paths] if obfuscate_model_name: # Make the model name depend on the code. model_name = ( model_name + '_' + hashlib.md5(model_code_bytes).hexdigest()) model_name_bytes = model_name.encode('ascii') if not isinstance(file, string_types): # use default 'unknown file name' filename_bytes = b'unknown file name' else: # use only the filename, used only for debug printing filename_bytes = os.path.split(file)[-1].encode('utf-8') result = pystan._api.stanc(model_code_bytes, model_name_bytes, allow_undefined, filename_bytes, include_paths_bytes, ) if result['status'] == -1: # EXCEPTION_RC is -1 msg = result['msg'] if PY2: # fix problem with unicode in error message in PY2 msg = msg.encode('ascii', 'replace') error_msg = "Failed to parse Stan model '{}'. Error message:\n{}".format(model_name, msg) raise ValueError(error_msg) elif result['status'] == 0: # SUCCESS_RC is 0 logger.debug("Successfully parsed Stan model '{}'.".format(model_name)) del result['msg'] result.update({'model_name': model_name}) result.update({'model_code': model_code}) result.update({'include_paths' : include_paths}) return result
[docs]def stan(file=None, model_name="anon_model", model_code=None, fit=None, data=None, pars=None, chains=4, iter=2000, warmup=None, thin=1, init="random", seed=None, algorithm=None, control=None, sample_file=None, diagnostic_file=None, verbose=False, boost_lib=None, eigen_lib=None, include_paths=None, n_jobs=-1, allow_undefined=False, **kwargs): """Fit a model using Stan. The `pystan.stan` function was deprecated in version 2.17 and will be removed in version 3.0. Compiling and using a Stan Program (e.g., for drawing samples) should be done in separate steps. Parameters ---------- file : string {'filename', file-like object} Model code must found via one of the following parameters: `file` or `model_code`. If `file` is a filename, the string passed as an argument is expected to be a filename containing the Stan model specification. If `file` is a file object, the object passed must have a 'read' method (file-like object) that is called to fetch the Stan model specification. charset : string, optional If bytes or files are provided, this charset is used to decode. 'utf-8' by default. model_code : string A string containing the Stan model specification. Alternatively, the model may be provided with the parameter `file`. model_name: string, optional A string naming the model. If none is provided 'anon_model' is the default. However, if `file` is a filename, then the filename will be used to provide a name. 'anon_model' by default. fit : StanFit instance An instance of StanFit derived from a previous fit, None by default. If `fit` is not None, the compiled model associated with a previous fit is reused and recompilation is avoided. data : dict A Python dictionary providing the data for the model. Variables for Stan are stored in the dictionary as expected. Variable names are the keys and the values are their associated values. Stan only accepts certain kinds of values; see Notes. pars : list of string, optional A list of strings indicating parameters of interest. By default all parameters specified in the model will be stored. chains : int, optional Positive integer specifying number of chains. 4 by default. iter : int, 2000 by default Positive integer specifying how many iterations for each chain including warmup. warmup : int, iter//2 by default Positive integer specifying number of warmup (aka burin) iterations. As `warmup` also specifies the number of iterations used for stepsize adaption, warmup samples should not be used for inference. thin : int, optional Positive integer specifying the period for saving samples. Default is 1. init : {0, '0', 'random', function returning dict, list of dict}, optional Specifies how initial parameter values are chosen: - 0 or '0' initializes all to be zero on the unconstrained support. - 'random' generates random initial values. An optional parameter `init_r` controls the range of randomly generated initial values for parameters in terms of their unconstrained support; - list of size equal to the number of chains (`chains`), where the list contains a dict with initial parameter values; - function returning a dict with initial parameter values. The function may take an optional argument `chain_id`. seed : int or np.random.RandomState, optional The seed, a positive integer for random number generation. Only one seed is needed when multiple chains are used, as the other chain's seeds are generated from the first chain's to prevent dependency among random number streams. By default, seed is ``random.randint(0, MAX_UINT)``. algorithm : {"NUTS", "HMC", "Fixed_param"}, optional One of the algorithms that are implemented in Stan such as the No-U-Turn sampler (NUTS, Hoffman and Gelman 2011) and static HMC. sample_file : string, optional File name specifying where samples for *all* parameters and other saved quantities will be written. If not provided, no samples will be written. If the folder given is not writable, a temporary directory will be used. When there are multiple chains, an underscore and chain number are appended to the file name. By default do not write samples to file. diagnostic_file : string, optional File name specifying where diagnostic information should be written. By default no diagnostic information is recorded. boost_lib : string, optional The path to a version of the Boost C++ library to use instead of the one supplied with PyStan. eigen_lib : string, optional The path to a version of the Eigen C++ library to use instead of the one in the supplied with PyStan. include_paths : list of strings, optional Paths for #include files defined in Stan code. verbose : boolean, optional Indicates whether intermediate output should be piped to the console. This output may be useful for debugging. False by default. control : dict, optional A dictionary of parameters to control the sampler's behavior. Default values are used if control is not specified. The following are adaptation parameters for sampling algorithms. These are parameters used in Stan with similar names: - `adapt_engaged` : bool - `adapt_gamma` : float, positive, default 0.05 - `adapt_delta` : float, between 0 and 1, default 0.8 - `adapt_kappa` : float, between default 0.75 - `adapt_t0` : float, positive, default 10 - `adapt_init_buffer` : int, positive, defaults to 75 - `adapt_term_buffer` : int, positive, defaults to 50 - `adapt_window` : int, positive, defaults to 25 In addition, the algorithm HMC (called 'static HMC' in Stan) and NUTS share the following parameters: - `stepsize`: float, positive - `stepsize_jitter`: float, between 0 and 1 - `metric` : str, {"unit_e", "diag_e", "dense_e"} In addition, depending on which algorithm is used, different parameters can be set as in Stan for sampling. For the algorithm HMC we can set - `int_time`: float, positive For algorithm NUTS, we can set - `max_treedepth` : int, positive n_jobs : int, optional Sample in parallel. If -1 all CPUs are used. If 1, no parallel computing code is used at all, which is useful for debugging. allow_undefined : boolean, False by default If True, the C++ code can be written even if there are undefined functions. Returns ------- fit : StanFit instance Other parameters ---------------- chain_id : int, optional `chain_id` can be a vector to specify the chain_id for all chains or an integer. For the former case, they should be unique. For the latter, the sequence of integers starting from the given `chain_id` are used for all chains. init_r : float, optional `init_r` is only valid if `init` == "random". In this case, the intial values are simulated from [-`init_r`, `init_r`] rather than using the default interval (see the manual of (Cmd)Stan). test_grad: bool, optional If `test_grad` is ``True``, Stan will not do any sampling. Instead, the gradient calculation is tested and printed out and the fitted StanFit4Model object is in test gradient mode. By default, it is ``False``. append_samples`: bool, optional refresh`: int, optional Argument `refresh` can be used to control how to indicate the progress during sampling (i.e. show the progress every \code{refresh} iterations). By default, `refresh` is `max(iter/10, 1)`. obfuscate_model_name : boolean, optional `obfuscate_model_name` is only valid if `fit` is None. True by default. If False the model name in the generated C++ code will not be made unique by the insertion of randomly generated characters. Generally it is recommended that this parameter be left as True. Examples -------- >>> from pystan import stan >>> import numpy as np >>> model_code = ''' ... parameters { ... real y[2]; ... } ... model { ... y[1] ~ normal(0, 1); ... y[2] ~ double_exponential(0, 2); ... }''' >>> fit1 = stan(model_code=model_code, iter=10) >>> print(fit1) >>> excode = ''' ... transformed data { ... real y[20]; ... y[1] = 0.5796; y[2] = 0.2276; y[3] = -0.2959; ... y[4] = -0.3742; y[5] = 0.3885; y[6] = -2.1585; ... y[7] = 0.7111; y[8] = 1.4424; y[9] = 2.5430; ... y[10] = 0.3746; y[11] = 0.4773; y[12] = 0.1803; ... y[13] = 0.5215; y[14] = -1.6044; y[15] = -0.6703; ... y[16] = 0.9459; y[17] = -0.382; y[18] = 0.7619; ... y[19] = 0.1006; y[20] = -1.7461; ... } ... parameters { ... real mu; ... real<lower=0, upper=10> sigma; ... vector[2] z[3]; ... real<lower=0> alpha; ... } ... model { ... y ~ normal(mu, sigma); ... for (i in 1:3) ... z[i] ~ normal(0, 1); ... alpha ~ exponential(2); ... }''' >>> >>> def initfun1(): ... return dict(mu=1, sigma=4, z=np.random.normal(size=(3, 2)), alpha=1) >>> exfit0 = stan(model_code=excode, init=initfun1) >>> def initfun2(chain_id=1): ... return dict(mu=1, sigma=4, z=np.random.normal(size=(3, 2)), alpha=1 + chain_id) >>> exfit1 = stan(model_code=excode, init=initfun2) """ logger.warning('DeprecationWarning: pystan.stan was deprecated in version 2.17 and will be removed in version 3.0. ' 'Compile and use a Stan program in separate steps.') # NOTE: this is a thin wrapper for other functions. Error handling occurs # elsewhere. if data is None: data = {} if warmup is None: warmup = int(iter // 2) obfuscate_model_name = kwargs.pop("obfuscate_model_name", True) if fit is not None: m = fit.stanmodel else: m = StanModel(file=file, model_name=model_name, model_code=model_code, boost_lib=boost_lib, eigen_lib=eigen_lib, include_paths=include_paths, obfuscate_model_name=obfuscate_model_name, verbose=verbose, allow_undefined=allow_undefined) # check that arguments in kwargs are valid valid_args = {"chain_id", "init_r", "test_grad", "append_samples", "enable_random_init", "refresh", "control"} for arg in kwargs: if arg not in valid_args: raise ValueError("Parameter `{}` is not recognized.".format(arg)) fit = m.sampling(data, pars=pars, chains=chains, iter=iter, warmup=warmup, thin=thin, seed=seed, init=init, sample_file=sample_file, diagnostic_file=diagnostic_file, verbose=verbose, algorithm=algorithm, control=control, n_jobs=n_jobs, **kwargs) return fit