Source code for hetgpy.optim

from __future__ import annotations
import numpy as np
from copy import deepcopy
from joblib import Parallel, delayed
import hetgpy
from hetgpy import hetGP, homGP
from hetgpy.contour import crit_cSUR, crit_ICU, crit_MCU, crit_MEE, crit_tMSE
from hetgpy.covariance_functions import cov_gen, partial_cov_gen, euclidean_dist
from hetgpy.qEI import qEI_cpp
  
from scipy.stats import t, norm
from scipy.special import gamma, erfc, erfcx
from scipy.stats.qmc import LatinHypercube
from scipy.optimize import minimize
from hetgpy.IMSE import maximinSA_LHS
from hetgpy.utils import duplicated

[docs] def crit_EI(x, model, cst = None, preds = None): r''' Expected Improvement (EI) criteria Parameters ---------- x: nd_arraylike model designs, one point per row model: hetgpy.hetGP hetGP or homGP model cst: float optional plugin value of the mean preds: Dict model predictions (optional) References ---------- Mockus, J.; Tiesis, V. & Zilinskas, A. (1978). The application of Bayesian methods for seeking the extremum Towards Global Optimization, Amsterdam: Elsevier, 2, 2. Vazquez E, Villemonteix J, Sidorkiewicz M, Walter E (2008). Global Optimization Based on Noisy Evaluations: An Empirical Study of Two Statistical Approaches, Journal of Physics: Conference Series, 135, IOP Publishing. Examples -------- >>> from hetgpy.test_functions import f1d >>> from hetgpy.homGP import homGP >>> from hetgpy.optim import crit_EI >>> import numpy as np >>> ftest = f1d >>> n_init = 5 # number of unique designs >>> X = np.linspace(0, 1, n_init).reshape(-1,1) >>> Z = ftest(X) >>> xgrid = np.linspace(0,1,51).reshape(-1,1) >>> model = homGP() >>> model.mle(X = X, Z = Z, lower = np.array([0.01]), upper = np.array([1]), known = dict(g = 2e-8)) >>> EI = crit_EI(xgrid, model, cst = model.Z0.min()) ''' if cst is None: cst = np.min(model.predict(x = model['X0'])['mean']) if len(x.shape) == 1: x = x.reshape(-1,model.X0.shape[1]) if preds is None: preds = model.predict(x = x) if type(model)== hetgpy.homTP or type(model)== hetgpy.hetTP: gamma = (cst - preds['mean'])/np.sqrt(preds['sd2']) res = (cst - preds['mean']) * t.cdf(gamma, df = model['nu'] + len(model['Z'])) res = res + np.sqrt(preds['sd2']) * (1 + (gamma**2 - 1)/(model['nu'] + len(model['Z']) - 1)) * t.pdf(x = gamma, df = model['nu'] + len(model['Z'])) res[np.where(res < 1e-12)] = 0 # for stability return(res) xcr = (cst - preds['mean'])/np.sqrt(preds['sd2']) res = (cst - preds['mean']) * norm.cdf(xcr) res = res + np.sqrt(preds['sd2']) * norm.pdf(xcr) res[(preds['sd2'] < np.sqrt(np.finfo(float).eps)) | (res < 1e-12)] = 0 # for stability return res
[docs] def deriv_crit_EI(x, model, cst = None, preds = None): r''' Derivative for crit_EI, used in crit_optim Parameters ---------- x: nd_arraylike model designs, one point per row model: hetgpy.hetGP hetGP or homGP model cst: float optional plugin value of the mean preds: Dict model predictions (optional) References ---------- Ginsbourger, D. Multiples metamodeles pour l'approximation et l'optimisation de fonctions numeriques multivariables Ecole Nationale Superieure des Mines de Saint-Etienne, Ecole Nationale Superieure des Mines de Saint-Etienne, 2009 Roustant, O., Ginsbourger, D., DiceKriging, DiceOptim: Two R packages for the analysis of computer experiments by kriging-based metamodeling and optimization, Journal of Statistical Software, 2012 ''' if cst is None: cst = np.min(model.predict(x = model.X0)['mean']) if len(x.shape) == 1: x = x.reshape(-1,model.X0.shape[1]) if preds is None: preds = model.predict(x = x) pred_gr = predict_gr(model, x) z = (cst - preds['mean'])/np.sqrt(preds['sd2']) if type(model)==homGP or type(model)==hetGP: res = pred_gr['sd2'] / (2 * np.sqrt(preds['sd2'])) * norm.pdf(z) - pred_gr['mean'] * norm.cdf(z) else: # dz = - dm/s - z ds/s = -dm/s - z * ds2/(2s2) dz = -pred_gr['mean']/np.sqrt(preds['sd2']) - z * pred_gr['sd2']/(2 * preds['sd2'].reshape(x.shape[0],x.shape[0]).T) # d( (cst - m(x)).pt(z(x))) = p1 = -pred_gr['mean'] * t.cdf(z, df = model.nu + len(model.Z)) + (cst - preds['mean']) * dz * t.pdf(z, df = model.nu + len(model.Z)) a = model.nu + len(model.Z) - 1 # d( s(x) (1 + (z^2-1)/(nu + N -1)) dt(z(x)) (in 2 lines) p2 = (pred_gr['sd2']/(2*np.sqrt(preds['sd2'])) * (1 + (z**2 - 1)/a) + 2*np.sqrt(preds['sd2']) * z * dz/a) * t.pdf(z, df = model.nu + len(model.Z)) p2 = p2 + np.sqrt(preds['sd2']) * (1 + (z**2 - 1)/a) * dz * dlambda(z, model.nu + len(model.Z)) res = p1 + p2 res[np.abs(res) < 1e-12] = 0 # for stability with optim return res
[docs] def predict_gr(model, x): r''' Gradient of the prediction given a model ''' if len(x.shape) == 1: x = x.reshape(-1,1) kvec = cov_gen(X1 = model.X0, X2 = x, theta = model.theta, type = model.covtype) dm = np.full(fill_value=np.nan, shape = (x.shape[0], x.shape[1])) ds2 = dm.copy() for i in range(x.shape[0]): dkvec = np.full(fill_value=np.nan, shape=(model.X0.shape[0], x.shape[1])) for j in range(x.shape[1]): dkvec[:, j] = np.squeeze(partial_cov_gen(X1 = x[i:i+1,:], X2 = model.X0, theta = model.theta, i1 = 1, i2 = j + 1, arg = "X_i_j", type = model.covtype)) * kvec[:,i] dm[i,:] = ((model.Z0 - model.beta0).T @ model.Ki) @ dkvec if (type(model)==homGP or type(model)==hetGP) and model.trendtype == "OK": tmp = np.squeeze(1 - (model.Ki.sum(axis=1)) @ kvec[:,i])/(np.sum(model.Ki)) * (model.Ki.sum(axis=1)) @ dkvec else: tmp = 0 ds2[i,:] = -2 * ((kvec[:,i].T @ model.Ki) @ dkvec + tmp) if type(model)==homGP or type(model)==hetGP: return dict(mean = dm, sd2 = model.nu_hat * ds2) else: return dict(mean = model.sigma2 * dm, sd2 = (model.nu + model.psi - 2) / (model.nu + len(model.Z) - 2) * model.sigma2**2 * ds2)
[docs] def dlambda(z,a): r''' Derivative of the student-t pdf Parameters ---------- z: float input location a: float degree of freedom parameter Returns ------- derivative of student-t pdf Examples -------- >>> dlambda(0.55, 3.6) # -0.2005827 ''' return -(a + 1) * gamma((a + 1)/2)/(np.sqrt(np.pi * a) * a * gamma(a/2)) * z * ((a + z**2)/a)**(-(a +3)/2)
[docs] def crit_qEI(x, model, cst = None, preds = None): r''' Fast approximated batch-Expected Improvement criterion (for minimization) Parallel Expected improvement Parameters ---------- x: nd_array matrix of new designs representing the batch of q points, one point per row (q x d) model: homGP/hetGP model object including inverve matrices cst: nd_array optional optional plugin value used in the EI, see details preds: dict optional predictions at x to avoid recomputing if already done (must include the predictive covariance, i.e., the cov slot) Returns ------- qEI_cpp Details ------- cst is classically the observed minimum in the deterministic case. In the noisy case, the min of the predictive mean works fine. This is a beta version at this point. It may work for for TP models as well. References ---------- M. Binois (2015), Uncertainty quantification on Pareto fronts and high-dimensional strategies in Bayesian optimization, with applications in multi-objective automotive design. Ecole Nationale Superieure des Mines de Saint-Etienne, PhD thesis. Examples -------- >>> from hetgpy.test_functions import f1d >>> from hetgpy.homGP import homGP >>> from hetgpy.optim import crit_qEI >>> import numpy as np >>> ftest = f1d >>> n_init = 5 # number of unique designs >>> X = np.linspace(0, 1, n_init).reshape(-1,1) >>> Z = ftest(X) >>> xgrid = np.linspace(0,1,51).reshape(-1,1) >>> model = homGP() >>> model.mleHomGP(X = X, Z = Z, lower = np.array([0.01]), upper = np.array([1]), known = dict(g = 2e-8)) >>> xbatch = np.array((0.37, 0.17, 0.7)).reshape(3, 1) >>> fqEI = crit_qEI(xbatch, model, cst = model.Z0.min()) ''' if cst is None: cst = np.min(model.predict(x = model.X0)['mean']) if len(x.shape)==1: x = x.reshape(-1,1) if preds is None or preds.get('cov') is None: preds = model.predict(x = x, xprime = x) # cov2cor not available in python, so compute it ourselves cov = preds['cov'] Dinv = np.diag(1.0 / np.sqrt(np.diag(cov))) # inverse of diagonal matrix cormat = Dinv @ cov @ Dinv res = qEI_cpp(mu = -preds['mean'], s = np.sqrt(preds['sd2']), cor = cormat, threshold = -cst) return res
def crit_search(model, crit, replicate = False, Xcand = None, control = dict(tol_dist = 1e-6, tol_diff = 1e-6, multi_start = 20, maxit = 100, maximin = True, Xstart = None), seed = None, ncores = 1, **kwargs): crit_to_function = { 'crit_EI': crit_EI, 'crit_cSUR': crit_cSUR, 'crit_ICU':crit_ICU, 'crit_MCU':crit_MCU, 'crit_MEE':crit_MEE, 'crit':crit_tMSE } crit_func = crit_to_function[crit] def fn(*args): # maximize return -1.0 * crit_func(*args) if(replicate): ## Discrete optimization res = [] for i in range(model.X0.shape[0]): res.append( crit_func(x = model.X0[i:i+1,:], model = model, **kwargs) ) return dict( par = model.X0[np.argmax(res),:].reshape(1,-1), value = np.max(res), new = False, id = np.argmax(res) ) if control is None: control = dict(multi_start = 20, maxit = 100) if control.get('multi_start') is None: control['multi_start'] = 20 if control.get('maxit') is None: control['maxit'] = 100 if control.get('maximin') is None: control['maximin'] = True if control.get('tol_dist') is None: control['tol_dist'] = 1e-6 if control.get('tol_diff') is None: control['tol_diff'] = 1e-6 d = model.X0.shape[1] if crit == "crit_EI": def grad(*args): # maximization return -1.0 * deriv_crit_EI(*args) gr = grad else: gr = None ## Optimization if Xcand is None: ## Continuous optimization if control.get('Xstart') is not None: Xstart = control['Xstart'] else: if seed is None: seed = np.random.default_rng().choice(2**15) ## To be changed? if control['maximin']: if(d == 1): # perturbed 1d equidistant points Xstart = (np.linspace(1/2, control['multi_start'] -1/2, control['multi_start']) + np.random.default_rng(seed).uniform(size=control['multi_start'], low = -1/2, high = 1/2)).reshape(-1,1)/control['multi_start'] else: sampler = LatinHypercube(d=d,seed=seed) Xstart = maximinSA_LHS(sampler.random(n=control['multi_start']))['design'] else: sampler = LatinHypercube(d=d,seed=seed) Xstart = sampler(n=control['multi_start']) #model = deepcopy(model) res = dict(par = np.array([np.nan]), value = np.inf, new = np.nan) crit_to_args = { 'crit_EI': {'model':model,'cst': None,'preds':kwargs.get('preds')}, 'crit_MEE': {'model':model,'thres':kwargs.get('thres',0),'preds':kwargs.get('preds')}, 'crit_cSUR': {'model':model,'thres':kwargs.get('thres',0),'preds':kwargs.get('preds')}, 'crit_ICU': {'model':model,'thres':kwargs.get('thres',0), 'Xref':kwargs.get('Xref',None), 'w':kwargs.get('w',None), 'preds':kwargs.get('preds',None), 'kxprime':kwargs.get('kxprime',None)}, 'crit_tMSE': {'model':model,'thres':kwargs.get('thres',0),'preds':kwargs.get('preds'), 'seps':kwargs.get('seps',0.05)}, 'crit_MCU': {'model':model,'thres':kwargs.get('thres',0), 'gamma':kwargs.get('gamma',2), 'preds':kwargs.get('preds',None)} } def local_opt_fun(i): out = minimize( x0 = Xstart[i,:], fun = fn, jac = gr, args = tuple(crit_to_args[crit].values()), method = "L-BFGS-B", bounds = [(0,1) for _ in range(d)], options=dict(maxiter=control['maxit'], #, ftol = control.get('factr',10e9) * np.finfo(float).eps,#, gtol = control.get('pgtol',0) # should map to pgtol ) ) python_kws_2_R_kws = { 'x':'par', 'fun': 'value', 'nfev': 'counts' } for key, val in python_kws_2_R_kws.items(): out[val] = out[key] return out all_res = Parallel(n_jobs=ncores)(delayed(local_opt_fun)(i) for i in range(Xstart.shape[0])) all_res_values = [res['value'] for res in all_res] res_min = np.nanargmin(all_res_values) par = all_res[res_min]['x'] par[par<0] = 0 par[par>1] = 1 res = dict(par = par.reshape(1,-1), value = crit_func(par,**crit_to_args[crit]), new = True, id = None) if control['tol_dist'] > 0 and control['tol_diff'] > 0: ## Check if new design is not to close to existing design dists = np.sqrt(euclidean_dist(res['par'].reshape(1,-1), model.X0)) if np.min(dists) < control['tol_dist']: argmin = np.argmin(dists) res = dict(par = model.X0[[argmin],:], value = crit_func(x = model.X0[argmin,:], **crit_to_args[crit]), new = False, id = argmin) else: ## Check if crit difference between replication and new design is significative id_closest = np.argmin(dists) # closest point to new design crit_rep = crit_func(x=model.X0[id_closest],**crit_to_args[crit]) if (res['value']-crit_rep)/res['value'] < control['tol_diff']: res = dict(par = model.X0[[id_closest],:], value = crit_rep, new = False, id = id_closest) else: ## Discrete print('here') res = -1 * crit_func(model=model,x=Xcand) tmp = (duplicated(np.vstack([model.X0, Xcand[np.argmin(res),:]]), fromLast = True)) if len(tmp) > 0: par = Xcand[res.argmin(keepdims=True),:] return(dict(par = par, value = crit_func(model=model,x=par), new = False, id = tmp)) par = Xcand[res.argmin(keepdims=True),:] return dict(par = par, value = crit_func(model=model,x=par), new = True, id = None) return res
[docs] def crit_optim(model, crit, h = 2, Xcand = None, control = dict(multi_start = 10, maxit = 100), seed = None, ncores = 1, **kwargs): ''' crit_optim ''' d = model.X0.shape[1] if crit == "crit_IMSPE": raise ValueError("crit_IMSPE is intended to be optimized by IMSPE_optim") ## A) Setting to beat: first new point then replicate h times crit_A = crit_search(model = model, crit = crit, control = control, Xcand = Xcand, seed = seed, ncores = ncores, **kwargs) new_designA = crit_A['par'] ## store first considered design to be added path_A = [crit_A] if h > 0: newmodelA = deepcopy(model) for i in range(1,h+1): ZnewA = newmodelA.predict(crit_A['par'])['mean'] newmodelA.update(Xnew = crit_A['par'], Znew = ZnewA, maxit = 0) crit_A = crit_search(model = newmodelA, crit = crit, replicate = True, control = control, seed = seed, ncores = ncores, **kwargs) path_A.append(crit_A) if h == -1: return dict(par = new_designA, value = crit_A['value'], path = path_A) newmodelB = deepcopy(model) if h == 0: crit_B = crit_search(model = newmodelB, crit = crit, replicate = True, control = control, ncores = ncores,**kwargs) new_designB = crit_B['par'] ## store considered design to be added # search from best replicate if Xcand is None: crit_C = crit_search(model = newmodelB, crit = crit, control = dict(Xstart = crit_B['par'], maxit = control['maxit'], tol_dist = control['tol_dist'], tol_diff = control['tol_diff']), ncores = ncores,**kwargs) else: crit_C = crit_B.copy() if crit_C['value'] > max(crit_A['value'], crit_B['value']): return dict(par = crit_C['par'], value = crit_C['value'], path = [crit_C]) if crit_B['value'] > crit_A['value']: return dict(par = crit_B['par'], value = crit_B['value'], path = [crit_B]) else: for i in range(h): ## Add new replicate crit_B = crit_search(model = newmodelB, crit = crit, replicate = True, control = control, ncores = ncores,**kwargs) if i == 0: new_designB = crit_B['par'].reshape(1,-1) ##store first considered design to add path_B = list() path_B.append(crit_B) ZnewB = newmodelB.predict(crit_B['par'])['mean'] newmodelB.update(Xnew = crit_B['par'], Znew = ZnewB, maxit = 0) ## Add new design crit_C = crit_search(model = newmodelB, crit = crit, control = control, Xcand = Xcand, ncores = ncores,**kwargs) path_C = [crit_C] if i < h: newmodelC = deepcopy(newmodelB) for j in range(i,h): ## Add remaining replicates ZnewC = newmodelC.predict(crit_C['par'])['mean'] newmodelC.update(Xnew = crit_C['par'], Znew = ZnewC, maxit = 0) crit_C = crit_search(model = newmodelC, crit = crit, replicate = True, control = control, ncores = ncores,**kwargs) path_C.append(crit_C) if crit_C['value'] < crit_A['value']: return dict(par = new_designB, value = crit_C['value'], path = path_B + path_C) return dict(par = new_designA, value = crit_A['value'], path = path_A)
[docs] def log1mexp(x): r""" References ---------- Maechler, Martin (2012). Accurately Computing log(1-exp(-|a|)). Assessed from the Rmpfr package. """ if np.isnan(x): print("x is NA") return np.nan if np.min(x) < 0: print("x < 0") return np.nan if x <= np.log(2): return np.log(-np.expm1(-x)) else: return np.log1p(-np.exp(-x))
#' log_h function from #' @references Ament, S., Daulton, S., Eriksson, D., Balandat, M., & Bakshy, E. (2024). Unexpected improvements to expected improvement for bayesian optimization. Advances in Neural Information Processing Systems, 36.
[docs] def log_h(z, eps=np.finfo(float).eps): r""" References ---------- Ament, S., Daulton, S., Eriksson, D., Balandat, M., & Bakshy, E. (2024). Unexpected improvements to expected improvement for Bayesian optimization. Advances in Neural Information Processing Systems, 36. """ c1 = np.log(2 * np.pi) / 2 c2 = np.log(np.pi / 2) / 2 if z > -1: return np.log(norm.pdf(z) + z * norm.cdf(z)) if z < -1 / np.sqrt(eps): return -z ** 2 / 2 - c1 - 2 * np.log(np.abs(z)) res = -z ** 2 / 2 - c1 + log1mexp(-(np.log(erfcx(-z / np.sqrt(2)) * np.abs(z)) + c2)) if np.isnan(res): res = -750 return res
[docs] def crit_logEI(x, model, cst = None, preds = None): r""" Logarithm of Expected Improvement (EI) criteria Parameters ---------- x: nd_arraylike model designs, one point per row model: hetgpy.hetGP hetGP or homGP model cst: float optional plugin value of the mean preds: Dict model predictions (optional) References ---------- Ament, S., Daulton, S., Eriksson, D., Balandat, M., & Bakshy, E. (2024). Unexpected improvements to expected improvement for Bayesian optimization. Advances in Neural Information Processing Systems, 36. Examples -------- >>> from hetgpy.test_functions import f1d >>> from hetgpy.homGP import homGP >>> from hetgpy.optim import crit_logEI >>> import numpy as np >>> ftest = f1d >>> n_init = 5 # number of unique designs >>> X = np.linspace(0, 1, n_init).reshape(-1,1) >>> Z = ftest(X) >>> xgrid = np.linspace(0,1,51).reshape(-1,1) >>> model = homGP() >>> model.mle(X = X, Z = Z, lower = np.array([0.01]), upper = np.array([1]), known = dict(g = 2e-8)) >>> logEI = crit_logEI(xgrid, model, cst = model.Z0.min()) """ if cst is None: cst = np.min(model.predict(x=model["X0"])["mean"]) if len(x.shape) == 1: x = x.reshape(-1, model.X0.shape[1]) if preds is None: preds = model.predict(x=x) if type(model) == hetgpy.homTP or type(model) == hetgpy.hetTP: gamma = (cst - preds["mean"]) / np.sqrt(preds["sd2"]) res = (cst - preds["mean"]) * t.cdf(gamma, df=model["nu"] + len(model["Z"])) res = res + np.sqrt(preds["sd2"]) * (1 + (gamma**2 - 1) / (model["nu"] + len(model["Z"]) - 1)) * t.pdf(x=gamma, df=model["nu"] + len(model["Z"])) res[np.where(res < 1e-12)] = 0 # for stability return np.log(res) xcr = (cst - preds["mean"]) / np.sqrt(preds["sd2"]) res = np.log(np.sqrt(preds["sd2"])) for i in np.arange(x.shape[0]): res[i] = np.maximum(-800, res[i] + log_h(xcr[i])) res[preds["sd2"] == 0] = -800 return res
#' Derivative of log_h function from #' @references Ament, S., Daulton, S., Eriksson, D., Balandat, M., & Bakshy, E. (2024). Unexpected improvements to expected improvement for bayesian optimization. Advances in Neural Information Processing Systems, 36.
[docs] def dlog_h(z, eps=np.finfo(float).eps): r""" References ---------- Ament, S., Daulton, S., Eriksson, D., Balandat, M., & Bakshy, E. (2024). Unexpected improvements to expected improvement for Bayesian optimization. Advances in Neural Information Processing Systems, 36. """ c2 = np.log(np.pi / 2) / 2 if z > -1: return norm.cdf(z) / (norm.pdf(z) + z * norm.cdf(z)) if z < -1 / np.sqrt(eps): return -z - 2 / z # res = -z ** 2 / 2 - c1 + log1mexp(-(np.log(erfcx(-z / np.sqrt(2)) * np.abs(z)) + c2)) # res = -z + (np.exp(c2) * (np.sqrt(np.pi) * (z**2 + 1) * np.exp(z**2 / 2) * (erfc(z / np.sqrt(2)) - 2) - np.sqrt(2) * z)) / (np.sqrt(np.pi) * (z * np.exp((z**2 + 2 * c2) / 2) * (erfc(z / np.sqrt(2)) - 2) - 1)) res = -z + (np.exp(c2) * (-np.sqrt(np.pi) * (z**2 + 1) * erfcx(-z/np.sqrt(2)) - np.sqrt(2) * z)) / (np.sqrt(np.pi) * (z * np.exp(c2) * -erfcx(-z / np.sqrt(2)) - 1)) if np.isnan(res): res = 0 return res
[docs] def deriv_crit_logEI(x, model, cst=None, preds=None): r""" Derivative of the logarithm of Expected Improvement (EI) criteria Parameters ---------- x: nd_arraylike model designs, one point per row model: hetgpy.hetGP hetGP or homGP model cst: float optional plugin value of the mean preds: Dict model predictions (optional) References ---------- Ament, S., Daulton, S., Eriksson, D., Balandat, M., & Bakshy, E. (2024). Unexpected improvements to expected improvement for Bayesian optimization. Advances in Neural Information Processing Systems, 36. Examples -------- """ if cst is None: cst = np.min(model.predict(x=model["X0"])["mean"]) if len(x.shape) == 1: x = x.reshape(-1, model.X0.shape[1]) if preds is None: preds = model.predict(x=x) pred_gr = predict_gr(model, x) z = (cst - preds["mean"]) / np.sqrt(preds["sd2"]) if type(model) == homGP or type(model) == hetGP: ds = pred_gr["sd2"] / (2 * np.sqrt(preds["sd2"]).reshape(x.shape[0], x.shape[0]).T) dz = -pred_gr["mean"] / np.sqrt(preds["sd2"]) - z * ds / np.sqrt(preds["sd2"]) return dz * dlog_h(z) + ds / np.sqrt(preds["sd2"]) else: # dz = - dm/s - z ds/s = -dm/s - z * ds2/(2s2) dz = -pred_gr["mean"] / np.sqrt(preds["sd2"]) - z * pred_gr["sd2"] / (2 * preds["sd2"].reshape(x.shape[0], x.shape[0]).T) # d( (cst - m(x)).pt(z(x))) = p1 = -pred_gr["mean"] * t.cdf(z, df=model.nu + len(model.Z)) + (cst - preds["mean"]) * dz * t.pdf(z, df=model.nu + len(model.Z)) a = model.nu + len(model.Z) - 1 # d( s(x) (1 + (z^2-1)/(nu + N -1)) dt(z(x)) (in 2 lines) p2 = (pred_gr["sd2"] / (2 * np.sqrt(preds["sd2"])) * (1 + (z**2 - 1) / a) + 2 * np.sqrt(preds["sd2"]) * z * dz / a) * t.pdf(z, df=model.nu + len(model.Z)) p2 = p2 + np.sqrt(preds["sd2"]) * (1 + (z**2 - 1) / a) * dz * dlambda(z, model.nu + len(model.Z)) res = p1 + p2 res[np.abs(res) < 1e-12] = 0 # for stability with optim eitmp = (cst - preds["mean"]) * t.cdf(z, df=model["nu"] + len(model["Z"])) eitmp = eitmp + np.sqrt(preds["sd2"]) * (1 + (z**2 - 1) / (model["nu"] + len(model["Z"]) - 1)) * t.pdf(x=z, df=model["nu"] + len(model["Z"])) res = res/eitmp return res