Let's try generalizing our model now. For survival analysis, it is very useful to think in terms of cumulative hazards - in practice, specifying the cumulative hazard is typically the easiest way to define a model.
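Why is the cumulative hazard so convenient? For right-censored data, the log-likelihood can be written entirely in terms of the hazard, $h(t) = \frac{dH}{dt}(t)$, and the cumulative hazard itself:

$$\log L = \sum_i e_i \log h(t_i) - \sum_i H(t_i)$$

where $e_i$ is the event indicator. So if we can specify $H$ and differentiate it, the likelihood comes for free - this is exactly what `log_likelihood` below computes.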
from autograd import numpy as np

# Note: we are shifting the data 4 units to the right.
T = (np.random.exponential(size=1000) / 1.5) ** 2.3 + 4
E = np.random.binomial(1, 0.95, size=1000)
def cumulative_hazard(params, t):
    lambda_, rho_, mu = params
    return ((t - mu) / lambda_) ** rho_

def log_hazard(params, t):
    lambda_, rho_, mu = params
    return ...  # this could get arbitrarily complicated.
from autograd import elementwise_grad

hazard = elementwise_grad(cumulative_hazard, argnum=1)

def log_hazard(params, t):
    return np.log(hazard(params, t))
log_hazard(np.array([2., 2., 0.]), np.array([1., 2., 3.]))
array([-0.69314718, 0. , 0.40546511])
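We can sanity-check autograd against the closed form: with $\lambda = 2$, $\rho = 2$, $\mu = 0$ we have $H(t) = (t/2)^2$, so $h(t) = t/2$ and $\log h(t) = \log(t/2)$:

# Closed-form check of the autograd result above: log h(t) = log(t / 2).
np.log(np.array([1., 2., 3.]) / 2.)
# array([-0.69314718,  0.        ,  0.40546511])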
# same as previous
from autograd import value_and_grad
from scipy.optimize import minimize

def log_likelihood(params, t, e):
    return np.sum(e * log_hazard(params, t)) - np.sum(cumulative_hazard(params, t))

def negative_log_likelihood(params, t, e):
    return -log_likelihood(params, t, e)

results = minimize(
    value_and_grad(negative_log_likelihood),
    x0=np.array([1.0, 1.0, 0.0]),
    method=None,
    args=(T, E),
    jac=True,
    # mu must be strictly less than every observed time, hence its upper bound.
    bounds=((0.00001, None), (0.00001, None), (None, np.min(T) - 0.001)),
)
print(results)
      fun: 341.1505998865307
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 8.74288685e-06, -7.51244260e-04, -4.29427489e+04])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 30
      nit: 20
   status: 0
  success: True
        x: array([0.49171791, 0.48311375, 3.99900001])
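Note that $\hat{\mu} \approx 4$, just as we designed. With the fitted parameters, other quantities follow from standard identities, e.g. the survival function $S(t) = \exp(-H(t))$. A minimal sketch (the grid below is my own choice; $t$ must stay above the fitted $\mu$ for $H(t)$ to be real):

# Recover the fitted survival curve via S(t) = exp(-H(t)).
# t must exceed the fitted mu (~3.999) for the hazard to be real-valued.
t_grid = np.linspace(4.0, 10.0, 100)
surv = np.exp(-cumulative_hazard(results.x, t_grid))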
So long as you are good at "parameter accounting", you can create arbitrarily complicated survival models simply by specifying the cumulative hazard.
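For example, here's a hypothetical cumulative hazard that adds a constant background rate to the Weibull term; `elementwise_grad` handles it just the same. (This model and its names are illustrative, not part of the analysis above.)

# Hypothetical: Weibull cumulative hazard plus a constant background rate c_.
def cumulative_hazard_bg(params, t):
    lambda_, rho_, c_ = params
    return (t / lambda_) ** rho_ + c_ * t

hazard_bg = elementwise_grad(cumulative_hazard_bg, argnum=1)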
Extending this to include covariates is straightforward too, with some modifications to the code. Here's a simple model for the cumulative hazard:

$$H(t; x) = \left(\frac{t}{\lambda(x)}\right)^\rho$$

$$\lambda(x) = \exp(\beta_1 x_1 + \beta_2 x_2)$$
from scipy.stats import weibull_min

# create some regression data.
N = 10000
X = 0.1 * np.random.normal(size=(N, 2))
T = np.exp(2 * X[:, 0] - 2 * X[:, 1]) * weibull_min.rvs(1, scale=1, loc=0, size=N)
E = np.random.binomial(1, 0.99, size=N)
def cumulative_hazard(params, t, X):
    log_lambda_coefs_, rho_ = params[:2], params[-1]
    lambda_ = np.exp(np.dot(X, log_lambda_coefs_))
    return (t / lambda_) ** rho_

# these functions are almost identical to above,
# except they have an additional argument, X.
hazard = elementwise_grad(cumulative_hazard, argnum=1)

def log_hazard(params, t, X):
    return np.log(hazard(params, t, X))

def log_likelihood(params, t, e, X):
    return np.sum(e * log_hazard(params, t, X)) - np.sum(cumulative_hazard(params, t, X))

def negative_log_likelihood(params, t, e, X):
    return -log_likelihood(params, t, e, X)
results = minimize(
    value_and_grad(negative_log_likelihood),
    x0=np.array([0.0, 0.0, 1.0]),
    method=None,
    args=(T, E, X),
    jac=True,
    bounds=((None, None), (None, None), (0.00001, None)),
)
print(results)
      fun: 10012.505418738723
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 5.64816887e-06, -4.95439586e-05,  2.98904267e-03])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 10
      nit: 8
   status: 0
  success: True
        x: array([ 2.0380495 , -2.14856494,  0.99908182])
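Great, the estimated coefficients, roughly $2.04$ and $-2.15$, are close to the data-generating values of $2$ and $-2$, and $\hat{\rho} \approx 1$. Predicted survival curves for a new subject follow from $S(t \mid x) = \exp(-H(t; x))$; here's a minimal sketch with a hypothetical covariate vector `x_new`:

# Sketch: predicted survival curve for one new subject.
# x_new is a hypothetical covariate vector, not from the generated data.
x_new = np.array([[0.1, -0.1]])
t_grid = np.linspace(0.01, 3.0, 100)
surv_new = np.exp(-cumulative_hazard(results.x, t_grid, x_new))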
Let's move on to part 5, how to estimate the variances of the parameters.