start with fitting data to a parametric model
so we need to create the log-likehood
let's create some fake data first.
%matplotlib inline
import numpy as np
from matplotlib import pyplot as plt
T = (np.random.exponential(size=1000)/1.5) ** 2.3
plt.hist(T, bins=50);
Let's use the Weibull parametric model: https://en.wikipedia.org/wiki/Weibull_distribution
def pdf(params, t):
lambda_, rho_ = params
# I'm not going to make you type this out - it's annoying and error prone.
return rho_ / lambda_ * (t / lambda_) ** (rho_ - 1) * np.exp(-(t/lambda_) ** rho_)
# okay, but we actually need the _log_ of the pdf
def log_pdf(params, t):
lambda_, rho_ = params
# I'm not going to make you type this out - it's annoying and error prone.
return np.log(rho_) - np.log(lambda_) + (rho_ - 1) * (np.log(t) - np.log(lambda_)) - (t/lambda_) ** rho_
# now we can define the log likehood
def log_likelihood(params, t):
return np.sum(log_pdf(params, t))
scipy.optimize.minimize
¶from scipy.optimize import minimize
results = minimize(log_likelihood,
x0 = np.array([1.0, 1.0]), # some initial guess of the parameters.
method=None,
args=(T, ))
print(results)
fun: nan hess_inv: array([[1, 0], [0, 1]]) jac: array([nan, nan]) message: 'Desired error not necessarily achieved due to precision loss.' nfev: 448 nit: 1 njev: 112 status: 2 success: False x: array([ -29.10248229, 1034.80182732])
/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in log # Remove the CWD from sys.path while we load stuff. /Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in power # Remove the CWD from sys.path while we load stuff. /Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in log # Remove the CWD from sys.path while we load stuff. /Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in power # Remove the CWD from sys.path while we load stuff. /Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in log # Remove the CWD from sys.path while we load stuff. /Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in power # Remove the CWD from sys.path while we load stuff.
def log_likelihood(params, t):
return -np.sum(log_pdf(params, t))
results = minimize(log_likelihood,
x0 = np.array([1.0, 1.0]), # some initial guess of the parameters.
method=None,
args=(T, ))
print(results)
fun: nan hess_inv: array([[1, 0], [0, 1]]) jac: array([nan, nan]) message: 'Desired error not necessarily achieved due to precision loss.' nfev: 448 nit: 1 njev: 112 status: 2 success: False x: array([ 31.10248229, -1032.80182732])
/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in log # Remove the CWD from sys.path while we load stuff. /Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: overflow encountered in power # Remove the CWD from sys.path while we load stuff. /Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in log # Remove the CWD from sys.path while we load stuff. /Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: overflow encountered in power # Remove the CWD from sys.path while we load stuff. /Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in log # Remove the CWD from sys.path while we load stuff. /Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: overflow encountered in power # Remove the CWD from sys.path while we load stuff.
# weibull parameters must be greater than 0!
# we can "nudge" the minimizer to understand this using the bounds argument
results = minimize(log_likelihood,
x0 = np.array([1.0, 1.0]),
method=None,
args=(T, ),
bounds=((0, None), (0, None)))
print(results)
fun: nan hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64> jac: array([ 59.76914963, 2616.4018891 ]) message: b'ABNORMAL_TERMINATION_IN_LNSRCH' nfev: 129 nit: 1 status: 2 success: False x: array([1.16054112, 0.99805959])
/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: divide by zero encountered in log # Remove the CWD from sys.path while we load stuff. /Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in add # Remove the CWD from sys.path while we load stuff. /Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: divide by zero encountered in true_divide # Remove the CWD from sys.path while we load stuff. /Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in double_scalars # Remove the CWD from sys.path while we load stuff.
minimize
is a very flexible function for small-mid size parameter optimization¶Let's move to Part 2.