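The data show the length of remission in weeks for two groups of leukemia patients, treated and control, and were analyzed by Cox in his original proportional hazards paper. The data are available in a file containing three columns: the group code (1 = control, 2 = treated), the length of remission in weeks, and an event indicator (1 = event observed, 0 = censored).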
Thus, the third and fourth observations, 6 and 6+, corresponding to a death and a censored observation at six weeks, are coded 6, 1 and 6, 0, respectively.
from lifelines.estimation import KaplanMeierFitter, NelsonAalenFitter
import pandas as pd
import numpy as np
# IPython magic: populates the interactive namespace from numpy and matplotlib
# (this is where the bare `figsize` and `ylim` names used in this notebook come from).
%pylab inline
# NOTE(review): presumably sets the default figure size to 12.5 x 6 inches — confirm.
figsize(12.5,6)
Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['datetime'] `%matplotlib` prevents importing * from pylab and numpy
# Load the remission data: whitespace-delimited with no header row, so the
# columns are labelled 0, 1, 2.
# The separator is a regular expression (one to three whitespace characters).
# Regex separators are only supported by the 'python' parser engine, so it is
# requested explicitly instead of relying on the fallback that emits a
# ParserWarning. The raw string keeps "\s" from being read as a string escape.
data = pd.read_csv('../lifelines/datasets/gehan.dat', sep=r"\s{1,3}", header=None, engine='python')
/Users/camerondavidson-pilon/.virtualenvs/data/lib/python2.7/site-packages/IPython/kernel/__main__.py:1: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators; you can avoid this warning by specifying engine='python'. if __name__ == '__main__':
# Display the last rows of the frame as a quick check on how the file was parsed.
data.tail()
|    | 0 | 1  | 2 |
|----|---|----|---|
| 37 | 1 | 12 | 1 |
| 38 | 1 | 15 | 1 |
| 39 | 1 | 17 | 1 |
| 40 | 1 | 22 | 1 |
| 41 | 1 | 23 | 1 |
# Discard any rows that contain missing values before analysis.
data = data.dropna()
# Call form of print: identical output under Python 2 for a single argument,
# and valid syntax under Python 3 (the statement form is not).
print(data.head())
   0  1  2
0  2  6  1
1  2  6  1
2  2  6  1
3  2  6  0
4  2  7  1
# Boolean mask over patients: group code 2 in column 0 marks the treated arm.
treatment = data[0] == 2
control = ~treatment

# Durations come from column 1 and the event flags from column 2.
T, E = data[1], data[2]

# Shared evaluation grid: 150 evenly spaced points from 0 to 40.
t = np.linspace(0, 40, num=150)

# One fitter is refit per arm; each curve is drawn right after its fit so the
# second fit does not overwrite the first estimate before it is plotted.
kmf = KaplanMeierFitter()

kmf.fit(T[treatment], event_observed=E[treatment], timeline=t, label='With treatment')
ax = kmf.plot()

kmf.fit(T[control], event_observed=E[control], timeline=t, label="Without treatment")
kmf.plot(ax=ax, c="r")

# Leave a little headroom above 1 on the y-axis.
ylim(0, 1.05)
(0, 1.05)
naf = NelsonAalenFitter()

# Refit the same estimator for each arm (treated first) and draw every
# cumulative hazard curve onto one shared axes. Passing ax=None on the first
# pass is the same as omitting it, so the first plot creates the axes.
ax = None
for group_mask, group_label in (
    (treatment, "With treatment"),
    (~treatment, "Without treatment"),
):
    naf.fit(T[group_mask], timeline=t, event_observed=E[group_mask], label=group_label)
    ax = naf.cumulative_hazard_.plot(ax=ax)
from lifelines import AalenAdditiveFitter
# Fit Aalen's additive model on the whole frame: column 1 is the duration,
# column 2 the event flag, and the fit is evaluated on the shared grid `t`.
# NOTE(review): the remaining column (0, the group code) presumably enters as
# the covariate — confirm against the lifelines documentation.
aaf = AalenAdditiveFitter()
aaf.fit(data, duration_col=1, event_col=2, timeline=t)
[-----------------100%-----------------] 30 of 30 complete in 0.0 sec
<lifelines.AalenAdditiveFitter: fitted with 42 observations, 12 censored>
# Plot the cumulative hazards stored on the fitted additive model.
aaf.cumulative_hazards_.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x109287790>