from lifelines.estimation import AalenAdditiveFitter
import pandas as pd
import numpy as np
import patsy
%pylab inline
Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['datetime'] `%matplotlib` prevents importing * from pylab and numpy
data = pd.read_csv('../lifelines/datasets/divorce.dat', sep="\s{2,10}")
/Users/camerondavidson-pilon/.virtualenvs/data/lib/python2.7/site-packages/IPython/kernel/__main__.py:1: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators; you can avoid this warning by specifying engine='python'. if __name__ == '__main__':
data.head(10)
id | heduc | heblack | mixed | years | div | |
---|---|---|---|---|---|---|
0 | 9 | 12-15 years | No | No | 10.546 | No |
1 | 11 | < 12 years | No | No | 34.943 | No |
2 | 13 | < 12 years | No | No | 2.834 | Yes |
3 | 15 | < 12 years | No | No | 17.532 | Yes |
4 | 33 | 12-15 years | No | No | 1.418 | No |
5 | 36 | < 12 years | No | No | 48.033 | No |
6 | 43 | 16+ years | No | No | 16.706 | No |
7 | 47 | < 12 years | No | No | 24.999 | No |
8 | 50 | < 12 years | No | No | 24.999 | No |
9 | 56 | < 12 years | Yes | No | 3.869 | No |
df = patsy.dmatrix('heduc + heblack + heblack:mixed + years + mixed + div -1 ', data, return_type='dataframe')
df.head()
heduc[12-15 years] | heduc[16+ years] | heduc[< 12 years] | heblack[T.Yes] | mixed[T.Yes] | div[T.Yes] | heblack[T.Yes]:mixed[T.Yes] | years | |
---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 10.546 |
1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 34.943 |
2 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 2.834 |
3 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 17.532 |
4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1.418 |
aaf = AalenAdditiveFitter(fit_intercept=True, coef_penalizer=1.)
df.columns
Index([u'heduc[12-15 years]', u'heduc[16+ years]', u'heduc[< 12 years]', u'heblack[T.Yes]', u'mixed[T.Yes]', u'div[T.Yes]', u'heblack[T.Yes]:mixed[T.Yes]', u'years'], dtype='object')
timeline = np.linspace(0, 35, 1000)
aaf.fit(df, 'years', event_col='div[T.Yes]', timeline=timeline)
[-----------------100%-----------------] 1032 of 1032 complete in 1.1 sec
<lifelines.AalenAdditiveFitter: fitted with 3371 observations, 2339 censored>
figsize(12.5, 8.5)
#aaf.cumulative_hazards_.ix[:][["baseline"]].plot()
#aaf.cumulative_hazards_.ix[:][["bb","bw","wb","ww"]].plot()
aaf.cumulative_hazards_.plot()
plt.legend(loc='upper left')
<matplotlib.legend.Legend at 0x11b491d50>