#!/usr/bin/env python
# coding: utf-8
"""Fit a stratified Cox proportional-hazards model to the Telco churn data.

Notebook export: downloads the Telco customer-churn CSV, collapses the
"No ... service" placeholder values to "No", fits a lifelines
``CoxPHFitter`` with ``tenure`` as the duration and ``Churn`` as the
event, stratified on the services a customer subscribes to, and plots the
fitted coefficients and the partial effect of ``Contract``.
"""

# In[1]:

# The IPython magics only exist inside a notebook/IPython session; skip them
# when the export is executed with a plain interpreter instead of crashing.
try:
    _ipython = get_ipython()  # noqa: F821 - injected by IPython
except NameError:
    _ipython = None

if _ipython is not None:
    _ipython.run_line_magic('matplotlib', 'inline')
    _ipython.run_line_magic('config', "InlineBackend.figure_format = 'retina'")

# Imported after the backend magics, in the same order as the notebook.
from matplotlib import pyplot as plt
import pandas as pd
from lifelines import CoxPHFitter


# In[2]:

churn_data = pd.read_csv('https://raw.githubusercontent.com/'
                         'treselle-systems/customer_churn_analysis/'
                         'master/WA_Fn-UseC_-Telco-Customer-Churn.csv')
churn_data.head()  # displayed only when run as a notebook cell


# In[3]:

churn_data = churn_data.set_index('customerID')
churn_data = churn_data.drop(['TotalCharges'], axis=1)

# The dataset is naturally hierarchical: some columns only apply to some
# users. E.g., if you don't have internet then the column OnlineBackup isn't
# applicable, as its value is "No internet service". We map such values back
# to "No", and handle the hierarchical nature by stratifying on the different
# services a user may have.


def _collapse_no_service(value):
    """Return "No" for any value whose text starts with "No ", else *value*."""
    return "No" if str(value).startswith("No ") else value


# Column-wise Series.map is the element-wise equivalent of the deprecated
# DataFrame.applymap and works on both old and new pandas releases.
churn_data = churn_data.apply(lambda column: column.map(_collapse_no_service))
churn_data['Churn'] = (churn_data['Churn'] == "Yes")

strata_cols = ['InternetService', 'StreamingMovies', 'StreamingTV', 'PhoneService']

print(churn_data.columns)


# In[4]:

# Covariates entering the model; joined with " + " into the formula string.
covariates = [
    "gender",
    "SeniorCitizen",
    "Partner",
    "Dependents",
    "MultipleLines",
    "OnlineSecurity",
    "OnlineBackup",
    "DeviceProtection",
    "TechSupport",
    "Contract",
    "PaperlessBilling",
    "PaymentMethod",
    "MonthlyCharges",
]

cph = CoxPHFitter().fit(
    churn_data,
    'tenure',
    'Churn',
    formula=" + ".join(covariates),
    strata=strata_cols,
)


# In[5]:

cph  # displayed only when run as a notebook cell


# In[6]:

cph.print_summary()


# In[7]:

# plt.subplots returns (figure, axes); only the axes is needed for plotting.
_, ax = plt.subplots(figsize=(8, 6))
cph.plot(ax=ax)


# In[8]:

cph.plot_partial_effects_on_outcome(
    'Contract',
    values=["Month-to-month", "One year", "Two year"],
    plot_baseline=False,
)