13.1 Example

This example is subscription customers by Sergey Bryl’ in which he adapted model Fader and Hardie (2007)

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(reshape2)

## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

library(MLmetrics)

## 
## Attaching package: 'MLmetrics'
## 
## The following object is masked from 'package:base':
## 
##     Recall

# retention rate data
df_ret <- data.frame(month_lt = c(0:7),
                     case01 = c(1, .531, .452, .423, .394, .375, .356, .346),
                     case02 = c(1, .869, .743, .653, .593, .551, .517, .491),
                     case03 = c(1, .677, .562, .486, .412, .359, .332, .310),
                     case04 = c(1, .631, .468, .382, .326, .289, .262, .241)
                     ) %>%
        melt(., id.vars = c('month_lt'), variable.name = 'example', value.name = 'retention_rate')
 
ggplot(df_ret, aes(x = month_lt, y = retention_rate, group = example, color = example)) +
        theme_minimal() +
        facet_wrap(~ example) +
        scale_color_manual(values = c('#4e79a7', '#f28e2b', '#e15759', '#76b7b2')) +
        geom_line() +
        geom_point() +
        theme(plot.title = element_text(size = 20, face = 'bold', vjust = 2, hjust = 0.5),
              axis.text.x = element_text(size = 8, hjust = 0.5, vjust = .5, face = 'plain'),
              strip.text = element_text(face = 'bold', size = 12)) +
        ggtitle('Retention Rate')

Prediciton when we only have values on certain months

# functions for sBG distribution
churnBG <- Vectorize(function(alpha, beta, period) {
        t1 = alpha / (alpha + beta)
        result = t1
        if (period > 1) {
                result = churnBG(alpha, beta, period - 1) * (beta + period - 2) / (alpha + beta + period - 1)
        }
        return(result)
}, vectorize.args = c("period"))
 
survivalBG <- Vectorize(function(alpha, beta, period) {
        t1 = 1 - churnBG(alpha, beta, 1)
        result = t1
        if(period > 1){
                result = survivalBG(alpha, beta, period - 1) - churnBG(alpha, beta, period)
        }
        return(result)
}, vectorize.args = c("period"))
 
MLL <- function(alphabeta) {
        if(length(activeCust) != length(lostCust)) {
                stop("Variables activeCust and lostCust have different lengths: ",
                     length(activeCust), " and ", length(lostCust), ".")
        }
        t = length(activeCust) # number of periods
        alpha = alphabeta[1]
        beta = alphabeta[2]
        return(-as.numeric(
                sum(lostCust * log(churnBG(alpha, beta, 1:t))) +
                        activeCust[t]*log(survivalBG(alpha, beta, t))
        ))
}
 
 
df_ret <- df_ret %>%
        group_by(example) %>%
        mutate(activeCust = 1000 * retention_rate,
               lostCust = lag(activeCust) - activeCust,
               lostCust = ifelse(is.na(lostCust), 0, lostCust)) %>%
        ungroup()
 
         
ret_preds01 <- vector('list', 7)
for (i in c(1:7)) {
         
        df_ret_filt <- df_ret %>%
                filter(between(month_lt, 1, i) == TRUE & example == 'case01')
         
        activeCust <- c(df_ret_filt$activeCust)
        lostCust <- c(df_ret_filt$lostCust)
         
        opt <- optim(c(1, 1), MLL)
        retention_pred <- round(c(1, survivalBG(alpha = opt$par[1], beta = opt$par[2], c(1:7))), 3)
         
        df_pred <- data.frame(month_lt = c(0:7),
                              example = 'case01',
                              fact_months = i,
                              retention_pred = retention_pred)
        ret_preds01[[i]] <- df_pred
}

## Warning in log(churnBG(alpha, beta, 1:t)): NaNs produced

## Warning in log(churnBG(alpha, beta, 1:t)): NaNs produced

## Warning in log(survivalBG(alpha, beta, t)): NaNs produced

## Warning in log(churnBG(alpha, beta, 1:t)): NaNs produced

## Warning in log(survivalBG(alpha, beta, t)): NaNs produced

## Warning in log(churnBG(alpha, beta, 1:t)): NaNs produced

## Warning in log(survivalBG(alpha, beta, t)): NaNs produced

## Warning in log(churnBG(alpha, beta, 1:t)): NaNs produced

## Warning in log(survivalBG(alpha, beta, t)): NaNs produced

ret_preds01 <- as.data.frame(do.call('rbind', ret_preds01))
 
 
ret_preds02 <- vector('list', 7)
for (i in c(1:7)) {
         
        df_ret_filt <- df_ret %>%
                filter(between(month_lt, 1, i) == TRUE & example == 'case02')
         
        activeCust <- c(df_ret_filt$activeCust)
        lostCust <- c(df_ret_filt$lostCust)
         
        opt <- optim(c(1, 1), MLL)
        retention_pred <- round(c(1, survivalBG(alpha = opt$par[1], beta = opt$par[2], c(1:7))), 3)
         
        df_pred <- data.frame(month_lt = c(0:7),
                              example = 'case02',
                              fact_months = i,
                              retention_pred = retention_pred)
        ret_preds02[[i]] <- df_pred
}

## Warning in log(churnBG(alpha, beta, 1:t)): NaNs produced

## Warning in log(churnBG(alpha, beta, 1:t)): NaNs produced

## Warning in log(churnBG(alpha, beta, 1:t)): NaNs produced

## Warning in log(churnBG(alpha, beta, 1:t)): NaNs produced

## Warning in log(churnBG(alpha, beta, 1:t)): NaNs produced

## Warning in log(churnBG(alpha, beta, 1:t)): NaNs produced

## Warning in log(churnBG(alpha, beta, 1:t)): NaNs produced

ret_preds02 <- as.data.frame(do.call('rbind', ret_preds02))
 
 
ret_preds03 <- vector('list', 7)
for (i in c(1:7)) {
         
        df_ret_filt <- df_ret %>%
                filter(between(month_lt, 1, i) == TRUE & example == 'case03')
         
        activeCust <- c(df_ret_filt$activeCust)
        lostCust <- c(df_ret_filt$lostCust)
         
        opt <- optim(c(1, 1), MLL)
        retention_pred <- round(c(1, survivalBG(alpha = opt$par[1], beta = opt$par[2], c(1:7))), 3)
         
        df_pred <- data.frame(month_lt = c(0:7),
                              example = 'case03',
                              fact_months = i,
                              retention_pred = retention_pred)
        ret_preds03[[i]] <- df_pred
}
 
ret_preds03 <- as.data.frame(do.call('rbind', ret_preds03))
 
 
ret_preds04 <- vector('list', 7)
for (i in c(1:7)) {
         
        df_ret_filt <- df_ret %>%
                filter(between(month_lt, 1, i) == TRUE & example == 'case04')
         
        activeCust <- c(df_ret_filt$activeCust)
        lostCust <- c(df_ret_filt$lostCust)
         
        opt <- optim(c(1, 1), MLL)
        retention_pred <- round(c(1, survivalBG(alpha = opt$par[1], beta = opt$par[2], c(1:7))), 3)
         
        df_pred <- data.frame(month_lt = c(0:7),
                              example = 'case04',
                              fact_months = i,
                              retention_pred = retention_pred)
        ret_preds04[[i]] <- df_pred
}
 
ret_preds04 <- as.data.frame(do.call('rbind', ret_preds04))
 
 
ret_preds <- bind_rows(ret_preds01, ret_preds02, ret_preds03, ret_preds04)
 
 
 
 
df_ret_all <- df_ret %>%
        select(month_lt, example, retention_rate) %>%
        left_join(., ret_preds, by = c('month_lt', 'example'))
 
ggplot(df_ret_all, aes(x = month_lt, y = retention_rate, group = example, color = example)) +
        theme_minimal() +
        facet_wrap(~ example) +
        scale_color_manual(values = c('#4e79a7', '#f28e2b', '#e15759', '#76b7b2')) +
        geom_line(size = 1.5) +
        geom_point(size = 1.5) +
        geom_line(aes(y = retention_pred, group = fact_months), alpha = 0.5) +
        theme(plot.title = element_text(size = 20, face = 'bold', vjust = 2, hjust = 0.5),
              axis.text.x = element_text(size = 8, hjust = 0.5, vjust = .5, face = 'plain'),
              strip.text = element_text(face = 'bold', size = 12)) +
        ggtitle('Retention Rate Projections')

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

calculate the average LTV for case03 based on two historical months with a forecast horizon of 24 months and a subscription price of $1

### LTV prediction ###
df_ltv_03 <- df_ret %>%
        filter(between(month_lt, 1, 2) == TRUE & example == 'case03')
 
activeCust <- c(df_ltv_03$activeCust)
lostCust <- c(df_ltv_03$lostCust)
 
opt <- optim(c(1, 1), MLL)
retention_pred <- round(c(survivalBG(alpha = opt$par[1], beta = opt$par[2], c(3:24))), 3)
 
df_pred <- data.frame(month_lt = c(3:24),
                      retention_pred = retention_pred)
 
df_ltv_03 <- df_ret %>%
        filter(between(month_lt, 0, 2) == TRUE & example == 'case03') %>%
        select(month_lt, retention_rate) %>%
        bind_rows(., df_pred) %>%
        mutate(retention_rate_calc = ifelse(is.na(retention_rate), retention_pred, retention_rate),
               ltv_monthly = retention_rate_calc * 1,
               ltv_cum = round(cumsum(ltv_monthly), 2))
# average LTV of $9.33. actual data for the observed periods (from 0 to 2nd months) and the predicted retention for the future periods (from 3rd to 24th months)