22.2 Sales Funnel

22.2.1 Example 1

This example is based on Sergey Bryl

\[ Awareness \to Interest \to Desire \to Action \]

Step in the funnel:

  • 0 step (necessary condition) – customer visits a site for the first time
  • 1st step (awareness) – visits two site’s pages
  • 2nd step (interest) – reviews a product page
  • 3rd step (desire) – adds a product to the shopping cart
  • 4th step (action) – completes purchase

Simulate data

library(tidyverse)
library(purrrlyr)
library(reshape2)
 
##### simulating the "real" data #####
set.seed(454)
df_raw <-
  data.frame(
    customer_id = paste0('id', sample(c(1:5000), replace = TRUE)),
    date = as.POSIXct(
      rbeta(10000, 0.7, 10) * 10000000,
      origin = '2017-01-01',
      tz = "UTC"
    ),
    channel = paste0('channel_', sample(
      c(0:7),
      10000,
      replace = TRUE,
      prob = c(0.2, 0.12, 0.03, 0.07, 0.15, 0.25, 0.1, 0.08)
    )),
    site_visit = 1
  ) %>%
  
  mutate(
    two_pages_visit = sample(c(0, 1), 10000, replace = TRUE, prob = c(0.8, 0.2)),
    product_page_visit = ifelse(
      two_pages_visit == 1,
      sample(
        c(0, 1),
        length(two_pages_visit[which(two_pages_visit == 1)]),
        replace = TRUE,
        prob = c(0.75, 0.25)
      ),
      0
    ),
    add_to_cart = ifelse(
      product_page_visit == 1,
      sample(
        c(0, 1),
        length(product_page_visit[which(product_page_visit == 1)]),
        replace = TRUE,
        prob = c(0.1, 0.9)
      ),
      0
    ),
    purchase = ifelse(add_to_cart == 1,
                      sample(
                        c(0, 1),
                        length(add_to_cart[which(add_to_cart == 1)]),
                        replace = TRUE,
                        prob = c(0.02, 0.98)
                      ),
                      0)
  ) %>%
  dmap_at(c('customer_id', 'channel'), as.character) %>%
  arrange(date) %>%
  mutate(session_id = row_number()) %>%
  arrange(customer_id, session_id)
df_raw <-
  reshape2::melt(
    df_raw,
    id.vars = c('customer_id', 'date', 'channel', 'session_id'),
    value.name = "trigger",
    variable.name = 'event'
  ) %>%
  filter(trigger == 1) %>%
  select(-trigger) %>%
  arrange(customer_id, date)

Preprocessing

### removing not first events ###
df_customers <- df_raw %>%
  group_by(customer_id, event) %>%
  filter(date == min(date)) %>%
  ungroup()

Assumption: all customers are first-time buyers. Hence, every next purchase as an event will be removed with the above code.

Calculate channel probability

### Sales Funnel probabilities ###
sf_probs <- df_customers %>%
    
    group_by(event) %>%
    summarise(customers_on_step = n()) %>%
    ungroup() %>%
    
    mutate(
        sf_probs = round(customers_on_step / customers_on_step[event == 'site_visit'], 3),
        sf_probs_step = round(customers_on_step / lag(customers_on_step), 3),
        sf_probs_step = ifelse(is.na(sf_probs_step) == TRUE, 1, sf_probs_step),
        sf_importance = 1 - sf_probs_step,
        sf_importance_weighted = sf_importance / sum(sf_importance)
    )

Visualization

### Sales Funnel visualization ###
df_customers_plot <- df_customers %>%
    
    group_by(event) %>%
    arrange(channel) %>%
    mutate(pl = row_number()) %>%
    ungroup() %>%
    
    mutate(
        pl_new = case_when(
            event == 'two_pages_visit' ~ round((max(pl[event == 'site_visit']) - max(pl[event == 'two_pages_visit'])) / 2),
            event == 'product_page_visit' ~ round((max(pl[event == 'site_visit']) - max(pl[event == 'product_page_visit'])) / 2),
            event == 'add_to_cart' ~ round((max(pl[event == 'site_visit']) - max(pl[event == 'add_to_cart'])) / 2),
            event == 'purchase' ~ round((max(pl[event == 'site_visit']) - max(pl[event == 'purchase'])) / 2),
            TRUE ~ 0
        ),
        pl = pl + pl_new
    )

df_customers_plot$event <-
    factor(
        df_customers_plot$event,
        levels = c(
            'purchase',
            'add_to_cart',
            'product_page_visit',
            'two_pages_visit',
            'site_visit'
        )
    )

# color palette
cols <- c(
    '#4e79a7',
    '#f28e2b',
    '#e15759',
    '#76b7b2',
    '#59a14f',
    '#edc948',
    '#b07aa1',
    '#ff9da7',
    '#9c755f',
    '#bab0ac'
)

ggplot(df_customers_plot, aes(x = event, y = pl)) +
    theme_minimal() +
    scale_colour_manual(values = cols) +
    coord_flip() +
    geom_line(aes(group = customer_id, color = as.factor(channel)), size = 0.05) +
    geom_text(
        data = sf_probs,
        aes(
            x = event,
            y = 1,
            label = paste0(sf_probs * 100, '%')
        ),
        size = 4,
        fontface = 'bold'
    ) +
    guides(color = guide_legend(override.aes = list(size = 2))) +
    theme(
        legend.position = 'bottom',
        legend.direction = "horizontal",
        panel.grid.major.x = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(
            size = 20,
            face = "bold",
            vjust = 2,
            color = 'black',
            lineheight = 0.8
        ),
        axis.title.y = element_text(size = 16, face = "bold"),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_text(
            size = 8,
            angle = 90,
            hjust = 0.5,
            vjust = 0.5,
            face = "plain"
        )
    ) +
    ggtitle("Sales Funnel visualization - all customers journeys")

Calculate attribution

### computing attribution ###
df_attrib <- df_customers %>%
    # removing customers without purchase
    group_by(customer_id) %>%
    filter(any(as.character(event) == 'purchase')) %>%
    ungroup() %>%
    
    # joining step's importances
    left_join(., sf_probs %>% select(event, sf_importance_weighted), by = 'event') %>%
    
    group_by(channel) %>%
    summarise(tot_attribution = sum(sf_importance_weighted)) %>%
    ungroup()

22.2.2 Example 2

Code from Sergey Bryl

library(dplyr)
library(ggplot2)
library(reshape2)

# creating a data samples
# content
df.content <- data.frame(
    content = c(
        'main',
        'ad landing',
        'product 1',
        'product 2',
        'product 3',
        'product 4',
        'shopping cart',
        'thank you page'
    ),
    step = c(
        'awareness',
        'awareness',
        'interest',
        'interest',
        'interest',
        'interest',
        'desire',
        'action'
    ),
    number = c(150000, 80000,
               80000, 40000, 35000, 25000,
               130000,
               120000)
)
# customers
df.customers <- data.frame(
    content = c('new', 'engaged', 'loyal'),
    step = c('new', 'engaged', 'loyal'),
    number = c(25000, 40000, 55000)
)
# combining two data sets
df.all <- rbind(df.content, df.customers)

# calculating dummies, max and min values of X for plotting
df.all <- df.all %>%
    group_by(step) %>%
    mutate(totnum = sum(number)) %>%
    ungroup() %>%
    mutate(dum = (max(totnum) - totnum) / 2,
           maxx = totnum + dum,
           minx = dum)

# data frame for plotting funnel lines
df.lines <- df.all %>%
    distinct(step, maxx, minx)

# data frame with dummies
df.dum <- df.all %>%
    distinct(step, dum) %>%
    mutate(content = 'dummy',
           number = dum) %>%
    select(content, step, number)

# data frame with rates
conv <- df.all$totnum[df.all$step == 'action']

df.rates <- df.all %>%
    distinct(step, totnum) %>%
    mutate(
        prevnum = lag(totnum),
        rate = ifelse(
            step == 'new' | step == 'engaged' | step == 'loyal',
            round(totnum / conv, 3),
            round(totnum / prevnum, 3)
        )
    ) %>%
    select(step, rate)
df.rates <- na.omit(df.rates)

# creting final data frame
df.all <- df.all %>%
    select(content, step, number)

df.all <- rbind(df.all, df.dum)

# defining order of steps
df.all$step <-
    factor(
        df.all$step,
        levels = c(
            'loyal',
            'engaged',
            'new',
            'action',
            'desire',
            'interest',
            'awareness'
        )
    )
df.all <- df.all %>%
    arrange(desc(step))
list1 <- df.all %>% distinct(content) %>%
    filter(content != 'dummy')
df.all$content <-
    factor(df.all$content, levels = c(as.character(list1$content), 'dummy'))

# calculating position of labels
df.all <- df.all %>%
    arrange(step, desc(content)) %>%
    group_by(step) %>%
    mutate(pos = cumsum(number) - 0.5 * number) %>%
    ungroup()

# creating custom palette with 'white' color for dummies
cols <- c(
    "#fec44f",
    "#fc9272",
    "#a1d99b",
    "#fee0d2",
    "#2ca25f",
    "#8856a7",
    "#43a2ca",
    "#fdbb84",
    "#e34a33",
    "#a6bddb",
    "#dd1c77",
    "#ffffff"
)

# plotting chart
ggplot() +
    theme_minimal() +
    coord_flip() +
    scale_fill_manual(values = cols) +
    geom_bar(
        data = df.all,
        aes(x = step, y = number, fill = content),
        stat = "identity",
        width = 1
    ) +
    geom_text(
        data = df.all[df.all$content != 'dummy',],
        aes(
            x = step,
            y = pos,
            label = paste0(content, '-', number / 1000, 'K')
        ),
        size = 4,
        color = 'white',
        fontface = "bold"
    ) +
    geom_ribbon(data = df.lines,
                aes(
                    x = step,
                    ymax = max(maxx),
                    ymin = maxx,
                    group = 1
                ),
                fill = 'white') +
    geom_line(
        data = df.lines,
        aes(x = step, y = maxx, group = 1),
        color = 'darkred',
        size = 4
    ) +
    geom_ribbon(data = df.lines,
                aes(
                    x = step,
                    ymax = minx,
                    ymin = min(minx),
                    group = 1
                ),
                fill = 'white') +
    geom_line(
        data = df.lines,
        aes(x = step, y = minx, group = 1),
        color = 'darkred',
        size = 4
    ) +
    geom_text(
        data = df.rates,
        aes(
            x = step,
            y = (df.lines$minx[-1]),
            label = paste0(rate * 100, '%')
        ),
        hjust = 1.2,
        color = 'darkblue',
        fontface = "bold"
    ) +
    theme(
        legend.position = 'none',
        axis.ticks = element_blank(),
        axis.text.x = element_blank(),
        axis.title.x = element_blank()
    )