15.2 funneljoin
blog: https://hookedondata.org/introducing-the-funneljoin-package/
video: https://www.youtube.com/watch?v=-n4XaYHDlG8
15.2.1 after_join()
library(funneljoin)
answers <- vroom::vroom("../data/rquestions/Answers.csv")
questions <- vroom::vroom("../data/rquestions/Questions.csv")
Look at the data:
(answers <- answers %>%
janitor::clean_names() %>%
filter(!is.na(owner_user_id)))
#> # A tibble: 636,662 x 7
#> id owner_user_id creation_date parent_id score is_accepted_ans~ body
#> <chr> <chr> <chr> <chr> <dbl> <lgl> <chr>
#> 1 79741 3259 2008-09-17T03~ 79709 -1 FALSE "<p>It's ~
#> 2 79768 6043 2008-09-17T03~ 79709 9 FALSE "<p>use v~
#> 3 79779 8002 2008-09-17T03~ 79709 0 FALSE "<p>Third~
#> 4 79827 14257 2008-09-17T03~ 79709 1 FALSE "<p>I'm n~
#> 5 79893 14928 2008-09-17T04~ 79709 6 FALSE "<p>Remem~
#> 6 83162 15842 2008-09-17T13~ 77434 70 FALSE "<p>If yo~
#> # ... with 636,656 more rows
(questions <- questions %>%
janitor::clean_names() %>%
filter(!is.na(owner_user_id)))
#> # A tibble: 857,160 x 6
#> id owner_user_id creation_date score title body
#> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 77434 14008 2008-09-16T21:~ 171 How to access t~ "<p>Suppose I hav~
#> 2 95007 15842 2008-09-18T17:~ 56 Explain the qua~ "<p>I've been mys~
#> 3 255697 1941213 2008-11-01T15:~ 4 Is there an R p~ "<p>I'm looking f~
#> 4 359438 2173 2008-12-11T14:~ 4 Optimization pa~ "<p>Does anyone k~
#> 5 439526 37751 2009-01-13T15:~ 23 Thinking in Vec~ "<p>I know that R~
#> 6 445059 37751 2009-01-14T23:~ 12 Vectorize my th~ "<p>So earlier I ~
#> # ... with 857,154 more rows
first_answer_after_first_question <- questions %>%
after_left_join(answers,
by_time = "creation_date",
by_user = "owner_user_id",
type = "first-firstafter",
suffix = c("_question", "_answer"))
first_answer_after_first_question
#> # A tibble: 429,900 x 12
#> id_question owner_user_id creation_date_q~ score_question title body_question
#> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 "strsplit(~ "'" ' fixed=TRUE))) <NA> "colnames(df~
#> 2 "(gsub('[\~ "'\"\"" ' input$txt)) "'\"~ <NA>
#> 3 "str_repla~ "'\\\\|'" ' ')) <NA> "</code></pr~
#> 4 "'-'" "']'" ' ' "'\"~ "f2=f1.lower~
#> 5 "'B'" "'C" ' 'D') <NA> "from <- ~
#> 6 "'" "'DE" ' 'CA "', ~ "print(Websi~
#> # ... with 429,894 more rows, and 6 more variables: id_answer <chr>,
#> # creation_date_answer <chr>, parent_id <chr>, score_answer <dbl>,
#> # is_accepted_answer <lgl>, body_answer <chr>
type
determines funnel types:
15.2.2 funnel in one table
Sometimes you have all the data you need in one table. For example, let’s look at this table of user activity on a website.
activity <- tibble::tribble(
~ "user_id", ~ "event", ~ "timestamp",
1, "landing", "2019-07-01",
1, "registration", "2019-07-02",
1, "purchase", "2019-07-07",
1, "purchase", "2019-07-10",
2, "landing", "2019-08-01",
2, "registration", "2019-08-15",
3, "landing", "2019-05-01",
3, "registration", "2019-06-01",
3, "purchase", "2019-06-04",
4, "landing", "2019-06-13"
)
activity %>%
funnel_start(moment_type = "landing",
moment = "event",
tstamp = "timestamp",
user = "user_id")
#> # A tibble: 4 x 2
#> user_id timestamp_landing
#> <dbl> <chr>
#> 1 1 2019-07-01
#> 2 2 2019-08-01
#> 3 3 2019-05-01
#> 4 4 2019-06-13
activity %>%
funnel_start(moment_type = "landing",
moment = "event",
tstamp = "timestamp",
user = "user_id") %>%
funnel_step(moment_type = "registration",
type = "first-firstafter")
#> # A tibble: 4 x 3
#> user_id timestamp_landing timestamp_registration
#> <dbl> <chr> <chr>
#> 1 3 2019-05-01 2019-06-01
#> 2 4 2019-06-13 <NA>
#> 3 1 2019-07-01 2019-07-02
#> 4 2 2019-08-01 2019-08-15
activity %>%
funnel_start(moment_type = "landing",
moment = "event",
tstamp = "timestamp",
user = "user_id") %>%
funnel_step(moment_type = "registration",
type = "first-firstafter") %>%
funnel_step(moment_type = "purchase",
type = "first-firstafter")
#> # A tibble: 4 x 4
#> user_id timestamp_landing timestamp_registration timestamp_purchase
#> <dbl> <chr> <chr> <chr>
#> 1 3 2019-05-01 2019-06-01 2019-06-04
#> 2 1 2019-07-01 2019-07-02 2019-07-07
#> 3 2 2019-08-01 2019-08-15 <NA>
#> 4 4 2019-06-13 <NA> <NA>