15.2 funneljoin
blog: https://hookedondata.org/introducing-the-funneljoin-package/
video: https://www.youtube.com/watch?v=-n4XaYHDlG8
15.2.1 after_join()
library(funneljoin)
answers <- vroom::vroom("../data/rquestions/Answers.csv") 
questions <- vroom::vroom("../data/rquestions/Questions.csv")Look at the data:
(answers <- answers %>% 
  janitor::clean_names() %>% 
  filter(!is.na(owner_user_id)))
#> # A tibble: 636,662 x 7
#>   id    owner_user_id creation_date  parent_id score is_accepted_ans~ body      
#>   <chr> <chr>         <chr>          <chr>     <dbl> <lgl>            <chr>     
#> 1 79741 3259          2008-09-17T03~ 79709        -1 FALSE            "<p>It's ~
#> 2 79768 6043          2008-09-17T03~ 79709         9 FALSE            "<p>use v~
#> 3 79779 8002          2008-09-17T03~ 79709         0 FALSE            "<p>Third~
#> 4 79827 14257         2008-09-17T03~ 79709         1 FALSE            "<p>I'm n~
#> 5 79893 14928         2008-09-17T04~ 79709         6 FALSE            "<p>Remem~
#> 6 83162 15842         2008-09-17T13~ 77434        70 FALSE            "<p>If yo~
#> # ... with 636,656 more rows
(questions <- questions %>% 
  janitor::clean_names() %>% 
  filter(!is.na(owner_user_id)))
#> # A tibble: 857,160 x 6
#>   id     owner_user_id creation_date   score title            body              
#>   <chr>  <chr>         <chr>           <chr> <chr>            <chr>             
#> 1 77434  14008         2008-09-16T21:~ 171   How to access t~ "<p>Suppose I hav~
#> 2 95007  15842         2008-09-18T17:~ 56    Explain the qua~ "<p>I've been mys~
#> 3 255697 1941213       2008-11-01T15:~ 4     Is there an R p~ "<p>I'm looking f~
#> 4 359438 2173          2008-12-11T14:~ 4     Optimization pa~ "<p>Does anyone k~
#> 5 439526 37751         2009-01-13T15:~ 23    Thinking in Vec~ "<p>I know that R~
#> 6 445059 37751         2009-01-14T23:~ 12    Vectorize my th~ "<p>So earlier I ~
#> # ... with 857,154 more rowsfirst_answer_after_first_question <- questions %>%
  after_left_join(answers,
             by_time = "creation_date",
             by_user = "owner_user_id",
             type = "first-firstafter", 
             suffix = c("_question", "_answer")) 
first_answer_after_first_question
#> # A tibble: 429,900 x 12
#>   id_question owner_user_id creation_date_q~ score_question title body_question
#>   <chr>       <chr>         <chr>            <chr>          <chr> <chr>        
#> 1 "strsplit(~ "'"           '                fixed=TRUE)))   <NA> "colnames(df~
#> 2 "(gsub('[\~ "'\"\""       '                input$txt))    "'\"~  <NA>        
#> 3 "str_repla~ "'\\\\|'"     '                '))             <NA> "</code></pr~
#> 4 "'-'"       "']'"         '                '              "'\"~ "f2=f1.lower~
#> 5 "'B'"       "'C"          '                'D')            <NA> "from <- ~
#> 6 "'"         "'DE"         '                'CA            "', ~ "print(Websi~
#> # ... with 429,894 more rows, and 6 more variables: id_answer <chr>,
#> #   creation_date_answer <chr>, parent_id <chr>, score_answer <dbl>,
#> #   is_accepted_answer <lgl>, body_answer <chr>type determines funnel types:

15.2.2 funnel in one table
Sometimes you have all the data you need in one table. For example, let’s look at this table of user activity on a website.
activity <- tibble::tribble(
  ~ "user_id", ~ "event", ~ "timestamp",
  1, "landing", "2019-07-01",
  1, "registration", "2019-07-02",
  1, "purchase", "2019-07-07",
  1, "purchase", "2019-07-10",
  2, "landing", "2019-08-01",
  2, "registration", "2019-08-15",
  3, "landing", "2019-05-01",
  3, "registration", "2019-06-01",
  3, "purchase", "2019-06-04",
  4, "landing", "2019-06-13"
)activity %>%
  funnel_start(moment_type = "landing", 
               moment = "event", 
               tstamp = "timestamp", 
               user = "user_id")
#> # A tibble: 4 x 2
#>   user_id timestamp_landing
#>     <dbl> <chr>            
#> 1       1 2019-07-01       
#> 2       2 2019-08-01       
#> 3       3 2019-05-01       
#> 4       4 2019-06-13activity %>%
  funnel_start(moment_type = "landing", 
               moment = "event", 
               tstamp = "timestamp", 
               user = "user_id") %>%
  funnel_step(moment_type = "registration",
              type = "first-firstafter")
#> # A tibble: 4 x 3
#>   user_id timestamp_landing timestamp_registration
#>     <dbl> <chr>             <chr>                 
#> 1       3 2019-05-01        2019-06-01            
#> 2       4 2019-06-13        <NA>                  
#> 3       1 2019-07-01        2019-07-02            
#> 4       2 2019-08-01        2019-08-15activity %>%
  funnel_start(moment_type = "landing", 
               moment = "event", 
               tstamp = "timestamp", 
               user = "user_id") %>%
  funnel_step(moment_type = "registration",
              type = "first-firstafter") %>% 
  funnel_step(moment_type = "purchase",
              type = "first-firstafter")
#> # A tibble: 4 x 4
#>   user_id timestamp_landing timestamp_registration timestamp_purchase
#>     <dbl> <chr>             <chr>                  <chr>             
#> 1       3 2019-05-01        2019-06-01             2019-06-04        
#> 2       1 2019-07-01        2019-07-02             2019-07-07        
#> 3       2 2019-08-01        2019-08-15             <NA>              
#> 4       4 2019-06-13        <NA>                   <NA>