5.3 综合运用

5.3.1 日报同环比计算

• 常用函数

round_date()函数根据要求周期回滚日期

floor_date(today(),unit = 'year')
#> [1] "2021-01-01"
floor_date(today(),unit = 'month')
#> [1] "2021-06-01"
floor_date(today(),unit = 'week')
#> [1] "2021-06-20"

round_date(
x,
unit = "second",
week_start = getOption("lubridate.week.start", 7)
)

floor_date(
x,
unit = "seconds",
week_start = getOption("lubridate.week.start", 7)
)

ceiling_date(
x,
unit = "seconds",
change_on_boundary = NULL,
week_start = getOption("lubridate.week.start", 7)
)

change_on_boundary参数

ceiling_date(ymd_hms('2021-01-1 00:00:00'),'month')
#> [1] "2021-01-01 UTC"
ceiling_date(ymd_hms('2021-01-1 00:00:00'),'month',change_on_boundary = T)
#> [1] "2021-02-01 UTC"
• 计算年同比
n <- 1
date <- today()
# current
current_start_date <-  floor_date(date,unit = 'year')
current_start_date
#> [1] "2021-01-01"
date
#> [1] "2021-06-23"

# last year
last_start_date <- floor_date(date,unit = 'year') %m-% years(n)
last_start_date
#> [1] "2020-01-01"
last_end_date <- date %m-% years(n)
last_end_date
#> [1] "2020-06-23"

• 计算月同比

rollback()函数返回上个月的最后一天或当前月的第一天

rollback(today())
#> [1] "2021-05-31"
rollback(today(),roll_to_first = TRUE)
#> [1] "2021-06-01"
• 计算月环比

as_date('2020-03-30') %m-% months(1)
#> [1] "2020-02-29"

# 环比月截止日
today()
#> [1] "2021-06-23"
today() %m-% months(1)
#> [1] "2021-05-23"

• 模拟计算
# 构造数据
bill_date <- as_date((as_date('2019-01-01'):as_date('2020-12-01')))
area <-  sample(c('华东','华西','华南','华北'),size = length(bill_date),replace = TRUE)
category <- sample(c('品类A','品类B','品类C','品类D'),size = length(bill_date),replace = TRUE)
dt <- tibble::tibble(bill_date = bill_date ,money = sample(80:150,size = length(bill_date),replace = TRUE),area = area,category = category)
#> # A tibble: 6 x 4
#>   bill_date  money area  category
#>   <date>     <int> <chr> <chr>
#> 1 2019-01-01   138 华东  品类C
#> 2 2019-01-02    80 华南  品类D
#> 3 2019-01-03   111 华北  品类B
#> 4 2019-01-04    97 华南  品类C
#> 5 2019-01-05   126 华东  品类D
#> 6 2019-01-06   149 华西  品类A
• 自定义函数
library(dplyr,warn.conflicts = FALSE)
library(lubridate)
y_to_y <- function(.dt,date,n = 1,...){

date <- ymd(date)

if(is.na(date)){
stop('请输入正确日期格式，如20200101')
}

# current
current_start_date <-  floor_date(date,unit = 'year')

# last year
last_start_date <- floor_date(date,unit = 'year') %m-% years(n)
last_end_date <- date %m-% years(n)

.dt %>% mutate( 类型 = case_when(between(bill_date,current_start_date,date) ~ "当前",
between(bill_date,last_start_date,last_end_date) ~ "同期",
TRUE ~ "其他")) %>%
filter(类型 != "其他") %>%
group_by(...) %>%
summarise(金额 = sum(money,na.rm = TRUE)) %>%
ungroup()

#%>% pivot_wider(names_from = '类型',values_from = '金额')

}
y_to_y(dt,date = '20201001',n = 1,area,类型) %>%
tidyr::pivot_wider(id_cols = 'area',names_from = '类型',values_from = '金额') %>%
mutate(增长率 = 当前 / 同期)
#> summarise() has grouped output by 'area'. You can override using the .groups argument.
#> # A tibble: 4 x 4
#>   area   当前  同期 增长率
#>   <chr> <int> <int>  <dbl>
#> 1 华北   7572  7674  0.987
#> 2 华东   8713  8344  1.04
#> 3 华南   6279  8150  0.770
#> 4 华西   8805  7161  1.23

y_to_y(dt,date = '20201001',n = 1,area,类型,category) %>%
tidyr::pivot_wider(id_cols = c('area','category'),names_from = '类型',values_from = '金额') %>%
mutate(增长率 = 当前 / 同期)
#> summarise() has grouped output by 'area', '类型'. You can override using the .groups argument.
#> # A tibble: 16 x 5
#>   area  category  当前  同期 增长率
#>   <chr> <chr>    <int> <int>  <dbl>
#> 1 华北  品类A     1515  1268  1.19
#> 2 华北  品类B     1807  2600  0.695
#> 3 华北  品类C     1956  1837  1.06
#> 4 华北  品类D     2294  1969  1.17
#> 5 华东  品类A     1797  2400  0.749
#> 6 华东  品类B     2507  2064  1.21
#> # ... with 10 more rows

5.3.2 清洗不同类型日期格式

library(lubridate)
library(tidyverse)

date1 <- c('2001/2/13 10:33','1/24/13 11:16')

myfun <- function(x){

n_length <- length(x)
res <- vector(length = n_length)

for(i in 1:n_length){
n <- strsplit(x[i],'/') %>% [[(1) %>% [[(1)
if(str_length(n)==4){
res[i] <- ymd_hm(x[i],tz = 'Asia/Shanghai')
} else {
res[i] <- mdy_hm(x[i],tz = 'Asia/Shanghai')
}
}
as_datetime(res,tz = 'Asia/Shanghai')
}

myfun(date1)
#> [1] "2001-02-13 10:33:00 CST" "2013-01-24 11:16:00 CST"

5.3.3 扫码后中奖时间匹配

testfun <- function(x,y){
result <- data.frame() #应采用列表存储结果向量化
n  <-  length(x)
for( i in 1:n){
res <- x[i]-y
res <- abs(res) %>% which.min() #本处不对，应该判断res大于0的部分中谁最小
kong <- data.frame(中奖时间 = x[i],扫的时间 = y[res])
result <- rbind(kong,result)

}
return(result)
}
res <- testfun(dt$时间,scan_dt$时间)

testfun <- function(x,y){
n  <-  length(x)
result <- list()

for( i in 1:n){
y <- y[x>y]
res <- x[i]-y
res <- res %>% which.min()
kong <- data.frame(中奖时间 = x[i],扫的时间 = y[res])
result[[i]] <- kong
}
return(result)
}

res <- testfun(dt$时间,scan_dt$时间)

testfun <- function(dt){

x <- dt$中奖时间 y <- dt$扫的时间
n  <-  length(x)
result <- list()

for( i in 1:n){
y <- y[x>y]
res <- x[i]-y
res <- res %>% which.min()
kong <- data.frame(中奖时间 = x[i],扫的时间 = y[res])
result[[i]] <- kong
}
result <- dplyr::bind_rows(result)
return(result)
}
dtlist <- split(alldt,'客户ID')
purrr::map_dfr(dtlist,testfun)