# Runner examples

The most fundamental function in runner package is runner. With runner::runner one can apply any R function on running windows. This tutorial presents set of examples explaining how to tackle some tasks. Some of the examples are referenced to original topic on stack-overflow.

### Number of unique elements in 7 days window

library(runner)

x <- sample(letters, 20, replace = TRUE)
date <- Sys.Date() + cumsum(sample(1:5, 20, replace = TRUE)) # unequaly spaced time series

runner(
x,
k = "7 days",
idx = date,
f = function(x) length(unique(x))
)
##  [1] 1 2 3 2 2 2 2 3 3 4 4 3 3 3 4 3 2 2 2 2

### weekly trimmed mean

library(runner)

x <- cumsum(rnorm(20))
date <- Sys.Date() + cumsum(sample(1:5, 20, replace = TRUE)) # unequaly spaced time series

runner(
x,
k = "week",
idx = date,
f = function(x) mean(x, trim = 0.05)
)
##  [1] -1.25749691 -1.64713165 -2.02798213 -2.69369499 -3.18110392 -3.29694938
##  [7] -4.05063663 -3.85160954 -3.19867633 -2.57709721 -1.65170984 -1.54691603
## [13] -1.50351450 -1.02189013 -0.70260513 -0.14853595 -0.07376711 -0.54787677
## [19] -2.51124034 -2.29345682

### Prediction on current day based on preceding 2-weeks regression

library(runner)

# sample data
x <- cumsum(rnorm(20))
data <- data.frame(
date = Sys.Date() + cumsum(sample(1:3, 20, replace = TRUE)), # unequaly spaced time series,
y = 3 * x + rnorm(20),
x = cumsum(rnorm(20))
)

# solution
data$pred <- runner( data, lag = "1 days", k = "2 weeks", idx = date, f = function(data) { predict( lm(y ~ x, data = data) )[nrow(data)] } ) plot(data$date, data$y, type = "l", col = "red") lines(data$date, data$pred, col = "blue") ### Rolling sums for groups with uneven time gaps SO discussion library(runner) library(dplyr) set.seed(3737) df <- data.frame( user_id = c(rep(27, 7), rep(11, 7)), date = as.Date(rep(c('2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07', '2016-01-10', '2016-01-14', '2016-01-16'), 2)), value = round(rnorm(14, 15, 5), 1)) df %>% group_by(user_id) %>% mutate( v_minus7 = sum_run(value, 7, idx = date), v_minus14 = sum_run(value, 14, idx = date) ) ## # A tibble: 14 x 5 ## # Groups: user_id [2] ## user_id date value v_minus7 v_minus14 ## <dbl> <date> <dbl> <dbl> <dbl> ## 1 27 2016-01-01 15 15 15 ## 2 27 2016-01-03 22.4 37.4 37.4 ## 3 27 2016-01-05 13.3 50.7 50.7 ## 4 27 2016-01-07 21.9 72.6 72.6 ## 5 27 2016-01-10 20.6 55.8 93.2 ## 6 27 2016-01-14 18.6 39.2 112. ## 7 27 2016-01-16 16.4 55.6 113. ## 8 11 2016-01-01 6.8 6.8 6.8 ## 9 11 2016-01-03 21.3 28.1 28.1 ## 10 11 2016-01-05 19.8 47.9 47.9 ## 11 11 2016-01-07 22 69.9 69.9 ## 12 11 2016-01-10 19.4 61.2 89.3 ## 13 11 2016-01-14 17.5 36.9 107. ## 14 11 2016-01-16 19.3 56.2 119. # runner with dplyr ### Unique for specified time frame SO discussion library(runner) library(dplyr) df <- read.table(text = " user_id date category 27 2016-01-01 apple 27 2016-01-03 apple 27 2016-01-05 pear 27 2016-01-07 plum 27 2016-01-10 apple 27 2016-01-14 pear 27 2016-01-16 plum 11 2016-01-01 apple 11 2016-01-03 pear 11 2016-01-05 pear 11 2016-01-07 pear 11 2016-01-10 apple 11 2016-01-14 apple 11 2016-01-16 apple", header = TRUE) df %>% group_by(user_id) %>% mutate( distinct_7 = runner(category, k = "7 days", idx = as.Date(date), f = function(x) length(unique(x))), distinct_14 = runner(category, k = "14 days", idx = as.Date(date), f = function(x) length(unique(x))) ) ## # A tibble: 14 x 5 ## # Groups: user_id [2] ## user_id date category distinct_7 distinct_14 ## <int> <fct> <fct> <int> <int> ## 1 27 2016-01-01 apple 1 1 ## 2 27 2016-01-03 apple 1 1 ## 3 27 2016-01-05 pear 2 2 ## 4 27 2016-01-07 plum 3 3 ## 5 27 2016-01-10 apple 3 3 ## 6 27 2016-01-14 pear 2 3 ## 7 27 2016-01-16 plum 3 3 ## 8 11 2016-01-01 apple 1 1 ## 9 11 2016-01-03 pear 2 2 ## 10 11 2016-01-05 pear 2 2 ## 11 11 2016-01-07 pear 2 2 ## 12 11 2016-01-10 apple 2 2 ## 13 11 2016-01-14 apple 1 2 ## 14 11 2016-01-16 apple 1 2 ### runner with group_by mutate library(dplyr) x <- cumsum(rnorm(20)) y <- 3 * x + rnorm(20) date <- Sys.Date() + cumsum(sample(1:3, 20, replace = TRUE)) # unequaly spaced time series group <- rep(c("a", "b"), each = 10) data.frame(date, group, y, x) %>% group_by(group) %>% run_by(idx = "date", k = "5 days") %>% mutate( alpha_5 = runner( x = ., f = function(x) { coefficients(lm(x ~ y, x))[1] } ), beta_5 = runner( x = ., f = function(x) { coefficients(lm(x ~ y, x))[1] } ) ) ## # A tibble: 20 x 6 ## # Groups: group [2] ## date group y x alpha_5 beta_5 ## <date> <fct> <dbl> <dbl> <dbl> <dbl> ## 1 2020-05-17 a 0.368 0.0470 0.0470 0.0470 ## 2 2020-05-20 a 0.457 0.998 -3.91 -3.91 ## 3 2020-05-23 a 3.73 0.967 1.00 1.00 ## 4 2020-05-26 a 3.70 1.26 34.5 34.5 ## 5 2020-05-28 a 4.76 1.98 -1.26 -1.26 ## 6 2020-05-30 a 10.9 3.52 0.365 0.365 ## 7 2020-06-02 a 6.13 2.39 0.948 0.948 ## 8 2020-06-04 a 12.0 4.06 0.659 0.659 ## 9 2020-06-06 a 11.0 3.58 0.711 0.711 ## 10 2020-06-08 a 12.6 4.49 -2.38 -2.38 ## 11 2020-06-09 b 12.9 3.60 3.60 3.60 ## 12 2020-06-10 b 8.03 2.76 1.39 1.39 ## 13 2020-06-11 b 15.8 4.68 0.768 0.768 ## 14 2020-06-14 b 15.8 5.26 0.492 0.492 ## 15 2020-06-15 b 20.8 6.95 -1.24 -1.24 ## 16 2020-06-17 b 22.7 7.67 -0.165 -0.165 ## 17 2020-06-19 b 19.0 6.55 0.862 0.862 ## 18 2020-06-21 b 20.7 6.48 0.583 0.583 ## 19 2020-06-24 b 14.6 5.18 2.05 2.05 ## 20 2020-06-26 b 16.8 5.14 5.46 5.46 ### Aggregating values from another data.frame in grouped_df SO Discussion library(runner) library(dplyr) Date <- seq(from = as.Date("2014-01-01"), to = as.Date("2019-12-31"), by = 'day') market_return <- c(rnorm(2191)) AAPL <- data.frame( Company.name = "AAPL", Date = Date, market_return = market_return ) MSFT <- data.frame( Company.name = "MSFT", Date = Date, market_return = market_return ) df <- rbind(AAPL, MSFT) df$stock_return <- c(rnorm(4382))
df <- df[order(df$Date),] df2 <- data.frame( Company.name2 = c(replicate(450, "AAPL"), replicate(450, "MSFT")), Event_date = sample( seq(as.Date('2015/01/01'), as.Date('2019/12/31'), by = "day"), size = 900) ) df2 %>% group_by(Company.name2) %>% mutate( intercept = runner( x = df[df$Company.name ==  Company.name2[1], ],
k = "180 days",
lag = "5 days",
idx = df$Date[df$Company.name == Company.name2[1]],
at = Event_date,
f = function(x) {
coef(
lm(stock_return ~ market_return, data = x)
)[1]
}
),
slope = runner(
x = df[df$Company.name == Company.name2[1], ], k = "180 days", lag = "5 days", idx = df$Date[df\$Company.name == Company.name2[1]],
at = Event_date,
f = function(x) {
coef(
lm(stock_return ~ market_return, data = x)
)[2]
}
)
)
## # A tibble: 900 x 4
## # Groups:   Company.name2 [2]
##    Company.name2 Event_date intercept   slope
##    <fct>         <date>         <dbl>   <dbl>
##  1 AAPL          2017-08-24   -0.0441  0.0247
##  2 AAPL          2019-04-21   -0.103   0.113
##  3 AAPL          2016-06-23    0.0669 -0.0500
##  4 AAPL          2019-07-28   -0.0570  0.138
##  5 AAPL          2016-05-27    0.0758 -0.0222
##  6 AAPL          2019-05-17   -0.0747  0.120
##  7 AAPL          2015-11-13    0.0134 -0.0917
##  8 AAPL          2019-06-12   -0.0503  0.102
##  9 AAPL          2017-03-14    0.0327 -0.0526
## 10 AAPL          2018-03-15    0.0252 -0.0734
## # … with 890 more rows