Compare multiple methods for performing a row-wise operation on a data frame.

In this case, we wish to extract, for each row, the name of the column that holds the highest value.

library(tidyverse)
set.seed(1212)

# A dummy data frame: 5 rows of uniform draws in [1, 9] across columns x, y, z.
# Column names are set on the matrix itself so that as_tibble() (the
# replacement for the deprecated as_data_frame()) never has to invent names.
dummy_df <- matrix(
    runif(15, min = 1, max = 9),
    ncol = 3,
    dimnames = list(NULL, c("x", "y", "z"))
) %>%
    as_tibble()

Method 1: dplyr::rowwise

#' Label each row with the name of its largest column, using dplyr::rowwise().
#'
#' @param df A data frame containing numeric columns `x`, `y` and `z`.
#' @return `df` with an extra character column `max` holding "x", "y" or "z".
#'   The result is still grouped by row, exactly as `rowwise()` leaves it.
use_rowwise <- function(df) {
    # The lookup vector must line up with c(x, y, z) below. Indexing names(df)
    # instead would only be right when x, y, z are the first three columns in
    # that order.
    value_cols <- c("x", "y", "z")
    df %>%
        rowwise() %>%
        mutate(max = value_cols[which.max(c(x, y, z))])
}
use_rowwise(dummy_df)
## Source: local data frame [5 x 4]
## Groups: <by row>
## 
## # A tibble: 5 x 4
##       x     y     z max  
##   <dbl> <dbl> <dbl> <chr>
## 1  3.12  1.27  5.12 z    
## 2  1.87  1.70  7.48 z    
## 3  8.74  1.47  1.22 x    
## 4  3.81  6.63  5.96 y    
## 5  6.08  2.94  6.28 z

Method 2: base::apply

#' Label each row with the name of its largest column, using base::apply().
#'
#' @param df A data frame whose columns are all numeric.
#' @return `df` with an extra character column `max` naming, for every row,
#'   the column that holds the largest value.
use_apply <- function(df) {
    col_names <- names(df)
    # apply() walks the rows (MARGIN = 1) and returns the winning position.
    winner_idx <- apply(df, 1, which.max)
    mutate(df, max = col_names[winner_idx])
}
use_apply(dummy_df)
## # A tibble: 5 x 4
##       x     y     z max  
##   <dbl> <dbl> <dbl> <chr>
## 1  3.12  1.27  5.12 z    
## 2  1.87  1.70  7.48 z    
## 3  8.74  1.47  1.22 x    
## 4  3.81  6.63  5.96 y    
## 5  6.08  2.94  6.28 z

Method 3: base::max.col inside data.table

library(data.table)
#' Label each row with the name of its largest column, using max.col() within
#' a data.table.
#'
#' @param df A data frame whose first three columns are numeric.
#' @return A data.table copy of `df` with an extra character column `max`.
#'   The value is returned invisibly because `:=` is the last expression, so
#'   callers must print it explicitly.
use_datatable <- function(df) {
    dt <- as.data.table(df)
    # max.col() breaks ties at random by default. "first" makes the result
    # deterministic and consistent with which.max().
    dt[,
       max := names(.SD)[max.col(.SD, ties.method = "first")],
       .SDcols = 1:3]
}
use_datatable(dummy_df) %>% print()
##           x        y        z max
## 1: 3.117172 1.265315 5.118694   z
## 2: 1.868388 1.695199 7.484665   z
## 3: 8.735410 1.474048 1.217860   x
## 4: 3.808189 6.631653 5.958628   y
## 5: 6.075657 2.938117 6.275357   z

Efficiency

Let's benchmark the three methods on a larger data frame.

library(microbenchmark)

# For benchmarking: one million rows of uniform draws, reusing dummy_df's
# column names. Names are attached to the matrix so that as_tibble() (the
# replacement for the deprecated as_data_frame()) gets valid names directly.
large_df <- matrix(
    runif(30e5),
    ncol = 3,
    dimnames = list(NULL, names(dummy_df))
) %>%
    as_tibble()
dim(large_df)
## [1] 1000000       3
# Time the three methods on large_df; each expression is evaluated 30 times.
# use_datatable() converts its input with as.data.table() on every call, so
# that conversion cost is part of its measured time.
microbenchmark(
    use_rowwise(large_df),
    use_apply(large_df),
    use_datatable(large_df),
    times = 30
)
## Unit: milliseconds
##                     expr         min          lq        mean      median
##    use_rowwise(large_df) 15965.84186 16269.75837 17330.94463 17188.50480
##      use_apply(large_df)  2705.65599  3144.74098  3292.00400  3252.76815
##  use_datatable(large_df)    30.06385    33.83812    46.70868    43.22026
##          uq        max neval cld
##  18009.8889 20300.7133    30   c
##   3491.3965  3812.2967    30  b 
##     46.3758   116.7382    30 a

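Oh yeah, the data.table approach is blazingly fast, largely because `max.col()` is vectorised over the whole table instead of being called once per row.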