Median for all numeric columns and mode for all character columns in R

December 1, 2021

With a dataset like original:

# Mock dataset: an identifier, a grouping column, three numeric measures and
# one character attribute, one row per observation.
id <- c("JF", "GH", "GH", "ANN", "GH", "ROG", "JF")
group <- c("most", "least", "most", "least", "least", "most", "least")
NP <- c(4, 6, 18, 1, 3, 12, 8)
iso_USA <- c(1, 0, 0, 0, 0, 1, 1)
iso_CHN <- c(0, 1, 1, 0, 0, 0, 0)
color <- c("blue", "orange", "blue", "blue", "red", "orange", "black")

# stringsAsFactors = FALSE keeps the text columns as character on every R
# version (it only became the default in R 4.0), so is.character() below
# finds them.
original <- data.frame(
  id, group, NP, iso_USA, iso_CHN, color,
  stringsAsFactors = FALSE
)


# Names of the numeric columns. The names are indexed with a logical mask
# instead of subsetting the data frame: `original[ , mask]` collapses to a
# bare vector when exactly one column matches, and names() of that is NULL.
# vapply() guarantees the mask is a logical vector.
numeric <- names(original)[vapply(original, is.numeric, logical(1))]

# Names of the character columns, excluding the identifier.
char <- names(original)[vapply(original, is.character, logical(1))]
char <- setdiff(char, "id")   # remove id by name, not by position

I want to group by "group" and calculate the median of the numeric variables and the mode of the character variables, so that the result looks like original2. Note that my actual dataset has many more columns than the mock version presented here:

# Expected result: one row per group, holding the median of each numeric
# column and the most frequent value of each character column of `original`.
group <- c("least", "most")
# The "least" rows have NP values 6, 1, 3, 8, whose median is 4.5 (not 6);
# the "most" rows have 4, 18, 12, whose median is 12.
NP <- c(4.5, 12)
iso_USA <- c(0, 1)
iso_CHN <- c(0, 0)
# In "least" every colour occurs once; "orange" is the first one encountered.
color <- c("orange", "blue")

original2 <- data.frame(group, NP, iso_USA, iso_CHN, color)

Any clue?

>Solution :

Using dplyr's across functionality and the accepted answer at the FAQ about implementing a mode function:

#' Most frequent value of a vector (statistical mode)
#'
#' @param x A vector (character, numeric, logical, factor, ...).
#' @param na.rm If `TRUE`, drop `NA` values before counting. Defaults to
#'   `FALSE`, in which case `NA` is counted like any other value and may be
#'   the result.
#' @return A length-one vector of the same type as `x`. Ties go to the value
#'   that appears first in `x`. For zero-length input (including all-`NA`
#'   input with `na.rm = TRUE`) an `NA` of the same type is returned, so the
#'   function yields exactly one value, as `median()` does.
Mode <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  if (length(x) == 0L) {
    # Indexing with NA keeps the type of x (e.g. NA_character_).
    return(x[NA_integer_])
  }
  ux <- unique(x)
  # match() maps each element to its slot in ux, tabulate() counts the slots,
  # and which.max() picks the first maximum -- hence the first-seen tie-break.
  ux[which.max(tabulate(match(x, ux)))]
}

library(dplyr)

# Drop the identifier, then collapse each `group` to a single row: numeric
# columns are reduced to their median, character columns to their mode.
# Uses `original` and `Mode()` as defined earlier.
original %>%
  select(!id) %>%
  group_by(group) %>%
  summarise(
    across(where(is.numeric), ~ median(.x)),
    across(where(is.character), ~ Mode(.x))
  )
# # A tibble: 2 × 5
#   group    NP iso_USA iso_CHN color 
#   <chr> <dbl>   <dbl>   <dbl> <chr> 
# 1 least   4.5       0       0 orange
# 2 most   12         1       0 blue