I have a data frame `df`:
# Example input: one row per (GENE, variant ID) pair; a gene may appear on
# several rows.
df <- data.frame(
  GENE = c("TNFRSF4", "TNFRSF4", "VWA1", "VWA1", "PEX10", "CEP104"),
  KEY.varID = c(
    "chr1:1213738:G:A",
    "chr1:1232280:T:C",
    "chr1:1435798:T:TGGCGCGGAGC",
    "chr1:1437401:C:G",
    "chr1:2406791:C:CT",
    "chr1:3844977:G:A"
  ),
  stringsAsFactors = FALSE
)
Code I tried:
library(dplyr)
# Attempt: attach the comma-joined variant IDs of each gene to every row.
# mutate() adds a column but keeps all input rows, so genes with several
# variants still appear once per variant instead of once per gene.
df %>%
  group_by(GENE) %>%
  mutate(all_variants = paste(KEY.varID, collapse = ","))
Result I want (one row per GENE, with all of its variant IDs joined by ", "):
GENE KEY.varID
TNFRSF4 chr1:1213738:G:A, chr1:1232280:T:C
VWA1 chr1:1435798:T:TGGCGCGGAGC, chr1:1437401:C:G
PEX10 chr1:2406791:C:CT
CEP104 chr1:3844977:G:A
> Solution:
Using dplyr, collapse each gene's variants with `summarise()` instead of `mutate()`:
library(tidyverse)
library(data.table)
# Collapse to one row per GENE, joining that gene's variant IDs with ", ".
# `.by = GENE` (dplyr >= 1.1.0) groups for this call only and keeps the genes
# in order of first appearance, which matches the requested output;
# group_by() + summarise() would return the genes sorted alphabetically.
df %>%
  summarise(KEY.varID = str_c(KEY.varID, collapse = ", "), .by = GENE) %>%
  as.data.table()
#> GENE KEY.varID
#> 1: TNFRSF4 chr1:1213738:G:A, chr1:1232280:T:C
#> 2: VWA1 chr1:1435798:T:TGGCGCGGAGC, chr1:1437401:C:G
#> 3: PEX10 chr1:2406791:C:CT
#> 4: CEP104 chr1:3844977:G:A