Advertisements
I have a table like this
# Example input: 16 rows, one per (variant, gene) pair.
# Built with structure() so the class and compact row names match the
# original dput() output exactly.
df <- structure(
  list(
    # Variant identifier; repeated once per associated gene.
    A = rep(
      c("rs1544968", "rs60296873", "rs2811442"),
      times = c(10L, 2L, 4L)
    ),
    B = rep(c(1L, 10L, 3L), times = c(10L, 2L, 4L)),
    # Stored as character. Note the values alternate between "." and ","
    # as the decimal mark, so "2.37E+08" and "2,37E+08" are distinct strings.
    C = c(
      rep(c("2.37E+08", "2,37E+08"), times = 5L),
      rep("33171937", times = 2L),
      rep("1,3E+08", times = 4L)
    ),
    D = rep("A", times = 16L),
    E = rep("G", times = 16L),
    # Gene identifiers to be collected per variant.
    F = c(
      "ENSG00000116984", "ENSG00000186197", "ENSG00000119285",
      "ENSG00000077522", "ENSG00000077522", "ENSG00000116977",
      "ENSG00000244020", "ENSG00000086619", "ENSG00000198626",
      "ENSG00000077585", "ENSG00000150093", "ENSG00000099250",
      "ENSG00000172765", "ENSG00000170893", "ENSG00000172765",
      "ENSG00000170893"
    )
  ),
  class = "data.frame",
  row.names = c(NA, -16L)
)
How can I collect all the genes from column 6 (`F`) that correspond to each value in column 1 (`A`)?
For example, I would like to have :
>Solution :
We could first group by the identifying columns and then collapse each group's genes into a single comma-separated string with `toString()`:
library(dplyr) # needs dplyr >= 1.1.0 for the per-operation `.by` argument

# Collapse the gene IDs in column F into one comma-separated string for
# every distinct combination of A, B, C and E. Column D is not part of
# the grouping, so it does not appear in the result.
summarise(
  df,
  F = toString(F),
  .by = c(A, B, C, E)
)
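# Note: the printout below appears to come from a `group_by(A, B, C, E)` + `summarise()` run on a tibble; with `.by` the result is ungrouped and the rows keep their order of first appearance.
# Groups: A, B, C [4]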
A B C E F
<chr> <int> <chr> <chr> <chr>
1 rs1544968 1 2,37E+08 G ENSG00000186197, ENSG00000077522, ENSG00000116977, ENSG00000086619, ENSG000…
2 rs1544968 1 2.37E+08 G ENSG00000116984, ENSG00000119285, ENSG00000077522, ENSG00000244020, ENSG000…
3 rs2811442 3 1,3E+08 G ENSG00000172765, ENSG00000170893, ENSG00000172765, ENSG00000170893
4 rs60296873 10 33171937 G ENSG00000150093, ENSG00000099250