I have a data frame that looks like this:
| var1 | var2 | var3 |
|---|---|---|
| Group_A | a,b,c,d,e | 1 |
| Group_B | f,g | 2 |
| Group_C | h,i | 3 |
| Hyper_group_A | Group_A,Group_B | 4 |
| Group_D | j,k | 5 |
| Group_E | l,m | 6 |
| Group_F | n,o | 7 |
| Hyper_group_B | Hyper_group_A,p | 8 |
I want to split the comma-separated elements in column var2 into separate rows, so that the data frame looks like this:
| var1 | var2 | var3 |
|---|---|---|
| Group_A | a | 1 |
| Group_A | b | 1 |
| Group_A | c | 1 |
| Group_A | d | 1 |
| Group_A | e | 1 |
| Group_B | f | 2 |
| Group_B | g | 2 |
| …. | … | … |
| …. | … | … |
| …. | … | … |
| Hyper_group_B | Hyper_group_A | 8 |
| Hyper_group_B | p | 8 |
How can I do this in R using dplyr?
# Reproducible example data.
# dplyr re-exports tibble(), so attaching it is enough to build the data frame.
library(dplyr)

var1 <- c(
  "Group_A", "Group_B", "Group_C", "Hyper_group_A",
  "Group_D", "Group_E", "Group_F", "Hyper_group_B"
)
# Each var2 entry is a single string holding comma-separated member names.
var2 <- c(
  "a,b,c,d,e", "f,g", "h,i", "Group_A,Group_B",
  "j,k", "l,m", "n,o", "Hyper_group_A,p"
)
# seq(1, 8, 1) yields doubles 1..8 (not integers).
var3 <- seq(1, 8, 1)

data <- tibble(var1, var2, var3)
data
>Solution :
How about this:
library(dplyr)
library(tidyr)
library(stringr)

# Example data: var2 holds one comma-separated string of members per row.
var1 <- c(
  "Group_A", "Group_B", "Group_C", "Hyper_group_A",
  "Group_D", "Group_E", "Group_F", "Hyper_group_B"
)
var2 <- c(
  "a,b,c,d,e", "f,g", "h,i", "Group_A,Group_B",
  "j,k", "l,m", "n,o", "Hyper_group_A,p"
)
var3 <- seq(1, 8, 1)
data <- tibble(var1, var2, var3)

# str_split() is vectorised over its input and returns a list with one
# character vector per row, i.e. a ready-made list-column. No rowwise() or
# manual list() wrapping is needed before unnest() expands it to one row per
# member; var1 and var3 are repeated for every member of their row.
# With tidyr >= 1.3.0, separate_longer_delim(data, var2, delim = ",") performs
# the same split in a single step.
ungrouped <- data %>%
  mutate(var2 = str_split(var2, ",")) %>%
  unnest(var2) %>%
  arrange(var1, var2)
ungrouped
#> # A tibble: 19 × 3
#> var1 var2 var3
#> <chr> <chr> <dbl>
#> 1 Group_A a 1
#> 2 Group_A b 1
#> 3 Group_A c 1
#> 4 Group_A d 1
#> 5 Group_A e 1
#> 6 Group_B f 2
#> 7 Group_B g 2
#> 8 Group_C h 3
#> 9 Group_C i 3
#> 10 Group_D j 5
#> 11 Group_D k 5
#> 12 Group_E l 6
#> 13 Group_E m 6
#> 14 Group_F n 7
#> 15 Group_F o 7
#> 16 Hyper_group_A Group_A 4
#> 17 Hyper_group_A Group_B 4
#> 18 Hyper_group_B Hyper_group_A 8
#> 19 Hyper_group_B p 8
Created on 2022-10-19 by the reprex package (v2.0.1)