Create group id sequence but omit specific rows

February 26, 2022

I have this dataframe:

df <- structure(list(value = c("+", "+", "+", "-", "+", "-", "+", "+", 
"+", "+", "-", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -39L))

   value
   <chr>
 1 +    
 2 +    
 3 +    
 4 -    
 5 +    
 6 -    
 7 +    
 8 +    
 9 +    
10 +    
# ... with 29 more rows

I want to add a grouping id for each 4 rows like:

df %>% 
  mutate(id = rep(row_number(), each=4, length.out = n()))

 value id
1      +  1
2      +  1
3      +  1
4      -  1
5      +  2
6      -  2
7      +  2
8      +  2
9      +  3
10     +  3
... and so on

But I want that rows with - are omitted like:

df_expected
   value id expected_id
1      +  1           1
2      +  1           1
3      +  1           1
4      -  1           1
5      +  2           1
6      -  2           2
7      +  2           2
8      +  2           2
9      +  3           2
10     +  3           2
11     -  3           3
12     +  3           3
13     +  4           3
14     +  4           3
15     +  4           3
16     +  4           4
17     +  5           4
18     +  5           4
19     +  5           4
20     +  5           5
21     +  6           5
22     +  6           5
23     +  6           5
24     +  6           6
25     +  7           6
26     +  7           6
27     +  7           6
28     +  7           7
29     +  8           7
30     +  8           7
31     +  8           7
32     +  8           8
33     +  9           8
34     +  9           8
35     +  9           8
36     +  9           9
37     + 10           9
38     + 10           9
39     + 10           9

>Solution :

One option is to subset the rows where ‘value’ is not -, create the column ‘id’ using the same code as in OP’s post and then fill the NAs

library(data.table)
library(dplyr)
library(tidyr)
 setDT(df)[value != "-", id := rep(seq_len(.N), each = 4, length.out = .N) ]
df %>%
   fill(id, .direction = "updown")

-output

    value    id
    <char> <int>
 1:      +     1
 2:      +     1
 3:      +     1
 4:      -     1
 5:      +     1
 6:      -     2
 7:      +     2
 8:      +     2
 9:      +     2
10:      +     2
11:      -     3
12:      +     3
13:      +     3
14:      +     3
15:      +     3
16:      +     4
17:      +     4
18:      +     4
19:      +     4
20:      +     5
21:      +     5
22:      +     5
23:      +     5
24:      +     6
25:      +     6
26:      +     6
27:      +     6
28:      +     7
29:      +     7
30:      +     7
31:      +     7
32:      +     8
33:      +     8
34:      +     8
35:      +     8
36:      +     9
37:      +     9
38:      +     9
39:      +     9

Or using dplyr with fill from tidyr – instead of creating the rep on the whole sequence, subset the row_number() where ‘value’ is not equal to ‘-‘, and use replace to assign only to those elements with the rep output

df %>% 
   mutate(id =  replace(rep(NA_integer_, n()), value != "-", 
     rep(row_number()[value != "-"], each = 4, 
        length.out = sum(value != "-")))) %>% 
   fill(id, .direction = "updown")