Conditionally synchronize asynchronous data across columns

January 7, 2022

I have data with partly asynchronous observations in columns Area_l and Area_r:

df <- structure(list(rec_time = c("00:00:00.103", "00:00:00.136", "00:00:00.170", 
                                  "00:00:00.203", "00:00:00.224", "00:00:00.236", "00:00:00.240", 
                                  "00:00:00.241", "00:00:00.257", "00:00:00.270", "00:00:00.274", 
                                  "00:00:00.290", "00:00:00.291", "00:00:00.303"), 
                     UTC = c(1584090875364, 1584090875397, 1584090875431, 1584090875464, 1584090875485, 1584090875497, 
                             1584090875501, 1584090875502, 1584090875518, 1584090875531, 1584090875535, 
                             1584090875551, 1584090875552, 1584090875564), 
                     Area_l = c(NA, NA, NA, NA, 1093.292, NA, 1080.093, NA, 1064.014, NA, 1057.508,1058.605, NA, NA), 
                     Area_r = c(NA,NA, NA, NA, NA, NA, NA, 1221.454, 1214.822, NA, 1208.862, NA, 1208.747, NA)), 
                row.names = 2:15, class = "data.frame")

Sometimes Area_r values lag behind Area_l values by a splitsecond. Those values I want to lift up one row. The following method is only partially successful:

library(dplyr)
df %>%              
  # remove rows where both `Area_*` is `NA`:
  filter(!(is.na(Area_l) & is.na(Area_r))) %>% 
  mutate(
    # lift `Area_r` values up one row *if* `Area_l` is `NA`:
    Area_r = ifelse(is.na(Area_l),                     
                    lead(Area_r), Area_r))
      rec_time          UTC   Area_l   Area_r
1 00:00:00.224 1.584091e+12 1093.292       NA
2 00:00:00.240 1.584091e+12 1080.093       NA  
3 00:00:00.241 1.584091e+12       NA 1214.822
4 00:00:00.257 1.584091e+12 1064.014 1214.822
5 00:00:00.274 1.584091e+12 1057.508 1208.862
6 00:00:00.290 1.584091e+12 1058.605       NA
7 00:00:00.291 1.584091e+12       NA       NA

The correct result is this:

       rec_time          UTC   Area_l   Area_r
6  00:00:00.224 1.584091e+12 1093.292       NA
8  00:00:00.240 1.584091e+12 1080.093 1221.454
9  00:00:00.241 1.584091e+12       NA 1221.454
10 00:00:00.257 1.584091e+12 1064.014 1214.822
12 00:00:00.274 1.584091e+12 1057.508 1208.862
13 00:00:00.290 1.584091e+12 1058.605 1208.747
14 00:00:00.291 1.584091e+12       NA 1208.747

How can this be obtained?

>Solution :

You have to use lead in your condition as well, with a default different from NA:

df %>%              
  filter(!(is.na(Area_l) & is.na(Area_r))) %>% 
  mutate(Area_r = ifelse(is.na(lead(Area_l,default=0)), lead(Area_r), Area_r))

      rec_time          UTC   Area_l   Area_r
1 00:00:00.224 1.584091e+12 1093.292       NA
2 00:00:00.240 1.584091e+12 1080.093 1221.454
3 00:00:00.241 1.584091e+12       NA 1221.454
4 00:00:00.257 1.584091e+12 1064.014 1214.822
5 00:00:00.274 1.584091e+12 1057.508 1208.862
6 00:00:00.290 1.584091e+12 1058.605 1208.747
7 00:00:00.291 1.584091e+12       NA 1208.747