Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

How to group and sum with a special condition?

My test dataset

structure(list(record_id = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L), ev_faudit = structure(c(19037, 19037, 19037, 19038, 
19038, 19039, 19039, 19040, 19040, 19041, 19041), class = "Date"), 
    redcap_repeat_instance = c(41L, 41L, 41L, 42L, 42L, 43L, 
    43L, 44L, 44L, 45L, 45L), ne_cc = c("119697xxxx ", "119697xxxx ", 
    "119697xxxx ", "119697xxxx ", "119697xxxx ", "119697xxxx ", 
    "119697xxxx ", "119697xxxx ", "119697xxxx ", "119697xxxx ", 
    "119697xxxx "), nom_ips = structure(c(2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L), redcapLabels = c("location1", 
    "location2"), redcapLevels = 1:2), ne_auditor = c("user1", 
    "user1", "user1", "user1", "user1", 
    "user1", "user1", "user1", "user1", 
    "user1", "user1"), ev_dias1 = c(2, 2, 2, 
    1, 1, 1, 1, 1, 1, 1, 1), var2 = c("ev_hallazgo1___1", "ev_apoyo_dx1___9", 
    "ev_apoyo_dx1___13", "ev_hallazgo1___4", "ev_costo_oportunidad___1", 
    "ev_hallazgo1___4", "ev_costo_oportunidad___1", "ev_hallazgo1___1", 
    "ev_apoyo_dx1___13", "ev_hallazgo1___1", "ev_apoyo_dx1___13"
    ), var1 = c("Ineficiencia", "Videotelemetria", "Polisomnografia", 
    "Costo de oportunidad", "Demora tramite ERP", "Costo de oportunidad", 
    "Demora tramite ERP", "Ineficiencia", "Polisomnografia", 
    "Ineficiencia", "Polisomnografia"), val1 = c(NA, 70000, 150000, 
    NA, 450000, NA, 450000, NA, 150000, NA, 150000)), row.names = c(NA, 
-11L), class = c("tbl_df", "tbl", "data.frame"))

I require to group by record_id and redcap_repeat_instance and place the resulting sum of all the values from val1 next to the equivalent val1 when var2 starts with ev_hallazgo1

Desired output

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

structure(list(record_id = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L), ev_faudit = structure(c(19037, 19037, 19037, 19038, 
19038, 19039, 19039, 19040, 19040, 19041, 19041), class = "Date"), 
    redcap_repeat_instance = c(41L, 41L, 41L, 42L, 42L, 43L, 
    43L, 44L, 44L, 45L, 45L), ne_cc = c("119697xxxx ", "119697xxxx ", 
    "119697xxxx ", "119697xxxx ", "119697xxxx ", "119697xxxx ", 
    "119697xxxx ", "119697xxxx ", "119697xxxx ", "119697xxxx ", 
    "119697xxxx "), nom_ips = structure(c(2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L), redcapLabels = c("location1", 
    "location2"), redcapLevels = 1:2), ne_auditor = c("user1", 
    "user1", "user1", "user1", "user1", 
    "user1", "user1", "user1", "user1", 
    "user1", "user1"), ev_dias1 = c(2, 2, 2, 
    1, 1, 1, 1, 1, 1, 1, 1), var2 = c("ev_hallazgo1___1", "ev_apoyo_dx1___9", 
    "ev_apoyo_dx1___13", "ev_hallazgo1___4", "ev_costo_oportunidad___1", 
    "ev_hallazgo1___4", "ev_costo_oportunidad___1", "ev_hallazgo1___1", 
    "ev_apoyo_dx1___13", "ev_hallazgo1___1", "ev_apoyo_dx1___13"
    ), var1 = c("Ineficiencia", "Videotelemetria", "Polisomnografia", 
    "Costo de oportunidad", "Demora tramite ERP", "Costo de oportunidad", 
    "Demora tramite ERP", "Ineficiencia", "Polisomnografia", 
    "Ineficiencia", "Polisomnografia"), val1 = c(220000, 70000, 150000, 
    45000, 450000, 45000, 450000, 150000, 150000, 150000, 150000)), row.names = c(NA, 
-11L), class = c("tbl_df", "tbl", "data.frame"))

>Solution :

We can either create the condition on NA (is.na) or from the specific substring of ‘var1’ – grouped by the ‘record_id’, and ‘redcap_repeat_instance’, replace the ‘val1’ where the ‘var2’ starts with substring with sum of all the values in ‘val1’ for that group

library(dplyr)
library(stringr)
test %>% 
  group_by(record_id, redcap_repeat_instance) %>% 
  mutate(val1 = replace(val1, str_detect(var2, "^ev_hallazgo1"), 
        sum(val1, na.rm = TRUE))) %>%
  ungroup

-output

# A tibble: 11 Ă— 10
   record_id ev_faudit  redcap_repeat_instance ne_cc         nom_ips ne_auditor ev_dias1 var2                     var1                   val1
       <int> <date>                      <int> <chr>           <int> <chr>         <dbl> <chr>                    <chr>                 <dbl>
 1         5 2022-02-14                     41 "119697xxxx "       2 user1             2 ev_hallazgo1___1         Ineficiencia         220000
 2         5 2022-02-14                     41 "119697xxxx "       2 user1             2 ev_apoyo_dx1___9         Videotelemetria       70000
 3         5 2022-02-14                     41 "119697xxxx "       2 user1             2 ev_apoyo_dx1___13        Polisomnografia      150000
 4         5 2022-02-15                     42 "119697xxxx "       2 user1             1 ev_hallazgo1___4         Costo de oportunidad 450000
 5         5 2022-02-15                     42 "119697xxxx "       2 user1             1 ev_costo_oportunidad___1 Demora tramite ERP   450000
 6         5 2022-02-16                     43 "119697xxxx "       2 user1             1 ev_hallazgo1___4         Costo de oportunidad 450000
 7         5 2022-02-16                     43 "119697xxxx "       2 user1             1 ev_costo_oportunidad___1 Demora tramite ERP   450000
 8         5 2022-02-17                     44 "119697xxxx "       2 user1             1 ev_hallazgo1___1         Ineficiencia         150000
 9         5 2022-02-17                     44 "119697xxxx "       2 user1             1 ev_apoyo_dx1___13        Polisomnografia      150000
10         5 2022-02-18                     45 "119697xxxx "       2 user1             1 ev_hallazgo1___1         Ineficiencia         150000
11         5 2022-02-18                     45 "119697xxxx "       2 user1             1 ev_apoyo_dx1___13        Polisomnografia      150000

If those are the only NAs, another option is coalesce

test %>% 
 group_by(record_id, redcap_repeat_instance)  %>%
 mutate(val1 = coalesce(val1, sum(val1, na.rm = TRUE))) %>% 
 ungroup
Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading