Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

How to pivot_longer with multiple columns to pivot

I have a df in wide format that has columns for start and end times for negative and positive controls (total 4 columns). I want to condense those four columns into 3 columns with one being "control" that is binary with a "1" indicating it is control and a "0" indicating it is an experiment, the second being "start_time", and the third being "end_time". Please see my dput for MRE and preferably I would like to see how this is done with the tidyverse

data <- structure(list(region = c("el_valle", "el_valle", "el_valle", 
"el_valle", "el_valle", "el_valle", "el_valle", "el_valle", "el_cope", 
"el_cope", "el_cope", "el_cope", "el_cope", "el_cope", "el_cope", 
"el_cope", "el_cope", "santa_fe", "santa_fe", "santa_fe", "santa_fe", 
"santa_fe", "santa_fe", "el_cope", "el_cope", "el_cope", "el_cope", 
"el_cope", "el_cope"), site = c("jordinal", "jordinal", "jordinal", 
"jordinal", "jordinal", "rio_maria", "rio_maria", "rio_maria", 
"rio_blanco", "rio_blanco", "rio_blanco", "guabal", "guabal", 
"guabal", "rio_tigrero", "rio_tigrero", "rio_tigrero", "altos_de piedra", 
"altos_de piedra", "altos_de piedra", "altos_de piedra", "altos_de piedra", 
"altos_de piedra", "sophia_stream", "sophia_stream", "sophia_stream", 
"medina", "medina", "medina"), start_date = c("2022-07-21T00:00:00+00:00", 
"2022-07-21T00:00:00+00:00", "2022-07-21T00:00:00+00:00", "2022-07-21T00:00:00+00:00", 
"2022-07-21T00:00:00+00:00", "2022-07-21T00:00:00+00:00", "2022-07-21T00:00:00+00:00", 
"2022-07-21T00:00:00+00:00", "2022-07-23T00:00:00+00:00", "2022-07-23T00:00:00+00:00", 
"2022-07-23T00:00:00+00:00", "2022-07-24T00:00:00+00:00", "2022-07-24T00:00:00+00:00", 
"2022-07-24T00:00:00+00:00", "2022-07-26T00:00:00+00:00", "2022-07-26T00:00:00+00:00", 
"2022-07-26T00:00:00+00:00", "2022-07-28T00:00:00+00:00", "2022-07-28T00:00:00+00:00", 
"2022-07-28T00:00:00+00:00", "2022-07-28T00:00:00+00:00", "2022-07-28T00:00:00+00:00", 
"2022-07-28T00:00:00+00:00", "2022-07-29T00:00:00+00:00", "2022-07-29T00:00:00+00:00", 
"2022-07-29T00:00:00+00:00", "2022-07-29T00:00:00+00:00", "2022-07-29T00:00:00+00:00", 
"2022-07-29T00:00:00+00:00"), geometry = c("POINT (-80.0864405 8.6514853)", 
"POINT (-80.0865047 8.6514879)", "POINT (-80.0864767 8.6514699)", 
"POINT (-80.0864365 8.6514033)", "POINT (-80.59016347153077 8.657592487131978)", 
"POINT (-80.0723613 8.6433237)", "POINT (-80.0724193 8.6433145)", 
"POINT (-80.59012719701079 8.657653855819484)", "POINT (-80.59084727642541 8.65736553296408)", 
"POINT (-80.59008739454525 8.65762533073169)", "POINT (-80.5900603404817 8.657645618088532)", 
"POINT (-80.59006448025009 8.65775089241877)", "POINT (-80.5900978883977 8.657643924266186)", 
"POINT (-80.59013662686417 8.657665177897595)", "POINT (-119.81285845363512 39.537657157446034)", 
"POINT (-119.81285845363512 39.537657157446034)", "POINT (-119.81285845363512 39.537657157446034)", 
"POINT (-119.81306464514674 39.53745922338899)", "POINT (-119.81306464514674 39.53745922338899)", 
"POINT (-119.81306464514674 39.53745922338899)", "POINT (-119.81306464514674 39.53745922338899)", 
"POINT (-119.81306464514674 39.53745922338899)", "POINT (-119.81306464514674 39.53745922338899)", 
"POINT (-119.81300112116044 39.53757812923485)", "POINT (-119.81300112116044 39.53757812923485)", 
"POINT (-119.8130185476632 39.53759315771993)", "POINT (-119.81295059817982 39.53742649195985)", 
"POINT (-119.81295059817982 39.53742649195985)", "POINT (-119.8129264843882 39.53773406660491)"
), id_edna_sample = c("220721_edna_001", "220721_edna_002", "220721_edna_003", 
"220721_edna_004", "220721_edna_007", "220721_005", "220721_006", 
"220721_007", "220723_001", "220723_002", "220723_003", "220724_001", 
"220724_002", "220724_003", "220726_eDNA_001", "220726_eDNA_002", 
"220726_eDNA_003", "220728_eDNA_001", "220728_eDNA_002", "220728_eDNA_003", 
"220728_eDNA_004", "220728_eDNA_005", "220728_eDNA_006", "220729_eDNA_001", 
"220729_eDNA_002", "220729_eDNA_005", "220729_eDNA_003", "220729_eDNA_004", 
"220729_eDNA_005"), transect_meters_sampled_m = c(0, 0, 0, 0, 
0, 10, 10, 0, 200, 200, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, NA), start_time_negative_control_edna_filtering = structure(c(NA, 
NA, NA, NA, 67800, NA, NA, 67800, NA, NA, 67680, NA, NA, 65760, 
NA, NA, 62700, NA, NA, NA, NA, 68340, 68340, NA, NA, 45600, NA, 
NA, 45600), class = c("hms", "difftime"), units = "secs"), finish_time_negative_control_edna_filtering = structure(c(NA, 
NA, NA, NA, 68520, NA, NA, 68520, NA, NA, 67920, NA, NA, 66360, 
NA, NA, 63000, NA, NA, NA, NA, 68640, 68640, NA, NA, 46020, NA, 
NA, 46020), class = c("hms", "difftime"), units = "secs"), start_time_sample_edna_filtering = structure(c(39180, 
40020, 40800, 41940, NA, 54480, 55860, NA, 65280, 66480, NA, 
63300, 64380, NA, 61500, 60300, NA, 66900, 66900, 67680, 67680, 
NA, NA, 42720, 43560, NA, 44280, 44940, NA), class = c("hms", 
"difftime"), units = "secs"), finish_time_sample_edna_filtering = structure(c(39540, 
40380, 41400, 42480, NA, 54720, 56280, NA, 65520, 66840, NA, 
63660, 64980, NA, 62160, 60660, NA, 67500, 67500, 68160, 68160, 
NA, NA, 43140, 43920, NA, 44700, 45360, NA), class = c("hms", 
"difftime"), units = "secs"), volume_of_filtered_sample_ml = c(1000, 
1000, 1000, NA, NA, 1000, NA, NA, 1000, 1000, NA, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, NA), edn_comments = c("Jordinal A right. 5 and 0.45 filters. Slightly less than 1L. 10:11 am took sample", 
"Jordinal A left.", "Jordinal B right. 10:58 am took sample", 
"Jordinal B left. 10:07 took sample", NA, "Taken at 12.47 transect A right", 
"Transect A left taken at 12:47", NA, "Taken  at 18:30", "Taken at 18.30", 
NA, NA, NA, NA, "Left 5 and 0.45 um", "Right 5 and 0.45 um", 
"Negative 5 and 0.45 um", "Left 5um", "Left 0.45um", "Right 5um", 
"Right 0.45um", "Negative 5um", "Negative 0.45um", "Left 0.45 and 5um", 
"Right 0.45 and 5um", "Negative 5 and 0.45 um", "Left 0.45 and 5 um", 
"Right 0.45 and 5 um", "Negative 5 and 0.45um")), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -29L))

>Solution :

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

We can use the names_pattern and its ".value" special name. I’ll end in %>% str() so that we can see the real data since it’s a bit wide as-is. (For clarity: do not use %>% str() in production, it is only for display purposes here.)

library(tidyr)
pivot_longer(
  data, 
  c(start_time_negative_control_edna_filtering, finish_time_negative_control_edna_filtering,
    start_time_sample_edna_filtering, finish_time_sample_edna_filtering), 
  names_pattern = "(start|finish)_time(.*)_edna_filtering",
  names_to = c(".value", "control")) %>%
  str()
# tibble [58 x 11] (S3: tbl_df/tbl/data.frame)
#  $ region                      : chr [1:58] "el_valle" "el_valle" "el_valle" "el_valle" ...
#  $ site                        : chr [1:58] "jordinal" "jordinal" "jordinal" "jordinal" ...
#  $ start_date                  : chr [1:58] "2022-07-21T00:00:00+00:00" "2022-07-21T00:00:00+00:00" "2022-07-21T00:00:00+00:00" "2022-07-21T00:00:00+00:00" ...
#  $ geometry                    : chr [1:58] "POINT (-80.0864405 8.6514853)" "POINT (-80.0864405 8.6514853)" "POINT (-80.0865047 8.6514879)" "POINT (-80.0865047 8.6514879)" ...
#  $ id_edna_sample              : chr [1:58] "220721_edna_001" "220721_edna_001" "220721_edna_002" "220721_edna_002" ...
#  $ transect_meters_sampled_m   : num [1:58] 0 0 0 0 0 0 0 0 0 0 ...
#  $ volume_of_filtered_sample_ml: num [1:58] 1000 1000 1000 1000 1000 1000 NA NA NA NA ...
#  $ edn_comments                : chr [1:58] "Jordinal A right. 5 and 0.45 filters. Slightly less than 1L. 10:11 am took sample" "Jordinal A right. 5 and 0.45 filters. Slightly less than 1L. 10:11 am took sample" "Jordinal A left." "Jordinal A left." ...
#  $ control                     : chr [1:58] "_negative_control" "_sample" "_negative_control" "_sample" ...
#  $ start                       : 'hms' num [1:58] NA 10:53:00 NA 11:07:00 ...
#   ..- attr(*, "units")= chr "secs"
#  $ finish                      : 'hms' num [1:58] NA 10:59:00 NA 11:13:00 ...
#   ..- attr(*, "units")= chr "secs"

I didn’t finish the control column into a binary yet, I thought I’d keep it as the extract string so you can see better what’s going on with the data. The added step to get that will be

... %>%
  mutate(control = grepl("control", control))

(assuming dplyr as well).

Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading