Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

How to pivot four columns into two by adding a grouping label using dplyr?

Dataset

My simulated data looks like this:

combo <- structure(list(x1 = c(-0.184530460239927, 2.58876410608302, -0.376411897125282, 
-0.918020987034264, 0.267794817079365, 0.510331712403224, -0.0405173200334374, 
-0.138325588779885, 0.238838548324343, -0.878919653298651, 1.07733581268113, 
0.626064563540553, -1.89103128062022, 0.753938255887397, 0.100128777819584, 
0.988056031450007, -1.20492449398259, -0.435112080263483, -0.458684951607226, 
-1.09119839510576, -0.727138654781203, 0.502188856207333, -1.20147250343467, 
1.17932480876155, -0.425692938732958, 1.02236820748, 0.182053835725136, 
0.380227031433806, -0.270517738476701, -0.16272617439478, 0.220593768669037, 
-0.879461950243353, -0.0679190290036845, 1.1465390380024, 0.985536359381242, 
1.30651252236453, 0.416544302978179, -1.38776959348133, 0.26987007672326, 
-0.460563840153545, 1.26860780466628, -0.0822834394483887, -0.857094965973551, 
1.55353442853147, -0.362899956015105, -0.411375625012731, -0.774646267609922, 
-1.785780738652, 0.138298869848299, -0.830007579895058), x2 = c(10.1649997431391, 
13.7753955031784, 9.14611285885852, 8.89500487630507, 11.0792634435256, 
9.40604769694862, 10.9377081086486, 10.3149083885551, 11.4433718412604, 
8.01451483884253, 10.99588355029, 11.43348692627, 8.46187682949792, 
10.2754622062573, 9.72557919495809, 12.5346844833676, 8.72464111735834, 
9.57577516193854, 8.75255475821647, 9.72989475512324, 8.84014340654749, 
11.3804494571042, 7.66077694737602, 11.5186005504865, 9.08940573637054, 
10.8650309543967, 9.61420727102614, 10.927982330081, 9.29822635541754, 
7.76242176319227, 10.7176609762039, 7.83309960333225, 11.375953933858, 
10.5394984580639, 11.188721640198, 12.2932137170417, 11.4266622202742, 
8.78385129373517, 9.63834216468245, 8.65398006594213, 9.73423323110773, 
9.90806217362202, 8.2404184732306, 12.120678223943, 9.32402321768776, 
10.0725587879694, 7.6897109556945, 6.74738232297318, 9.46663373526324, 
9.33169452437223), y1 = c(0.531152563992138, 0.757703597905396, 
-0.791655222372092, -0.302099679513038, 0.671307589662749, 2.12637384156016, 
-0.909796697236723, -1.12913854196627, 1.67269183809139, -0.434990260871762, 
-0.0700467088361849, 0.542668245540057, 0.374830971405129, -1.63773988575198, 
-2.14682334438542, 0.901301020986636, -0.511720906474068, -1.05708841652644, 
-0.272051821060645, -0.79674724927485, -1.5268191640679, -0.907794043098101, 
-0.680495891271473, -0.35552898088709, -0.680683696561238, -0.844283268256992, 
1.27768418531443, -0.961233223427325, -0.671858202047372, -0.225776242419413, 
-0.686226261720162, 1.14668780673855, -0.579007489205267, 1.48425373519236, 
0.428834567252612, -0.211941122441431, -0.512591114561531, -0.0958428297906739, 
1.68860873380911, -0.751395710328223, 1.68278553230514, 0.726087302326806, 
-1.08262236469318, 0.3445461894278, 0.826204921157919, -0.635306319455706, 
-1.82487370480718, -2.11194816404115, 0.714777331685519, -0.345433989913914
), y2 = c(10.6220278853899, 11.6063941600105, 9.88059276478581, 
8.14957601722892, 9.16367249161337, 12.762654818962, 9.00183693282061, 
9.55789660927653, 12.9609244725328, 8.90467635039521, 10.8991427528596, 
9.89206203043643, 9.41991979079818, 7.84784105454833, 8.91365647664849, 
11.3667187570646, 9.40006788407869, 9.98750951320975, 9.00951922619663, 
9.97523890006792, 8.27153592598831, 11.0884143412798, 7.94368844719581, 
10.9469372108228, 8.15593832993733, 9.84078648878971, 11.8451838496249, 
10.8083866882212, 9.61132252103889, 11.1923799807399, 9.10045982731272, 
12.0544568795267, 9.24683481504971, 14.8962611606193, 10.881540894629, 
10.1335621049274, 9.016866497424, 11.2527950167179, 11.8814160661131, 
9.22010868094334, 12.9542930603357, 10.0271771753269, 7.95131787733545, 
9.31692314229138, 11.5295911568582, 8.30646740293857, 8.389129594333, 
7.35608073778492, 11.979325721996, 10.2744608966922), z = c(-0.206932655889915, 
1.59055947315421, 0.569858682145818, -0.39269341805629, 0.0560523995691307, 
-1.35745817399495, 0.757817217783636, 1.2363198027176, -0.705515328708522, 
0.0100215308812615, 1.1496786488628, -0.0189650703302578, -1.0373398254117, 
2.53958513880197, 2.4495884905998, 0.112672166475035, -0.646251528605393, 
0.727127843912292, -0.351583194283642, -0.181459517930261, 1.02924872657184, 
1.31035563724453, -0.337780896477732, 1.1791886765987, 0.330814927636109, 
1.73920234956884, -0.809834547447509, 0.472332306869409, 0.134382367942977, 
0.705445383659287, 0.845104590511669, -2.28815412147097, -0.331179738228726, 
-0.291691270546117, -0.104170550476615, 2.12138490798859, -0.140027608407499, 
-1.03938688051102, -0.846736197331773, -0.197364615019016, -0.420498039989205, 
-1.06172053607786, -0.511495900155817, 2.07372602503043, -1.25039171703876, 
-0.00379074839623933, 0.545098097507594, -0.744687068246333, 
-0.252078776784803, -0.75556398293816)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -50L))

Problem

Having trouble wording my question or considering how to find it, so I provide the example directly here for what I’m looking for. If you inspect the data, it looks like so:

# A tibble: 50 × 5
       x1    x2      y1    y2       z
    <dbl> <dbl>   <dbl> <dbl>   <dbl>
 1  0.220  9.08 -0.763  10.3   0.596 
 2  0.712 11.5   1.79   10.2  -0.836 
 3  1.26  12.3   0.0956  9.86  0.672 
 4 -1.27   7.62  1.61   10.7  -2.16  
 5  0.285 10.7  -0.0596 10.5  -0.357 
 6  0.303 11.1   0.613  10.9  -0.113 
 7 -0.618  7.90  1.56   10.1  -1.97  
 8  2.24  10.9  -0.0412  8.38  2.22  
 9  1.27  10.2   0.290   8.72  1.05  
10  0.230 10.2   0.392  10.5   0.0289
# ℹ 40 more rows
# ℹ Use `print(n = ...)` to see more rows

You can clearly see that there are variables labeled with a "1" and "2" here. I want a new column that specifies a Group 1 and Group 2 by pivoting their associated columns here. So x1 and y1 belong to Group 1 and x2 and y2 belong to Group 2. I already know how to do something similar to this using pivot_longer, but I’m struggling with figuring out how to simultaneously group these variables together while creating a new label column. The target data I want should look something like this, where x is simply a combo of x1 and x2 and y is a combo of y1 and y2, while group labels which group each person belongs to:

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

  group         x       y       z
   <chr>     <dbl>   <dbl>   <dbl>
 1 Group 1 -3.51    0.181  -0.0610
 2 Group 2  1.63    1.35    0.405 
 3 Group 1  0.271  -0.167  -0.0578
 4 Group 2 -0.776   0.580   0.0599
 5 Group 1 -1.48   -0.0631  0.966 
 6 Group 2  0.757  -2.10    2.07  
 7 Group 1 -1.37    0.462   0.882 
 8 Group 2  0.596   0.0123  0.156 
 9 Group 1 -0.494  -0.824  -1.18  
10 Group 2  0.783  -2.28   -1.40  
11 Group 1 -0.0388 -0.209   0.868 
12 Group 2 -0.255   0.121   1.07  
13 Group 1 -0.758   0.242  -0.665 
14 Group 2 -0.966   1.66    0.374 
15 Group 1 -0.595  -1.17   -0.612 
16 Group 2  1.12   -0.205  -0.484 
17 Group 1 -0.468  -0.404  -0.420 
18 Group 2 -0.361   0.257   1.89  
19 Group 1  0.165   0.989   1.50  
20 Group 2  0.681  -0.304   0.154

>Solution :

I believe you need a combination of names_pattern and a ".value" in names_to. From the documentation:

".value" indicates the corresponding component of the column name defines the name of the output column containing the cell values.

library(tidyverse)

combo |> 
  pivot_longer(-z, names_to = c(".value", "Group"), names_pattern = "([a-z])(\\d)") |> 
  relocate(-z)

# A tibble: 100 × 4
   Group      x      y       z
   <chr>  <dbl>  <dbl>   <dbl>
 1 1     -0.185  0.531 -0.207 
 2 2     10.2   10.6   -0.207 
 3 1      2.59   0.758  1.59  
 4 2     13.8   11.6    1.59  
 5 1     -0.376 -0.792  0.570 
 6 2      9.15   9.88   0.570 
 7 1     -0.918 -0.302 -0.393 
 8 2      8.90   8.15  -0.393 
 9 1      0.268  0.671  0.0561
10 2     11.1    9.16   0.0561
# ℹ 90 more rows
Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading