Here is my data:
data <- data.frame(
ID = 1:6,
Course1A = c("A1", "A1", "A1", "A1", "A1", "A1"),
Time1A = c(1, 1, 1, 1, 1, 1),
Score1A = c(20, 17, 20, 16, 20, 11),
Course2B = c("B1", "B1", "B1", "B1", "B1", "B1"),
Time2B = c(1, 1, 1, 1, 1, 1),
Score2B = c(12, 11, 15, 15, 10, 15),
Course3C = c("C1", "C1", "C1", "C1", "C1", "C1"),
TimeC3 = c(1, 1, 1, 1, 1, 1),
ScoreC3 = c(10, 12, 12, 10, 10, 10),
Course4D = c("D1", "D1", "D1", "D1", "D1", "D1"),
TimeD4 = c(1, 1, 1, 1, 1, 1),
ScoreD4 = c(20, 20, 18, 20, 17, 20),
Course5E = c("E1", "E1", "E1", "E1", "E1", "E1"),
TimeE5 = c(2, 2, 2, 2, 2, 2),
Score5E = c(12, 12, 12, 12, 11, 11),
Course6F = c("F1", "F1", "F1", "F1", "F1", "F1"),
TimeF6 = c(2, 2, 2, 2, 2, 2),
ScoreF6 = c(10, 10, 10, 10, 10, 9),
Course7G = c("G1", "G1", "G1", "G1", "G1", "G1"),
TimeG7 = c(3, 3, 3, 3, 3, 3),
Score7G = c(12, 11, 6, 4, 12, 9),
Course8H = c("H1", "H1", "H1", "H1", "H1", "H1"),
TimeH8 = c(3, 3, 3, 3, 3, 3),
Score8H = c(12, 12, 12, 12, 10, 10),
Gender = c("F", "F", "F", "F", "F", "F"),
Race = c("A", "B", "C", "C", "C", "C"),
Health = c("Yes", "Yes", "Yes", "Yes", "Yes", "Yes"),
stringsAsFactors = FALSE
I want to get columns for ID, Course, Time, Gender, Race and health and Score.
I have used the following codes, but I get a wrong table
reshaped_data <- data %>%
pivot_longer(
cols = -c(ID, Gender, Race, Health),
names_to = c(".value", "Group"),
names_pattern = "([A-Za-z]+)([0-9]+[A-Z])"
)
>Solution :
I think it might be easiest if you rationalized the names for the variables to make the suffixes the same. For example, you want to make it so that 3C and C3 get the same value, then the reshape will work easily.
library(dplyr)
library(tidyr)
library(stringr)
data <- data.frame(
ID = 1:6,
Course1A = c("A1", "A1", "A1", "A1", "A1", "A1"),
Time1A = c(1, 1, 1, 1, 1, 1),
Score1A = c(20, 17, 20, 16, 20, 11),
Course2B = c("B1", "B1", "B1", "B1", "B1", "B1"),
Time2B = c(1, 1, 1, 1, 1, 1),
Score2B = c(12, 11, 15, 15, 10, 15),
Course3C = c("C1", "C1", "C1", "C1", "C1", "C1"),
TimeC3 = c(1, 1, 1, 1, 1, 1),
ScoreC3 = c(10, 12, 12, 10, 10, 10),
Course4D = c("D1", "D1", "D1", "D1", "D1", "D1"),
TimeD4 = c(1, 1, 1, 1, 1, 1),
ScoreD4 = c(20, 20, 18, 20, 17, 20),
Course5E = c("E1", "E1", "E1", "E1", "E1", "E1"),
TimeE5 = c(2, 2, 2, 2, 2, 2),
Score5E = c(12, 12, 12, 12, 11, 11),
Course6F = c("F1", "F1", "F1", "F1", "F1", "F1"),
TimeF6 = c(2, 2, 2, 2, 2, 2),
ScoreF6 = c(10, 10, 10, 10, 10, 9),
Course7G = c("G1", "G1", "G1", "G1", "G1", "G1"),
TimeG7 = c(3, 3, 3, 3, 3, 3),
Score7G = c(12, 11, 6, 4, 12, 9),
Course8H = c("H1", "H1", "H1", "H1", "H1", "H1"),
TimeH8 = c(3, 3, 3, 3, 3, 3),
Score8H = c(12, 12, 12, 12, 10, 10),
Gender = c("F", "F", "F", "F", "F", "F"),
Race = c("A", "B", "C", "C", "C", "C"),
Health = c("Yes", "Yes", "Yes", "Yes", "Yes", "Yes"),
stringsAsFactors = FALSE)
To do this, I would first find the variables you want and save the stem (i.e., Time, Course, Score) and the suffix (i.e., the two-character ending).
sfx <- gsub("(Time|Course|Score)(.*)", "\\2", grep("Time|Score|Course", names(data), value=TRUE))
stem <- gsub("(Time|Course|Score)(.*)", "\\1", grep("Time|Score|Course", names(data), value=TRUE))
Next, you can split the two suffix characters apart, then sort them and paste them back together.
sfx <- str_split(sfx, "", simplify=TRUE)
sfx <- apply(sfx, 1, function(x)paste(sort(x), collapse=""))
Then, you can paste the newly rationalized suffixes on to the stems and rename the relevant columns:
names(data)[grep("Time|Score|Course", names(data))] <- paste(stem, sfx, sep="")
Then, the reshape works as expected.
reshaped_data <- data %>%
pivot_longer(
cols = -c(ID, Gender, Race, Health),
names_to = c(".value", "Group"),
names_pattern = "(Course|Time|Score)(.{2})$"
)
reshaped_data
#> # A tibble: 48 × 8
#> ID Gender Race Health Group Course Time Score
#> <int> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
#> 1 1 F A Yes 1A A1 1 20
#> 2 1 F A Yes 2B B1 1 12
#> 3 1 F A Yes 3C C1 1 10
#> 4 1 F A Yes 4D D1 1 20
#> 5 1 F A Yes 5E E1 2 12
#> 6 1 F A Yes 6F F1 2 10
#> 7 1 F A Yes 7G G1 3 12
#> 8 1 F A Yes 8H H1 3 12
#> 9 2 F B Yes 1A A1 1 17
#> 10 2 F B Yes 2B B1 1 11
#> # … with 38 more rows
Created on 2023-03-31 with reprex v2.0.2