I am trying to create a plot where the lines connect to their respective group, but both groups start from the same beginning point.
I have a dataframe that looks like this:
FA_MRI_APOE
Condition Treatment Age (Months) Region avg_FA sd_FA
<fctr> <chr> <fctr><chr> <dbl> <dbl>
APOE2 Sham 3.5 CC 0.1990432 NA
APOE2 Sham 3.5 EC 0.2269353 NA
APOE2 Sham 3.5 HP 0.2253147 NA
APOE2 Sham 3.5 TH 0.3257256 NA
APOE2 Sham 7.5 CC 0.3093073 0.08619885
APOE2 Sham 7.5 EC 0.2255272 0.07652789
APOE2 Sham 7.5 HP 0.2897462 0.02708867
APOE2 Sham 7.5 TH 0.2866555 0.03557000
APOE2 Sham 9.5 CC 0.1840524 NA
APOE2 Sham 9.5 EC 0.3347699 NA
dput output:
structure(list(Condition = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), levels = c("APOE2", "APOE3", "APOE4",
"PS19"), class = "factor"), Treatment = c("Sham", "Sham", "Sham",
"Sham", "Sham", "Sham", "Sham", "Sham", "Sham", "Sham", "Sham",
"Sham", "mTBI", "mTBI", "mTBI", "mTBI", "Sham", "Sham", "Sham",
"Sham", "Sham", "Sham", "Sham", "Sham", "mTBI", "mTBI", "mTBI",
"mTBI"), `Age (Months)` = structure(c(1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L), levels = c("3.5", "7.5", "9.5"), class = "factor"),
Region = c("CC", "EC", "HP", "TH", "CC", "EC", "HP", "TH",
"CC", "EC", "HP", "TH", "CC", "EC", "HP", "TH", "CC", "EC",
"HP", "TH", "CC", "EC", "HP", "TH", "CC", "EC", "HP", "TH"
), avg_FA = c(0.199043221771717, 0.226935303, 0.225314745679497,
0.325725596398115, 0.309307302666667, 0.225527225666667,
0.289746216333333, 0.286655532, 0.184052395, 0.334769852,
0.34609792, 0.237951324, 0.303560921, 0.233204448, 0.30656011,
0.296695315, 0.256386488908901, 0.2787716635, 0.270610670559108,
0.265520500484854, 0.289559764, 0.337927988666667, 0.370170086833333,
0.352682695666667, 0.391898785, 0.112882524, 0.29461883,
0.258586437), sd_FA = c(NA, NA, NA, NA, 0.0861988548059451,
0.0765278945487883, 0.0270886655892862, 0.0355699995442251,
NA, NA, NA, NA, NA, NA, NA, NA, 0.0332061193275388, 0.0121768772823131,
0.0441863595350275, 0.063138972154108, 0.11418350071109,
0.0829486022039601, 0.0831420050073176, 0.0689368863615038,
NA, NA, NA, NA)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -28L), groups = structure(list(
Condition = structure(c(1L, 1L, 1L, 1L, 3L, 3L, 3L), levels = c("APOE2",
"APOE3", "APOE4", "PS19"), class = "factor"), Treatment = c("Sham",
"Sham", "Sham", "mTBI", "Sham", "Sham", "mTBI"), `Age (Months)` = structure(c(1L,
2L, 3L, 2L, 1L, 2L, 2L), levels = c("3.5", "7.5", "9.5"), class = "factor"),
.rows = structure(list(1:4, 5:8, 9:12, 13:16, 17:20, 21:24,
25:28), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -7L), .drop = TRUE))
And here is what I wrote to generate the plot:
# Plot avg_FA against age, coloured and grouped by Treatment (Sham first, then mTBI),
# with one facet panel per Region (rows) x Condition (columns).
ggplot(FA_MRI_APOE, aes(x = `Age (Months)`,
y = avg_FA,
color = factor(Treatment, levels = c("Sham", "mTBI")),
group = factor(Treatment, levels = c("Sham", "mTBI")))) +
# Error bars span avg_FA - sd_FA to avg_FA + sd_FA; sd_FA is NA for several rows of the data above.
geom_errorbar(aes(ymin = avg_FA - sd_FA,
ymax = avg_FA + sd_FA),
width = 0.1) +
geom_point(size = 2.5) +
# NOTE: the constant group = 1 replaces the Treatment grouping set in ggplot() for this layer,
# so every point in a panel is joined into one line -- the behaviour this question asks about.
geom_line(group = 1) +
# Jittered points drawn from a second data frame, FA_MRI_APOE_raw (not shown in this post),
# using its `Mean` column as y -- presumably the individual raw measurements; confirm against that data.
geom_jitter(aes(x = `Age (Months)`,
y = `Mean`),
position = position_jitter(0.1),
alpha = 0.5,
size = 1.5,
data = FA_MRI_APOE_raw) +
# Manual colours; with the factor levels above, the first is used for Sham and the second for mTBI.
scale_color_manual(name = "Treatment", values = c("#0074C1", "#F7530B")) +
facet_grid(Region ~ Condition) +
theme(legend.position = "bottom")
The generated plot looks like this:
Essentially, I want the point for the 3.5 month old group to be the same starting point for the sham and mTBI data, but for it to then break off and connect to the points for its respective groups at the 7.5 and 9.5 month old points. Right now, it is just connecting all of the data points, which is not what I want, but I do not know how to fix it. Any help would be much appreciated :).
>Solution :
Since your data has no 3.5-month mTBI rows, and you say you want the mTBI lines to start from the 3.5-month points, I suggest we duplicate the 3.5-month rows, reclassify the copies as mTBI, and append them to the data.
Additionally, we need to remove `group = 1` from your `geom_line`, as it is blocking us from doing what we need with the lines: it overrides the grouping and joins every point in a panel into one line. Once it is removed, `geom_line` inherits the aesthetic `group = factor(Treatment, levels = c("Sham", "mTBI"))` from the original call to `ggplot`.
Something like this?
library(dplyr)
library(ggplot2)  # ggplot(), the geoms, scales and facets below all come from ggplot2

# Build the plotting data: copy the 3.5-month Sham rows, relabel the copies as
# mTBI, and put them in front of the original data. This gives the mTBI group
# a point at 3.5 months, so its line starts from the same place as the Sham line.
ungroup(FA_MRI_APOE) %>%
  # Only Sham rows are copied, so an existing 3.5-month mTBI row (if the data
  # ever has one) is not duplicated.
  filter(`Age (Months)` == "3.5", Treatment == "Sham") %>%
  mutate(Treatment = "mTBI") %>%
  bind_rows(FA_MRI_APOE) %>%
  ggplot(aes(x = `Age (Months)`,
             y = avg_FA,
             color = factor(Treatment, levels = c("Sham", "mTBI")),
             group = factor(Treatment, levels = c("Sham", "mTBI")))) +
  geom_errorbar(aes(ymin = avg_FA - sd_FA,
                    ymax = avg_FA + sd_FA),
                width = 0.1) +
  geom_point(size = 2.5) +
  # No group = 1 here: the line layer inherits the Treatment grouping from
  # ggplot(), so each panel gets one line per treatment.
  geom_line() +
  # geom_jitter(aes(x = `Age (Months)`,
  #                 y = `Mean`),
  #             position = position_jitter(0.1),
  #             alpha = 0.5,
  #             size = 1.5,
  #             data = FA_MRI_APOE_raw) +
  scale_color_manual(name = "Treatment", values = c("#0074C1", "#F7530B")) +
  facet_grid(Region ~ Condition) +
  theme(legend.position = "bottom")
I commented out the `geom_jitter` layer, as I don't have the `FA_MRI_APOE_raw` data.
You mentioned the possibility of extra points being a problem. If it doesn't look right for you, we can drop the duplicated 3.5-month mTBI rows from the point layer alone (the lines still use them) by replacing the `geom_point` line above with:
# Point layer only: keep every row except those that are both 3.5 months and mTBI.
geom_point(
  data = function(d) filter(d, !(`Age (Months)` == "3.5" & Treatment == "mTBI")),
  size = 2.5
) +