Geom_Line grouping error: How to properly connect points?

I am trying to create a plot where the lines connect to their respective group, but both groups start from the same beginning point.

I have a dataframe that looks like this:

FA_MRI_APOE

Condition Treatment Age (Months) Region avg_FA sd_FA
<fctr> <chr> <fctr><chr> <dbl>    <dbl>
APOE2   Sham    3.5 CC  0.1990432   NA
APOE2   Sham    3.5 EC  0.2269353   NA
APOE2   Sham    3.5 HP  0.2253147   NA
APOE2   Sham    3.5 TH  0.3257256   NA
APOE2   Sham    7.5 CC  0.3093073   0.08619885
APOE2   Sham    7.5 EC  0.2255272   0.07652789
APOE2   Sham    7.5 HP  0.2897462   0.02708867
APOE2   Sham    7.5 TH  0.2866555   0.03557000
APOE2   Sham    9.5 CC  0.1840524   NA
APOE2   Sham    9.5 EC  0.3347699   NA

dput output:

# dput() of FA_MRI_APOE: a grouped tibble with 28 rows and the columns
# Condition, Treatment, `Age (Months)`, Region, avg_FA and sd_FA, grouped by
# Condition, Treatment and `Age (Months)` (7 groups of 4 rows each).
# Only the APOE2 and APOE4 levels of Condition occur, and the "mTBI" rows
# (13-16 and 25-28) all sit at age level "7.5" -- there are no mTBI rows at 3.5.
# The expression is not assigned here; assign it to FA_MRI_APOE to reproduce.
structure(list(Condition = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L), levels = c("APOE2", "APOE3", "APOE4", 
"PS19"), class = "factor"), Treatment = c("Sham", "Sham", "Sham", 
"Sham", "Sham", "Sham", "Sham", "Sham", "Sham", "Sham", "Sham", 
"Sham", "mTBI", "mTBI", "mTBI", "mTBI", "Sham", "Sham", "Sham", 
"Sham", "Sham", "Sham", "Sham", "Sham", "mTBI", "mTBI", "mTBI", 
"mTBI"), `Age (Months)` = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L), levels = c("3.5", "7.5", "9.5"), class = "factor"), 
    Region = c("CC", "EC", "HP", "TH", "CC", "EC", "HP", "TH", 
    "CC", "EC", "HP", "TH", "CC", "EC", "HP", "TH", "CC", "EC", 
    "HP", "TH", "CC", "EC", "HP", "TH", "CC", "EC", "HP", "TH"
    ), avg_FA = c(0.199043221771717, 0.226935303, 0.225314745679497, 
    0.325725596398115, 0.309307302666667, 0.225527225666667, 
    0.289746216333333, 0.286655532, 0.184052395, 0.334769852, 
    0.34609792, 0.237951324, 0.303560921, 0.233204448, 0.30656011, 
    0.296695315, 0.256386488908901, 0.2787716635, 0.270610670559108, 
    0.265520500484854, 0.289559764, 0.337927988666667, 0.370170086833333, 
    0.352682695666667, 0.391898785, 0.112882524, 0.29461883, 
    0.258586437), sd_FA = c(NA, NA, NA, NA, 0.0861988548059451, 
    0.0765278945487883, 0.0270886655892862, 0.0355699995442251, 
    NA, NA, NA, NA, NA, NA, NA, NA, 0.0332061193275388, 0.0121768772823131, 
    0.0441863595350275, 0.063138972154108, 0.11418350071109, 
    0.0829486022039601, 0.0831420050073176, 0.0689368863615038, 
    NA, NA, NA, NA)), class = c("grouped_df", "tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -28L), groups = structure(list(
    Condition = structure(c(1L, 1L, 1L, 1L, 3L, 3L, 3L), levels = c("APOE2", 
    "APOE3", "APOE4", "PS19"), class = "factor"), Treatment = c("Sham", 
    "Sham", "Sham", "mTBI", "Sham", "Sham", "mTBI"), `Age (Months)` = structure(c(1L, 
    2L, 3L, 2L, 1L, 2L, 2L), levels = c("3.5", "7.5", "9.5"), class = "factor"), 
    .rows = structure(list(1:4, 5:8, 9:12, 13:16, 17:20, 21:24, 
        25:28), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -7L), .drop = TRUE))

And here is what I wrote to generate the plot:

# Plot avg_FA against `Age (Months)`, colored and grouped by Treatment
# (level order Sham, then mTBI), faceted by Region (rows) and Condition
# (columns).
ggplot(FA_MRI_APOE, aes(x = `Age (Months)`,
                        y = avg_FA,
                        color = factor(Treatment, levels = c("Sham", "mTBI")),
                        group = factor(Treatment, levels = c("Sham", "mTBI")))) +
  # Error bars span avg_FA - sd_FA to avg_FA + sd_FA.
  geom_errorbar(aes(ymin = avg_FA - sd_FA,
                    ymax = avg_FA + sd_FA),
                width = 0.1) +
  geom_point(size = 2.5) +
  # NOTE: the constant group = 1 replaces the Treatment grouping inherited
  # from ggplot(), so all points in a panel are joined into one line. This is
  # the behavior the question is asking how to fix.
  geom_line(group = 1) +
  # Overlay points from a second data frame, FA_MRI_APOE_raw, using its `Mean`
  # column as y; positions are jittered with position_jitter(0.1).
  geom_jitter(aes(x = `Age (Months)`,
                  y = `Mean`),
              position = position_jitter(0.1),
              alpha = 0.5,
              size = 1.5,
              data = FA_MRI_APOE_raw) +
  # Manual colors, assigned in level order: Sham, then mTBI.
  scale_color_manual(name = "Treatment", values = c("#0074C1", "#F7530B")) +
  facet_grid(Region ~ Condition) +
  theme(legend.position = "bottom")

The generated plot looks like this:
enter image description here

Essentially, I want the point for the 3.5 month old group to be the same starting point for the sham and mTBI data, but for it to then break off and connect to the points for its respective groups at the 7.5 and 9.5 month old points. Right now, it is just connecting all of the data points, which is not what I want, but I do not know how to fix it. Any help would be much appreciated :).

>Solution :

Since your data has no 3.5/mTBI rows, and you say you want the mTBI lines to start from the 3.5 points, I suggest we need to duplicate the 3.5-month rows, reclassify them as mTBI, and append them to the data.

Additionally, we need to remove group=1 from your geom_line, as it is blocking us from doing what we need with the lines. In this case, it then inherits from the original call to ggplot with its aesthetic group = factor(Treatment, levels = c("Sham", "mTBI")).

Something like this?

library(dplyr)

# The data has no mTBI rows at 3.5 months. Copy the 3.5-month rows, relabel the
# copies as mTBI, and stack the original rows underneath them so that both
# treatment lines start from the same 3.5-month values.
FA_MRI_APOE %>%
  ungroup() %>%
  filter(`Age (Months)` == "3.5") %>%
  mutate(Treatment = "mTBI") %>%
  bind_rows(FA_MRI_APOE) %>%
  ggplot(
    aes(
      x = `Age (Months)`,
      y = avg_FA,
      color = factor(Treatment, levels = c("Sham", "mTBI")),
      group = factor(Treatment, levels = c("Sham", "mTBI"))
    )
  ) +
  geom_errorbar(
    aes(ymin = avg_FA - sd_FA, ymax = avg_FA + sd_FA),
    width = 0.1
  ) +
  geom_point(size = 2.5) +
  # No group override here: the line layer inherits the per-Treatment grouping
  # from ggplot(), giving one line per treatment in each panel.
  geom_line() +
  # Raw-observation overlay left disabled: FA_MRI_APOE_raw was not provided.
  # geom_jitter(aes(x = `Age (Months)`,
  #                 y = `Mean`),
  #             position = position_jitter(0.1),
  #             alpha = 0.5,
  #             size = 1.5,
  #             data = FA_MRI_APOE_raw) +
  scale_color_manual(
    name = "Treatment",
    values = c("#0074C1", "#F7530B")
  ) +
  facet_grid(Region ~ Condition) +
  theme(legend.position = "bottom")

enter image description here

I commented out the geom_jitter layer as I don’t have the FA_MRI_APOE_raw data.

You mentioned the possibility of extra points being a problem. If the plot doesn’t look right to you, we can drop the duplicated rows (those that are both 3.5 months and mTBI) from the point layer alone by replacing the geom_point above with

  # Draw points for every row except those that are both 3.5 months and mTBI,
  # i.e. the copies that exist only to anchor the mTBI line.
  geom_point(
    size = 2.5,
    data = ~ filter(., !(`Age (Months)` == "3.5" & Treatment == "mTBI"))
  ) +

Leave a Reply