R: Sort points in ggplot

August 30, 2022

I have the following dataframe and I want to generate a scatter plot with points ordered by date.

 # Example data: dput() output of a tibble grouped by `cluster`
 # (41 rows; columns: cluster, name, date, group_id). It is assigned to
 # `subset_so` so that the plotting code further down can be run as-is.
 subset_so <- structure(list(cluster = c("uid_1", "uid_2", "uid_3", "uid_4", 
    "uid_5", "uid_6", "Cluster_07", "Cluster_07", "Cluster_07", "Cluster_07", 
    "Cluster_07", "uid_12", "uid_13", "Cluster_07", "Cluster_07", 
    "uid_16", "Cluster_07", "Cluster_07", "uid_19", "uid_20", "uid_21", 
    "uid_22", "uid_23", "uid_24", "uid_25", "uid_26", "uid_27", "uid_28", 
    "uid_29", "Cluster_50", "uid_31", "uid_32", "uid_33", "uid_34", 
    "uid_35", "uid_36", "Cluster_50", "uid_38", "uid_39", "uid_40", 
    "Cluster_50"), name = c("E569", "E847", "E848", "E882", "E398", 
    "E443", "E462", "E970", "E1078", "E831", "E866", "E1047", "E964", 
    "E507", "E774", "E1106", "E943", "E1069", "E867", "E868", "E44", 
    "E60", "E869", "E482", "E125", "E126", "E114", "E123", "E81", 
    "E504", "E178", "E179", "E180", "E181", "E299", "E793", "E219", 
    "E182", "E183", "E184", "E650"), date = structure(c(18281, 18396, 
    18396, 18414, 18441, 18526, 18586, 18267, 18300, 18317, 18418, 
    18437, 18461, 18469, 18470, 18476, 18497, 18501, 18564, 18568, 
    18341, 18353, 18383, 18401, 18411, 18411, 18423, 18435, 18495, 
    18498, 18542, 18552, 18559, 18559, 18598, 18604, 18617, 18277, 
    18277, 18280, 18340), class = "Date"), group_id = c(3L, 8L, 19L, 
    28L, 30L, 31L, 1L, 1L, 1L, 1L, 1L, 4L, 5L, 1L, 1L, 6L, 1L, 1L, 
    7L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 2L, 20L, 
    21L, 22L, 23L, 24L, 25L, 2L, 26L, 27L, 29L, 2L)), row.names = c(NA, 
    -41L), groups = structure(list(cluster = c("Cluster_07", "Cluster_50", 
    "uid_1", "uid_12", "uid_13", "uid_16", "uid_19", "uid_2", "uid_20", 
    "uid_21", "uid_22", "uid_23", "uid_24", "uid_25", "uid_26", "uid_27", 
    "uid_28", "uid_29", "uid_3", "uid_31", "uid_32", "uid_33", "uid_34", 
    "uid_35", "uid_36", "uid_38", "uid_39", "uid_4", "uid_40", "uid_5", 
    "uid_6"), .rows = structure(list(c(7L, 8L, 9L, 10L, 11L, 14L, 
    15L, 17L, 18L), c(30L, 37L, 41L), 1L, 12L, 13L, 16L, 19L, 2L, 
        20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 3L, 31L, 
        32L, 33L, 34L, 35L, 36L, 38L, 39L, 4L, 40L, 5L, 6L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -31L), class = c("tbl_df", 
    "tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
    "tbl_df", "tbl", "data.frame"))

I was able to generate the plot using the following code.

 # Current attempt: one row per cluster, points joined by a line.
 # Both axes are passed through reorder(): dates are reordered by
 # cluster, clusters are reordered by date.
 ggplot(
   subset_so,
   aes(
     x = reorder(date, factor(cluster)),
     y = reorder(factor(cluster), date)
   )
 ) +
   geom_line(aes(group = cluster), color = "black") +
   geom_point(size = 3) +
   labs(x = "date", y = "cluster") +
   theme_light() +
   theme(
     # Text sizes
     text = element_text(size = 12),
     axis.title.x = element_text(vjust = 0, size = 11),
     axis.title.y = element_text(vjust = 2, size = 11),
     axis.text.x = element_text(angle = 90, hjust = 1, size = 9),
     # Legend appearance
     legend.position = "bottom",
     legend.key = element_rect(fill = "gray96"),
     legend.title = element_text(size = 10),
     # No vertical grid lines
     panel.grid.major.x = element_blank(),
     panel.grid.minor.x = element_blank()
   )

However, what I want is for the y-axis to be sorted by date based on the first (earliest) point of each cluster, such as Cluster_07 and Cluster_50, irrespective of their other points.

For example,

Cluster_07 has the earliest date, so it should be at the very bottom of the y-axis. Similarly, Cluster_50 should be placed before uid_21 on the y-axis.

Is there a way to do this? Many thanks in advance!

>Solution :

It sounds like you want to sort clusters based on their minimum date. We can do this by first ungrouping (so that each cluster can be compared with the others) and then reordering the factor levels. For that I like forcats::fct_reorder, whose three main parameters are 1) the vector to be ordered, 2) the variable to sort by, and 3) the summary function to use.

library(dplyr)
library(ggplot2)

# Order the cluster levels by each cluster's earliest date, then plot.
subset_so %>%
  # Remove the grouping so the reordering is computed across all clusters.
  ungroup() %>%
  mutate(cluster = forcats::fct_reorder(cluster, date, .fun = min)) %>%
  ggplot(aes(x = date, y = cluster)) +
  geom_line(aes(group = cluster), color = "black") +
  geom_point(size = 3) +
  labs(x = "date", y = "cluster") +
  theme_light() +
  theme(
    # Text sizes
    text = element_text(size = 12),
    axis.title.x = element_text(vjust = 0, size = 11),
    axis.title.y = element_text(vjust = 2, size = 11),
    axis.text.x = element_text(angle = 90, hjust = 1, size = 9),
    # Legend appearance
    legend.position = "bottom",
    legend.key = element_rect(fill = "gray96"),
    legend.title = element_text(size = 10),
    # No vertical grid lines
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )