Simple group bar chart with dual axis issue

I have the following data frame:

structure(list(Article = c("Compaq Presario 100", "IBM 500", 
"AST Intel 150", "AST Intel 200", "Compaq Presario 100", "AST Intel 150", 
"AST Intel 200", "IBM 500", "Compaq Presario 100", "IBM 500", 
"AST Intel 200", "Compaq Presario 100", "IBM 500", "AST Intel 150", 
"Compaq Presario 100", "AST Intel 200", "Compaq Presario 100", 
"AST Intel 200", "Compaq Presario 100", "IBM 500", "AST Intel 150", 
"Compaq Presario 100", "IBM 500", "AST Intel 200", "AST Intel 150", 
"AST Intel 150", "IBM 500", "AST Intel 200", "Compaq Presario 100", 
"AST Intel 200", "AST Intel 150", "Compaq Presario 100", "IBM 500", 
"IBM 500", "AST Intel 200", "Compaq Presario 100", "AST Intel 200", 
"IBM 500", "AST Intel 150", "Compaq Presario 100", "AST Intel 200", 
"AST Intel 150", "IBM 500", "AST Intel 200", "AST Intel 200", 
"AST Intel 150", "IBM 500", "Compaq Presario 100", "AST Intel 200", 
"AST Intel 150", "IBM 500", "Compaq Presario 100", "AST Intel 200", 
"IBM 500", "IBM 500", "AST Intel 150", "AST Intel 200", "Compaq Presario 100", 
"IBM 500", "AST Intel 200", "IBM 500", "AST Intel 150", "Compaq Presario 100", 
"AST Intel 200", "AST Intel 200", "AST Intel 200", "IBM 500", 
"AST Intel 150", "Compaq Presario 100", "AST Intel 150", "IBM 500", 
"AST Intel 200", "AST Intel 200", "Compaq Presario 100", "AST Intel 150", 
"AST Intel 200", "Compaq Presario 100", "IBM 500", "Compaq Presario 100", 
"AST Intel 200", "Compaq Presario 100", "AST Intel 150", "AST Intel 200", 
"AST Intel 200", "Compaq Presario 100", "Compaq Presario 100", 
"IBM 500", "AST Intel 200", "AST Intel 200", "IBM 500", "AST Intel 150", 
"Compaq Presario 100", "AST Intel 200", "AST Intel 200", "IBM 500", 
"IBM 500", "AST Intel 150", "Compaq Presario 100", "Compaq Presario 100", 
"Compaq Presario 100", "IBM 500", "AST Intel 200", "Compaq Presario 100", 
"IBM 500", "AST Intel 150", "Compaq Presario 100", "AST Intel 200", 
"IBM 500", "AST Intel 150"), Quantite = c(12L, 2L, 5L, 3L, 13L, 
2L, 2L, 4L, 4L, 2L, 6L, 6L, 6L, 3L, 8L, 8L, 11L, 11L, 14L, 7L, 
6L, 23L, 3L, 2L, 12L, 24L, 8L, 9L, 6L, 4L, 2L, 16L, 3L, 7L, 6L, 
5L, 23L, 4L, 7L, 12L, 1L, 9L, 6L, 2L, 4L, 1L, 11L, 8L, 7L, 13L, 
2L, 5L, 3L, 5L, 7L, 21L, 7L, 7L, 9L, 8L, 10L, 4L, 12L, 3L, 4L, 
5L, 12L, 5L, 13L, 12L, 5L, 22L, 13L, 8L, 33L, 10L, 11L, 4L, 13L, 
2L, 15L, 2L, 4L, 22L, 21L, 19L, 6L, 2L, 8L, 7L, 4L, 5L, 8L, 5L, 
6L, 5L, 6L, 6L, 31L, 5L, 2L, 6L, 11L, 3L, 3L, 15L, 4L, 3L, 2L
), PrixTotalAvecRabais = c(19503, 4598, 13450, 9570, 21128.25, 
5380, 6380, 9196, 6600, 4598, 18852.9, 9751.5, 13587.09, 8070, 
13002, 25137.2, 17877.75, 34563.65, 22753.5, 15851.605, 15897.9, 
36811.5, 6897, 6380, 31795.8, 62623.2, 18116.12, 28279.35, 9751.5, 
12760, 5380, 25608, 6897, 15851.605, 18852.9, 8250, 71168.9, 
9196, 18547.55, 19503, 3190, 23846.85, 13587.09, 6380, 12760, 
2690, 24909.665, 13002, 21995.05, 34445.45, 4598, 8250, 9570, 
11495, 15851.605, 54795.3, 21995.05, 11376.75, 20380.635, 25137.2, 
22645.15, 10760, 19503, 9570, 12760, 15950, 27174.18, 13450, 
21128.25, 31795.8, 11495, 68074.6, 40847.95, 13002, 85219.2, 
31421.5, 17877.75, 9196, 21128.25, 6380, 24378.75, 5380, 12760, 
68074.6, 33610.5, 30409.5, 13587.09, 6380, 25137.2, 15851.605, 
10760, 8250, 25137.2, 15950, 13587.09, 11495, 15897.9, 9751.5, 
49104, 8250, 4598, 18852.9, 17877.75, 6897, 8070, 24378.75, 12760, 
6897, 5380)), class = "data.frame", row.names = c(NA, -109L))

I try do to with these data a simple grouped bar chart with dual axis and values of top of each bar. For this purpose since weeks I’m dealing with the code below but can’t do better:

library(ggplot2)
library(tidyr)
library(dplyr)

#consistency check for later
DF %>% group_by(Article) %>% summarize_at(c("Quantite","PrixTotalAvecRabais"),sum,na.rm=TRUE)

#we prepare a special dataframe trick format to get the expected result
DFlong <- DF |> pivot_longer(cols = -Article,names_to = "Type") |> 
  mutate(scaled_value=ifelse(Type=="PrixTotalAvecRabais",value,value*1000 ))

#the plot itself
ggplot(DFlong ,aes(x=Article, y = scaled_value,fill= Type)) + 
   geom_bar(stat="identity",position="dodge",na.rm=TRUE) + 
  scale_y_continuous(sec.axis = sec_axis(~ . / 100, name = "Quantite"))+
  labs(y="PrixTotalAvecRabais")+theme(legend.position="bottom")+
  geom_text(aes(label = scaled_value), vjust = 0)

As you can see in the image below. The values of quantities and cashflow relatively to the right axis are completely wrong and also multiple labels appears…

enter image description here

Any help is greatly appreciated

Thanks

>Solution :

The main issue is that you have multiple values per Article and Type. Hence you get multiple labels. To fix that I would suggest to aggregate your data before passing it to ggplot(). Second, to align the labels with the bars you have to dodge the labels using position = position_dodge(width = .9). Third, for the label you probably want the value not the scaled_value. Finally, in sec.axis you divided by 100 but scaled your data by 1000.

library(ggplot2)
library(tidyr)
library(dplyr)

# consistency check for later
DF %>%
  group_by(Article) %>%
  summarize_at(c("Quantite", "PrixTotalAvecRabais"), sum, na.rm = TRUE)

# we prepare a special dataframe trick format to get the expected result
DFlong <- DF |>
  pivot_longer(cols = -Article, names_to = "Type") |>
  group_by(Article, Type) |> 
  summarise(value = sum(value), .groups = "drop") |> 
  mutate(scaled_value = ifelse(Type == "PrixTotalAvecRabais", value, value * 1000))

# the plot itself
ggplot(DFlong, aes(x = Article, y = scaled_value, fill = Type)) +
  geom_col(position = "dodge", na.rm = TRUE, color = "black") +
  scale_y_continuous(sec.axis = sec_axis(~ . / 1000, name = "Quantite")) +
  labs(y = "PrixTotalAvecRabais") +
  theme(legend.position = "bottom") +
  geom_text(aes(label = value), vjust = -.3, position = position_dodge(width = .9))

enter image description here

Leave a Reply