Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

How to draw boxplot by multiple groups using ggplot2?

I am trying to draw boxplots of the following variables: assets and liability.

My data are firms' financial statements. Firms are classified as big or small (categorical variable lbg30), and time (years) is split into two periods, pre-crisis and post-crisis (categorical variable postcrisis). I want to draw boxplots of assets and liability that compare small firms vs. big firms and also pre-crisis vs. post-crisis.

# Sample data as dput() output: a 10-row tibble (class "tbl_df") with one
# firm-year observation per row (columns firmid and year).
# The `label` / `format.stata` attributes suggest the data were imported from a
# Stata file -- presumably via haven; the import step is not shown here.
# Columns relevant to the question: lbg30 (0/1 firm-size group),
# postcrisis (0/1 period), assets, and liability.
# NOTE: this expression is not assigned to a name; the answer refers to it as `df`.
structure(list(firmid = structure(c("000020", "000020", "000020", 
"000020", "000020", "000020", "000021", "000021", "000020", "000021"
), label = "거래소코드", format.stata = "%9s"), year = structure(c(1991, 
1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000), format.stata = "%9.0g"), 
    postcrisis = structure(c(0, 0, 0, 0, 0, 0, 0, 1, 1, 1), format.stata = "%9.0g"), 
    firm_kor = structure(c("동화약품(주)", "동화약품(주)", 
    "동화약품(주)", "동화약품(주)", "동화약품(주)", 
    "동화약품(주)", "동화약(주)", "동화약(주)", 
    "동화약품(주)", "동화약(주)"), label = "회사명", format.stata = "%44s"), 
    business_group = structure(c("동화약", "동화약", "동화약", 
    "동화약", "동화약", "동화약", "동화약", "동화약", 
    "동화약", "동화약"), label = "그룹사명", format.stata = "%33s"), 
    lbg30 = structure(c(0, 0, 0, 0, 0, 0, 1, 1, 0, 1), format.stata = "%9.0g"), 
    lbg = structure(c(0, 0, 0, 0, 0, 0, 1, 1, 0, 1), label = "기업규모코드", format.stata = "%10.0gc"), 
    bg = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), format.stata = "%9.0g"), 
    size = structure(c("", "", "", "", "", "", "", "", "", ""
    ), label = "기업규모명", format.stata = "%12s"), assets = structure(c(12150840320, 
    15652244480, 16448676864, 19630718976, 29004148736, 28329910272, 
    27457734656, 62851514368, 59374006272, 50737635328), format.stata = "%9.0g"), 
    liability = structure(c(54948823040, 66054799360, 81120837632, 
    106961879040, 122920968192, 140161728512, 162787033088, 159752404992, 
    149670641664, 105075081216), format.stata = "%9.0g"), sales = structure(c(88381997056, 
    102572998656, 114394996736, 119775002624, 128408002560, 134840000512, 
    143815000064, 141186007040, 136299003904, 138230005760), format.stata = "%9.0g"), 
    profit = structure(c(44432998400, 50231001088, 55298998272, 
    58389999616, 63920001024, 62578999296, 67171000320, 69623996416, 
    59872002048, 53057998848), format.stata = "%9.0g"), ebit = structure(c(19534999552, 
    19583000576, 21048999936, 21987000320, 25397999616, 23047999488, 
    21745000448, 26130999296, 23641999360, 1.458e+09), label = "Earning before interest and taxes (million won)", format.stata = "%9.0g"), 
    va = structure(c(25720190976, 32258500608, 35595018240, 34623062016, 
    41200451584, 43741118464, 48058458112, 50603368448, 70541492224, 
    22522920960), format.stata = "%9.0g"), va_pw = structure(c(26930000, 
    32920000, 36430000, 34010000, 41870000, 45090000, 49540000, 
    55730000, 88180000, 30440000), format.stata = "%9.0g"), va_ratio = structure(c(29.1000003814697, 
    31.4500007629395, 31.1200008392334, 28.9099998474121, 32.0900001525879, 
    32.439998626709, 33.4199981689453, 35.8400001525879, 51.75, 
    16.2900009155273), format.stata = "%9.0g"), k_productivity = structure(c(819.200012207031, 
    588.530029296875, 744.309997558594, 608.419982910156, 702.099975585938, 
    779.320007324219, 911.700012207031, 991.530029296875, 1964.06994628906, 
    502.309997558594), format.stata = "%9.0g"), k_productivity_gross = structure(c(31.4400005340576, 
    33.1300010681152, 29.6599998474121, 23.4799995422363, 25.0900001525879, 
    23.0799999237061, 22.5599994659424, 19.7700004577637, 26.2700004577637, 
    9.72999954223633), format.stata = "%9.0g"), wb = structure(c(8572080128, 
    9890159616, 10399187968, 12745639936, 14407654400, 15426884608, 
    17462267904, 16719245312, 14328732672, 15299931136), format.stata = "%9.0g"), 
    deprec = structure(c(1540752000, 1781939968, 2044096000, 
    2322487040, 2697072896, 3057124096, 3395273984, 1194128000, 
    1957659008, 2335313920), format.stata = "%9.0g"), cogs = structure(c(43948998656, 
    52342001664, 59095998464, 61384998912, 64488001536, 72260001792, 
    76643999744, 71562002432, 76427001856, 85172002816), format.stata = "%9.0g"), 
    land = structure(c(3962739968, 4533998080, 4673968128, 5412840960, 
    15167494144, 15234676736, 15215484928, 44340424704, 43726028800, 
    34115977216), format.stata = "%9.0g"), facilities = structure(c(5593545216, 
    6132070912, 7142042112, 8962248704, 9655307264, 9766675456, 
    9834147840, 12669279232, 12533188608, 12470169600), format.stata = "%9.0g"), 
    structures = structure(c(428073984, 439003008, 453208992, 
    453208992, 462492992, 487492992, 493648000, 298323008, 309323008, 
    352156992), format.stata = "%9.0g"), machinery = structure(c(7848509952, 
    12346684416, 13176728576, 15883726848, 18024470528, 20001619968, 
    5274594816, 20351035392, 21328994304, 25123174400), format.stata = "%9.0g"), 
    mold_pattern = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%9.0g"), 
    machinery_heavy = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0), format.stata = "%9.0g"), equipment = structure(c(0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%9.0g"), devices = structure(c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%9.0g")), row.names = c(NA, 
-10L), class = c("tbl_df", "tbl", "data.frame"))

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

Solution:

Because lbg30 in your sample data is mostly 0, I set rows 6 to 10 to 1 so that both groups have several observations. Here df is the data you provided.

# Work on a copy so the original `df` is left untouched.
df1 <- df
# Put rows 6 through 10 into the lbg30 == 1 group.
df1$lbg30 <- replace(df1$lbg30, 6:10, 1)

Then you may try

# Boxplots of assets (red) and liability (blue) by firm-size group (lbg30),
# with one facet per crisis period (postcrisis).
library(dplyr)    # provides %>% and mutate(); ggplot2 does not export them
library(ggplot2)

df1 %>%
  # Convert the 0/1 indicators to factors so they are treated as discrete groups.
  mutate(
    postcrisis = as.factor(postcrisis),
    lbg30 = as.factor(lbg30)
  ) %>%
  ggplot(aes(x = lbg30, group = lbg30)) +
  # Both layers share the same x positions, so the red and blue boxes are
  # drawn on top of each other within each lbg30 group.
  geom_boxplot(aes(y = assets), col = "red") +
  geom_boxplot(aes(y = liability), col = "blue") +
  facet_wrap(. ~ postcrisis)

enter image description here

Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading