Home How to group by a dataframe by all columns except one column (data frame has more than 50 columns)

Questions

How to group by a dataframe by all columns except one column (data frame has more than 50 columns)

January 16, 2022

I have a dataframe that has more than 50 columns, and I want to group by this dataframe with all columns except one column with the name of "logg_overall_assess_current". I want to find the mean of this variable. Below is a sample of dataframe

structure(list(Bedroom = c(1, 2, 1, 2, 1), logg_overall_assess_current = c(13.1495893636579, 
13.4744277087854, 13.3046849341983, 13.7255858504363, 13.2321142413634
), TypeDwel1.2.Duplex = c(0, 0, 0, 0, 0), TypeDwelApartment.Condo = c(1, 
1, 1, 1, 1), TypeDwelOther = c(0, 0, 0, 0, 0), TypeDwelTownhouse = c(0, 
0, 0, 0, 0), Age_new.70 = c(0, 0, 0, 0, 0), Age_new0.1 = c(0, 
0, 0, 0, 0), Age_new16.40 = c(1, 1, 1, 1, 1), Age_new2.5 = c(0, 
0, 0, 0, 0), Age_new41.70 = c(0, 0, 0, 0, 0), Age_new6.15 = c(0, 
0, 0, 0, 0), LandFreehold = c(1, 1, 1, 1, 1), LandLeasehold.prepaid = c(0, 
0, 0, 0, 0), LandOthers = c(0, 0, 0, 0, 0), cluster_K_mean.1 = c(0, 
0, 0, 0, 0), cluster_K_mean.2 = c(0, 0, 0, 0, 0), cluster_K_mean.3 = c(0, 
0, 0, 0, 0), cluster_K_mean.4 = c(1, 1, 1, 1, 1), Sold.Year.2018 = c(0, 
0, 0, 0, 0), Sold.Year.2019 = c(1, 1, 1, 1, 1), Sold.Year.2020 = c(0, 
0, 0, 0, 0), S.A.Cambie = c(0, 0, 0, 0, 0), S.A.Champlain.Heights = c(0, 
0, 0, 0, 0), S.A.Coal.Harbour = c(0, 0, 0, 0, 0), S.A.Collingwood.VE = c(0, 
0, 0, 0, 0), S.A.Downtown.VE = c(0, 0, 0, 0, 0), S.A.Downtown.VW = c(1, 
1, 1, 1, 1), S.A.Dunbar = c(0, 0, 0, 0, 0), S.A.Fairview.VW = c(0, 
0, 0, 0, 0), S.A.False.Creek = c(0, 0, 0, 0, 0), S.A.Fraser.VE = c(0, 
0, 0, 0, 0), S.A.Fraserview.VE = c(0, 0, 0, 0, 0), S.A.Grandview.VE = c(0, 
0, 0, 0, 0), S.A.Grandview.Woodland = c(0, 0, 0, 0, 0), S.A.Hastings = c(0, 
0, 0, 0, 0), S.A.Hastings.Sunrise = c(0, 0, 0, 0, 0), S.A.Kerrisdale = c(0, 
0, 0, 0, 0), S.A.Killarney.VE = c(0, 0, 0, 0, 0), S.A.Kitsilano = c(0, 
0, 0, 0, 0), S.A.Knight = c(0, 0, 0, 0, 0), S.A.Main = c(0, 0, 
0, 0, 0), S.A.Marpole = c(0, 0, 0, 0, 0), S.A.Mount.Pleasant.VE = c(0, 
0, 0, 0, 0), S.A.Mount.Pleasant.VW = c(0, 0, 0, 0, 0), S.A.Oakridge.VW = c(0, 
0, 0, 0, 0), S.A.Point.Grey = c(0, 0, 0, 0, 0), S.A.Quilchena = c(0, 
0, 0, 0, 0), S.A.Renfrew.VE = c(0, 0, 0, 0, 0), S.A.S.W..Marine = c(0, 
0, 0, 0, 0), S.A.Shaughnessy = c(0, 0, 0, 0, 0), S.A.South.Cambie = c(0, 
0, 0, 0, 0), S.A.South.Granville = c(0, 0, 0, 0, 0), S.A.South.Marine = c(0, 
0, 0, 0, 0), S.A.South.Vancouver = c(0, 0, 0, 0, 0), S.A.Strathcona = c(0, 
0, 0, 0, 0), S.A.University.VW = c(0, 0, 0, 0, 0), S.A.Victoria.VE = c(0, 
0, 0, 0, 0), S.A.West.End.VW = c(0, 0, 0, 0, 0), S.A.Yaletown = c(0, 
0, 0, 0, 0), S.A.Other = c(0, 0, 0, 0, 0)), row.names = c("198", 
"319", "387", "413", "439"), class = "data.frame")

I am looking for a smart way to group by all these variables without typing their name one by one.

>Solution :

We can use dplyr::across in group_by:

library(dplyr)

df %>%
  group_by(across(-logg_overall_assess_current)) %>% 
  summarise(mean = mean(logg_overall_assess_current, na.rm = TRUE))

#> `summarise()` has grouped output by 'Bedroom', 'TypeDwel1.2.Duplex',
#> 'TypeDwelApartment.Condo', 'TypeDwelOther', 'TypeDwelTownhouse', 'Age_new.
#> 70', 'Age_new0.1', 'Age_new16.40', 'Age_new2.5', 'Age_new41.70',
#> 'Age_new6.15', 'LandFreehold', 'LandLeasehold.prepaid', 'LandOthers',
#> 'cluster_K_mean.1', 'cluster_K_mean.2', 'cluster_K_mean.3', 'cluster_K_mean.
#> 4', 'Sold.Year.2018', 'Sold.Year.2019', 'Sold.Year.2020', 'S.A.Cambie',
#> 'S.A.Champlain.Heights', 'S.A.Coal.Harbour', 'S.A.Collingwood.VE',
#> 'S.A.Downtown.VE', 'S.A.Downtown.VW', 'S.A.Dunbar', 'S.A.Fairview.VW',
#> 'S.A.False.Creek', 'S.A.Fraser.VE', 'S.A.Fraserview.VE', 'S.A.Grandview.VE',
#> 'S.A.Grandview.Woodland', 'S.A.Hastings', 'S.A.Hastings.Sunrise',
#> 'S.A.Kerrisdale', 'S.A.Killarney.VE', 'S.A.Kitsilano', 'S.A.Knight',
#> 'S.A.Main', 'S.A.Marpole', 'S.A.Mount.Pleasant.VE', 'S.A.Mount.Pleasant.VW',
#> 'S.A.Oakridge.VW', 'S.A.Point.Grey', 'S.A.Quilchena', 'S.A.Renfrew.VE',
#> 'S.A.S.W..Marine', 'S.A.Shaughnessy', 'S.A.South.Cambie', 'S.A.South.Granville',
#> 'S.A.South.Marine', 'S.A.South.Vancouver', 'S.A.Strathcona',
#> 'S.A.University.VW', 'S.A.Victoria.VE', 'S.A.West.End.VW', 'S.A.Yaletown'. You
#> can override using the `.groups` argument.
#> # A tibble: 2 x 61
#> # Groups:   Bedroom, TypeDwel1.2.Duplex, TypeDwelApartment.Condo,
#> #   TypeDwelOther, TypeDwelTownhouse, Age_new.70, Age_new0.1, Age_new16.40,
#> #   Age_new2.5, Age_new41.70, Age_new6.15, LandFreehold, LandLeasehold.prepaid,
#> #   LandOthers, cluster_K_mean.1, cluster_K_mean.2, cluster_K_mean.3,
#> #   cluster_K_mean.4, Sold.Year.2018, Sold.Year.2019, Sold.Year.2020,
#> #   S.A.Cambie, S.A.Champlain.Heights, S.A.Coal.Harbour, S.A.Collingwood.VE,
#> #   S.A.Downtown.VE, S.A.Downtown.VW, S.A.Dunbar, S.A.Fairview.VW,
#> #   S.A.False.Creek, S.A.Fraser.VE, S.A.Fraserview.VE, S.A.Grandview.VE,
#> #   S.A.Grandview.Woodland, S.A.Hastings, S.A.Hastings.Sunrise, S.A.Kerrisdale,
#> #   S.A.Killarney.VE, S.A.Kitsilano, S.A.Knight, S.A.Main, S.A.Marpole,
#> #   S.A.Mount.Pleasant.VE, S.A.Mount.Pleasant.VW, S.A.Oakridge.VW,
#> #   S.A.Point.Grey, S.A.Quilchena, S.A.Renfrew.VE, S.A.S.W..Marine,
#> #   S.A.Shaughnessy, S.A.South.Cambie, S.A.South.Granville, S.A.South.Marine,
#> #   S.A.South.Vancouver, S.A.Strathcona, S.A.University.VW, S.A.Victoria.VE,
#> #   S.A.West.End.VW, S.A.Yaletown [2]
#>   Bedroom TypeDwel1.2.Duplex TypeDwelApartment.C… TypeDwelOther TypeDwelTownhou…
#>     <dbl>              <dbl>                <dbl>         <dbl>            <dbl>
#> 1       1                  0                    1             0                0
#> 2       2                  0                    1             0                0
#> # … with 56 more variables: Age_new.70 <dbl>, Age_new0.1 <dbl>,
#> #   Age_new16.40 <dbl>, Age_new2.5 <dbl>, Age_new41.70 <dbl>,
#> #   Age_new6.15 <dbl>, LandFreehold <dbl>, LandLeasehold.prepaid <dbl>,
#> #   LandOthers <dbl>, cluster_K_mean.1 <dbl>, cluster_K_mean.2 <dbl>,
#> #   cluster_K_mean.3 <dbl>, cluster_K_mean.4 <dbl>, Sold.Year.2018 <dbl>,
#> #   Sold.Year.2019 <dbl>, Sold.Year.2020 <dbl>, S.A.Cambie <dbl>,
#> #   S.A.Champlain.Heights <dbl>, S.A.Coal.Harbour <dbl>,
#> #   S.A.Collingwood.VE <dbl>, S.A.Downtown.VE <dbl>, S.A.Downtown.VW <dbl>,
#> #   S.A.Dunbar <dbl>, S.A.Fairview.VW <dbl>, S.A.False.Creek <dbl>,
#> #   S.A.Fraser.VE <dbl>, S.A.Fraserview.VE <dbl>, S.A.Grandview.VE <dbl>,
#> #   S.A.Grandview.Woodland <dbl>, S.A.Hastings <dbl>,
#> #   S.A.Hastings.Sunrise <dbl>, S.A.Kerrisdale <dbl>, S.A.Killarney.VE <dbl>,
#> #   S.A.Kitsilano <dbl>, S.A.Knight <dbl>, S.A.Main <dbl>, S.A.Marpole <dbl>,
#> #   S.A.Mount.Pleasant.VE <dbl>, S.A.Mount.Pleasant.VW <dbl>,
#> #   S.A.Oakridge.VW <dbl>, S.A.Point.Grey <dbl>, S.A.Quilchena <dbl>,
#> #   S.A.Renfrew.VE <dbl>, S.A.S.W..Marine <dbl>, S.A.Shaughnessy <dbl>,
#> #   S.A.South.Cambie <dbl>, S.A.South.Granville <dbl>, S.A.South.Marine <dbl>,
#> #   S.A.South.Vancouver <dbl>, S.A.Strathcona <dbl>, S.A.University.VW <dbl>,
#> #   S.A.Victoria.VE <dbl>, S.A.West.End.VW <dbl>, S.A.Yaletown <dbl>,
#> #   S.A.Other <dbl>, mean <dbl>

^{Created on 2022-01-15 by the reprex package (v0.3.0)}