Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

R: Creating a grouped df showing mean of Variable

For my linear regression with one dichotomous numeric variable (Strategic Voting) and one metric variable (Age, Q3), I need a data frame that shows the mean age of the people who either voted strategically (1) or did not (0).

This is my code, but I get the following warning messages: "Warning messages:
1: In mean.default(Q3) : argument is not numeric or logical: returning NA
2: In mean.default(Q3) : argument is not numeric or logical: returning NA"
I tried converting the column to numeric using as.numeric(Q3), but I can't get it to work.

# Goal: one row per StrategischeWahl2021 value, holding the mean of Q3.
Test<- Deskriptive_Statistik %>% 
  # Keep only the two columns used below.
  select(Q3, StrategischeWahl2021) %>% 
  group_by(StrategischeWahl2021) %>% 
  # mean() receives Q3 unconverted. NOTE(review): Q3 is presumably stored as
  # character here (the dput of Test shows quoted values), which would explain
  # the "argument is not numeric or logical" warnings -- confirm with str().
  summarise(Q3 = mean(Q3))

This is my data:

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

    dput(Test)
structure(list(Q3 = c("24", "20", "20", "19", "21", "33", "27", 
"20", "53", "31", "21", "22", "21", "20", "25", "21", "24", "29", 
"53 ", "20", "21", "22", "48", "28", "20", "23", "29", "29", 
"23", "41", "29", "21", "29", "47", "23", "53", "34", "19", "23", 
"24", "29", "29", "20", "22", "29", "25", "21", "22", "29", "20", 
"30", "21", "23", "19", "23", "18", "25", "22", "28", "25", "22", 
"21", "24", "24", "29", "55", "20", "20", "21", "20", "28", "22", 
"21", "22", "20", "31", "22", "20", "31", "22", "22", "30", "20", 
"22", "18", "23", "55", "22", "25", "25", "21", "39", "22", "20", 
"49", "58", "20", "19", "21", "22", "29", "23", "32", "35", "20", 
"20", "21", "28", "24", "28", "60", "70", "43", "21", "25", "60", 
"34", "54", "24", "25", "23", "21", "48", "20", "25", "24", "21", 
"25", "22", "24", "21", "22", "21", "18", "22", "21", "22", "18", 
"19", "71", "23", "26", "18", "24", "21", "51", "37", "41", "23", 
"25", "22", "35", "21", "18", "22", "29", "26", "21", "22", "23", 
"43", "22", "23", "22", "21", "69", "20", "25", "54", "20", "26", 
"28", "23", "28", "38", "21", "22", "78", "23", "25", "25", "63", 
"32", "33", "20", "21", "20", "23", "21", "24", "19", "24", "37", 
"21", "26", "24", "21", "23", "21", "19", "22", "22", "25", "20", 
"22", "22", "19", "30", "19", "22", "19", "26", "23", "25", "21", 
"36", "25", "22", "23", "22", "23", "22", "20", "21", "29", "22", 
"19", "22", "22", "60", "29", "21", "20", "21", "23", "21", "23", 
"19", "60", "59", "20", "23", "60", "23", "24", "22", "22", "27", 
"23", "19", "22", "18", "21", "22", "19", "68", "26", "21", "20"
), StrategischeWahl2021 = c("0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "1", "1", "1", "0", "0", "0", "0", "1", "0", "0", 
"0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "0", 
"1", "1", "0", "1", "1", "0", "1", "0", "1", "0", "0", "0", "0", 
"0", "0", "1", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "1", "0", "1", 
"0", "0", "1", "0", "1", "1", "0", "0", "0", "1", "1", "0", "1", 
"0", "1", "1", "0", "0", "0", "0", "0", "1", "0", "0", "0", "1", 
"0", "1", "1", "1", "1", "0", "1", "1", "0", "0", "0", "0", "1", 
"0", "1", "0", "0", "0", "1", "0", "0", "0", "1", "1", "0", "0", 
"1", "0", "1", "0", "0", "0", "0", "0", "1", "0", "0", "0", "1", 
"1", "0", "0", "0", "0", "1", "0", "1", "0", "0", "0", "1", "0", 
"1", "0", "0", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", 
"0", "0", "1", "1", "0", "1", "0", "1", "0", "0", "0", "0", "0", 
"1", "0", "1", "0", "1", "0", "0", "1", "0", "0", "0", "0", "0", 
"0", "0", "1", "1", "0", "0", "0", "1", "0", "0", "1", "0", "1", 
"0", "0", "0", "0", "0", "1", "0", "1", "1", "0", "1", "0", "0", 
"1", "0", "0", "0", "0", "1", "0", "1", "0", "1", "1", "1", "0", 
"0", "0", "0", "1", "0", "0", "0", "1", "1", "0", "0", "0", "1", 
"1", "0", "0", "0", "0")), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -259L), groups = structure(list(
    StrategischeWahl2021 = c("0", "1"), .rows = structure(list(
        c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 
        14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 27L, 
        28L, 29L, 30L, 32L, 33L, 34L, 35L, 36L, 37L, 39L, 40L, 
        41L, 42L, 43L, 44L, 46L, 49L, 52L, 54L, 56L, 57L, 58L, 
        59L, 60L, 61L, 63L, 64L, 65L, 68L, 69L, 70L, 71L, 72L, 
        73L, 74L, 75L, 76L, 77L, 78L, 79L, 81L, 82L, 84L, 86L, 
        87L, 89L, 92L, 93L, 94L, 97L, 99L, 102L, 103L, 104L, 
        105L, 106L, 108L, 109L, 110L, 112L, 117L, 120L, 121L, 
        122L, 123L, 125L, 127L, 128L, 129L, 131L, 132L, 133L, 
        136L, 137L, 139L, 141L, 142L, 143L, 144L, 145L, 147L, 
        148L, 149L, 152L, 153L, 154L, 155L, 157L, 159L, 160L, 
        161L, 163L, 165L, 166L, 168L, 169L, 172L, 173L, 174L, 
        175L, 176L, 177L, 178L, 181L, 183L, 185L, 186L, 187L, 
        188L, 189L, 191L, 193L, 195L, 196L, 198L, 199L, 200L, 
        201L, 202L, 203L, 204L, 207L, 208L, 209L, 211L, 212L, 
        214L, 216L, 217L, 218L, 219L, 220L, 222L, 225L, 227L, 
        228L, 230L, 231L, 232L, 233L, 235L, 237L, 241L, 242L, 
        243L, 244L, 246L, 247L, 248L, 251L, 252L, 253L, 256L, 
        257L, 258L, 259L), c(13L, 24L, 25L, 26L, 31L, 38L, 45L, 
        47L, 48L, 50L, 51L, 53L, 55L, 62L, 66L, 67L, 80L, 83L, 
        85L, 88L, 90L, 91L, 95L, 96L, 98L, 100L, 101L, 107L, 
        111L, 113L, 114L, 115L, 116L, 118L, 119L, 124L, 126L, 
        130L, 134L, 135L, 138L, 140L, 146L, 150L, 151L, 156L, 
        158L, 162L, 164L, 167L, 170L, 171L, 179L, 180L, 182L, 
        184L, 190L, 192L, 194L, 197L, 205L, 206L, 210L, 213L, 
        215L, 221L, 223L, 224L, 226L, 229L, 234L, 236L, 238L, 
        239L, 240L, 245L, 249L, 250L, 254L, 255L)), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L), .drop = TRUE))

Thank you very much in advance 😉

>Solution :

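The data is a grouped tibble (`grouped_df`) and therefore carries extra grouping attributes; these are removed with `ungroup()`. After that, group by `StrategischeWahl2021` and summarise. Because `Q3` is stored as character, it has to be converted with `as.numeric()` before `mean()` can be computed: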

library(dplyr)

# Mean of Q3 for each value of StrategischeWahl2021.
Test %>%
  # Drop the grouping metadata that the pasted object already carries.
  ungroup() %>%
  # Q3 holds numbers as character strings; convert before averaging.
  mutate(Q3 = as.numeric(Q3)) %>%
  group_by(StrategischeWahl2021) %>%
  # na.rm = TRUE skips any values that could not be converted.
  summarise(Q3 = mean(Q3, na.rm = TRUE))

-output

# A tibble: 2 × 2
  StrategischeWahl2021    Q3
  <chr>                <dbl>
1 0                     26.9
2 1                     27.6
Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading