Mock data:
# Mock data: four rows; `country` and `score` each contain one NA.
df <- data.frame(
  country = c("USA", "USA", "Japan", NA),
  dimension = c("economic", "cultural", "economic", "economic"),
  score = c(NA, "high", "high", "low"),
  stringsAsFactors = FALSE
)
I wrote this function to summarise frequencies of each variable, then export them to a csv with the name of the variable as the file name:
# Tabulate value frequencies for every column of `df`.
#
# For each column, counts the rows per distinct value (NA is kept as its own
# group), adds `Percent` (share of all counted rows, in percent) and `N`
# (total number of counted rows), writes the table to "<column>.csv" in the
# working directory, and prints it.
#
# df: a data frame; one CSV file is written per column.
# Returns NULL invisibly; the function is called for its side effects.
export <- function(df) {
  for (col in colnames(df)) {
    # `.data[[col]]` looks the column up in the data flowing through the pipe
    # instead of re-reading the function argument `df`, so the grouping stays
    # valid even if rows are filtered earlier in the chain.
    freq <- df %>%
      group_by(.data[[col]]) %>%
      summarise(Count = n()) %>%
      mutate(
        Percent = Count / sum(Count) * 100,
        N = sum(Count)
      )
    # Spell out FALSE: the shorthand `F` is an ordinary, reassignable variable.
    write.csv(freq, paste0(col, ".csv"), row.names = FALSE)
    print(freq)
  }
}
export(df)  # this works
But I would like to remove the NAs before grouping the data and computing the frequencies. I tried this:
# Second attempt: same per-column frequency export as before, with an added
# filter() step meant to drop missing values before counting.
# NOTE: this version fails in group_by() with the error quoted below.
export <- function(df){
for (col in colnames(df)) {
table <- df %>%
# `df[col]` here is the function argument `df` (all of its rows), not the
# data currently flowing through the pipe.
filter(!is.na(df[col])) %>% # Attempt to filter out NAs
# After filter() has removed rows, the piped data is shorter than `df[col]`,
# which still has every original row; group_by() rejects that size mismatch.
group_by(df[col]) %>%
summarise(Count = n()) %>%
# Percent is each group's share of the counted rows; N is their total.
mutate(Percent = Count / sum(Count)*100,
N = sum(Count))
# One CSV per column, named after the column.
write.csv(table, paste0(col, ".csv"), row.names = F)
print(table)
}
}
export(df)  # this does not work; I get this error message:
Error in `group_by()`:
ℹ In argument: `df[col]`.
Caused by error:
! `df[col]` must be size 3 or 1, not 4.
How do I remove these NAs?? I must have made a silly mistake here.
CURRENT OUTPUT (only the first of the three variables to iterate over is shown):
country Count Percent N
Japan 1 25 4
USA 2 50 4
NA 1 25 4
DESIRED OUTPUT (only the first of the three variables to iterate over is shown):
country Count Percent N
Japan 1 33.33333 3
USA 2 66.66667 3
Note that NAs are discarded and not included in the frequencies.
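> Solution:
The error occurs because `df[col]` inside `group_by()` refers to the original 4-row data frame passed to the function, not to the 3-row filtered data coming through the pipe. Here are a few options that avoid this, all giving the same frequencies: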
# Print a frequency table for each column of `df`, skipping missing values.
#
# Rows whose value in the current column is NA are filtered out first, so
# `Percent` and `N` are based on the non-missing rows only. Nothing is
# written to disk (the CSV export is left commented out); the function is
# called for its printed output.
export2 <- function(df) {
  for (col in colnames(df)) {
    # Drop rows that are NA in the column being tabulated.
    non_missing <- filter(df, !is.na(.data[[col]]))
    freq <- non_missing |>
      summarise(Count = n(), .by = all_of(col)) |>
      mutate(
        Percent = Count / sum(Count) * 100,
        N = sum(Count)
      )
    # write.csv(freq, paste0(col, ".csv"), row.names = FALSE)
    print(freq)
  }
}
export2(df)
# country Count Percent N
# 1 USA 2 66.66667 3
# 2 Japan 1 33.33333 3
# dimension Count Percent N
# 1 economic 3 75 4
# 2 cultural 1 25 4
# score Count Percent N
# 1 high 2 66.66667 3
# 2 low 1 33.33333 3
# Print a frequency table for each column of `df`, ignoring missing values.
#
# Each column is isolated with select() and passed through na.omit(), so only
# the NAs of the column being tabulated are dropped. Nothing is written to
# disk (the CSV export is left commented out).
export3 <- function(df) {
  for (col in colnames(df)) {
    # One-column data frame with the NA rows removed.
    complete_col <- na.omit(select(df, all_of(col)))
    counts <- summarise(complete_col, Count = n(), .by = all_of(col))
    freq <- mutate(
      counts,
      Percent = Count / sum(Count) * 100,
      N = sum(Count)
    )
    # write.csv(freq, paste0(col, ".csv"), row.names = FALSE)
    print(freq)
  }
}
export3(df)
# same as above
# Print a frequency table for each column of `df`, ignoring missing values.
#
# Same preparation as a select() + na.omit() pass over a single column, but
# the tally is produced by count() (with the count column named "Count")
# instead of summarise(). Nothing is written to disk (the CSV export is left
# commented out).
export4 <- function(df) {
  for (col in colnames(df)) {
    # One-column data frame with the NA rows removed.
    complete_col <- df |>
      select(all_of(col)) |>
      na.omit()
    counts <- count(complete_col, .data[[col]], name = "Count")
    freq <- mutate(
      counts,
      Percent = Count / sum(Count) * 100,
      N = sum(Count)
    )
    # write.csv(freq, paste0(col, ".csv"), row.names = FALSE)
    print(freq)
  }
}
export4(df)
# same counts as above, but count() sorts the rows by value (e.g. Japan before USA)