Do you guys have any tips when it comes to shortening your code or using logic? For instance, I find myself manually including repeated field at times, when I know a loop or condition might be an easy fix, but I’m having a hard time with learning when to use what logic and how.
Ex. In this example I tried to make all the information uniform to merge them into one file (the iteration actually continues 7xs but I shortened it for simplicity). When it comes to larger data sets I’m not sure it’d be feasible to write something like this. Maybe this just comes with time/practice?
library(tidyverse)
library(readxl)
library(dplyr)
library(rio)
# Get file path
files <- list.files(
path = "C:/Users/Username/OneDrive - Company, Inc/Documents/Competitor Dashboard/Personnel",
pattern = ".xlsx",
full.names = TRUE)
## 1
# Read file
collds_r <- read_excel(files [1])
# Add column
# collds_r[" "] <- " "
# Rename
colnames(collds_r)[colnames(collds_r) %in% c("Product(s)...7",
"Service Line(s)...8")] <- c("Product(s)",
"Service Line(s)")
# Delete specified columns
collds_r <- collds_r[ , -which(names(collds_r) %in% c("Product(s)...10","Service Line(s)...11",
"...14","Team 4","Team 5","Team 6","...19",
"Total Available Space (SF)","Team"))]
# Merge/Rearrange
cr <- collds_r %>% unite(Contact_Name,Contact,...2, sep = " ") %>%
select(`Contact Company`,Contact_Name,`Contact Title`,Office,`Primary Practice`,
everything(vars = NULL))
# Export
#export(cr,"up.xlsx")
## 2
hughes_r <- read_excel(files [2])
colnames(hughes_r)[colnames(hughes_r) %in% c("Contact", "Product(s)...6",
"Service Line(s)...7")] <- c("Contact_Name","Product(s)",
"Service Line(s)")
hughes_r <- hughes_r[ , -which(names(hughes_r) %in% c("Product(s)...9","Service Line(s)...10",
"Team 2","Team 3","Team 4","Team 5","Team 6","Team",
"Total Space Available"))]
hr <- hughes_r %>% select(`Contact Company`,Contact_Name,`Contact Title`,Office,
`Primary Practice`,`Product(s)`,`Service Line(s)`,`Team Combined`,
Email,`Work Phone`)
## 3
jll_r <- read_excel(files [3])
colnames(jll_r)[colnames(jll_r) %in% c("Contact")] <- c("Contact_Name")
jll_r <- jll_r[ , -which(names(jll_r) %in% c("Product(s)2","Service Line(s)3",
"Team 2","Team 3","Team 4","Team",
"Total Space Available"))]
jlr <- jll_r %>% select(`Contact Company`,Contact_Name,`Contact Title`,Office,
`Primary Practice`,`Product(s)`,`Service Line(s)`,`Team Combined`,
Email,`Work Phone`)
# Combine sheets
# Make list of df
personnel_list <- list(cr,hr,jlr)
# List columns
merged_col <- c("Contact Company","Contact_Name","Contact Title","Office",
"Primary Practice","Service Line(s)","Team Combined",
"Email","Work Phone","Product(s)")
# Merge to excel sheet then export
mysheet <- personnel_list %>% reduce(full_join, by = merged_col)
export(mysheet,"Personnel_Data.xlsx")
>Solution :
one simple solution could be to read all excel files at once, then loop over the list of data frames.
library(readxl)
files <- list.files(pattern='*.xlsx')
df.list <- lapply(file.list, read_excel)
for(i in 1:length(df.list)) {
## do the renaming here
dat <- df.list[[i]]
colnames(dat)[colnames(dat) %in% c("Product(s)...7",
"Service Line(s)...8")] <- c("Product(s)",
"Service Line(s)")
}