Using mutate and case_when to replace strings

Advertisements

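I have a list of names that contains multiple spelling variations for many of the people in it, and I want to replace every variation with a single standard form of each name. I thought I could just use mutate and case_when to do this, but it throws an error: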

dput(Unique_names)
structure(list(Fieldworker = c("Nico Alioravainen", "Sam Crofts", 
"Stephen Lang", "Will Hayward", "Keith McMahon", "Josh Firth", 
"Emily Simmonds", "Bernhard Voelkl", "Michele Leveque-shaw", 
"Andy Gosler", "Philip Chapman", "Antica Culina", "Zoe Deakin", 
"Sean Kelly", "Lucy Aplin", "Nicole Milligan", "Reinder Radersma", 
"Colin Garroway", "Ross Crates", "KEITH MCMAHON", "MICHELE LEVEQUE-SHAW", 
"JOSH FIRTH", "NICOLE MILLIGAN", "STEPHEN LANG", "SAM CROFTS", 
"Kai Kam", "Erik Sandvig", "RACHEL KAM", "Stephen lang", "DAMIEN FARINE", 
"Pepe Greno", "Jack Nurse", "Ella Cole", "Adele Powell", "Pepe Greño", 
"Julian Howe", "David Diez", "Lindall Kidd", "Damien Farine", 
"Simon Evans", "Shelly Lachish", "Ben Sheldon", "Bjorn Beckmann", 
"Colin GaReinder Radersma", "Richard Broughton", "LUCY APLIN", 
"Rachel Kam", "ERIK SANDVIG", "EMILY SIMMONDS", "Nico Alioravainenn", 
"Ada Grabowska-Zhang", "Keith Mc Mahon", "Koosje Lamers", "Ada Grabowska", 
"Keith McMhon", "Michele Leveque-Shaw", "Ollie Padget", "Sarah Bond", 
"Fraser Bell", "Neeltje Boogert", "David Kelly", "Dave Kelly", 
"NEELTJE BOOGERT", "Marta Maziarz", "Ashley Cook", "Ben Balmford", 
"Ashley cook", "Lucy Larkman", "Friederike Hillermann", "Sara Keen", 
"Friederike Hillemann", "Alison Roth", "Rachelle Regan", "Bryony Baker", 
"Erin Taylor", "koosje Lamers", "KOOSJE LAMERS", "BEN BALMFORD", 
"FRASER BELL", "Damien Fairne", "Orsolya Vincze", "Fraser bell", 
"Freddy Hillemann", "Allison Roth", "Bob Curry", "FREDDY HILLEMAN", 
"Emily Simmond", "Benjamin Van Doren", "Ashley Sindell Price", 
"Ash Sindell Price", "Caroline Brighton", "Michele leveque Shaw", 
"Freddy hillemann", "James Kennerley", "James Kennerly", "Ashley sindell Price", 
"George Candelin", "Nishant Kumar", "Ashley Sendell Price", "Lucinda Zadwadski", 
"Michele Shaw", "Isaac West", "Lucinda Zawadski", "George Candlin", 
"Edwina West", "Michael Reichert", "Martyna Syposz", "Ash Sendell-Price", 
"Martyna syposz", "Michael Reichart", "Sam crofts", "Saverio Lubrano", 
"Cedric Jouanneau", "Cedric Joanneau", "Chloe Bradford", "Miriam Lord", 
"Mairi Franklin", "freddy Hillemann", "Emma Inzani", "Hannah Lemon", 
"Phil Chapman", "Louise Hill", "Chris Batey", "Isabel Key", "Grant McDonald", 
"Zhengxin Yang", "Ashley Sendell-Price", "Michal Jeziersk", "Zhengin Yang", 
"Martyn", "Michal Jezierski", "Weena West", "Ailidh Barnes", 
"Cristiano Gala", "Kanerva Korhonen", "kanerva Korhonen", "KAnerva Korhonen", 
"Olivia Pargeter", "Joe Bliss", "Ben Walton", "Joseph Bliss", 
"Kanera Korhonen", "Chris Perrins", "Julia Haynes", "Ada Grabowska Zhang", 
"Julien Collet", "Samin  Gokcekus", "Will Smith", "Samin Gokcekus", 
"Nilo Merino Recalde", "Celia Lougmani", "Tom Broom", "Peter Santema", 
"Joe Cooper", "Talya Hackett", "Andrea Estandia", "Will Langdon", 
"Richard Cope", "Charlotte Regan", "KeithMcMahon", "SamCrofts", 
"Kristina Beck", "JuliaHaynes", "KristinaBeck", "Carys Jones", 
"Peter santema", "CarysJones", "Petersantema", "WillSmith", "Alice Edney", 
"Anett Kiss", "Joe Woodman", "PeterSantema", "AnettKiss", "JoeWoodman", 
"CharlotteRegan", " Joe Woodman", " Alice Edney", " Peter Santema", 
" Julia Haynes", " Anett Kiss", " Carys Jones", " Sam Crofts", 
" Kristina Beck", " Charlotte Regan", " Keith McMahon", "Nilo Recalde", 
"Gabrielle Davidson", " Nilo Merino Recalde", " Andrea Estandia", 
"GabrielleDavidson", "NiloMerinoRecalde", " Gabrielle Davidson", 
" Nilo Merino-Recalde", "Nilo Merino-Recalde", " Denise Wawman", 
" Jackson-Houlston", " George Candelin", " Andy Gosler", "Jackson-Houlston", 
" Ada Grabowska", "DeniseWawman", "DaniBanks", "MichalJezierski", 
" Michal Jezierski", " Will Smith", "AndyGosler", "AndreaEstandia", 
" Ana Shapiro", "Jimmy Hill", "AnaShapiro", "Joshua Evans", "Josh Evans", 
"Denise Wawman", "Sarah Nicholls", "Tim Jaeger", "Kyu-Min Huh", 
"Daisy Abraham", "Devi Satarkar")), row.names = c(NA, -219L), class = "data.frame")

# NOTE: this is the failing attempt from the question. In each condition only
# the first string is compared with `Fieldworker`; the remaining strings are
# handed directly to `|`, which raises "operations are possible only for
# numeric, logical or complex types".
Unique_names %>%
  mutate(Fieldworker =  case_when(Fieldworker == "SAM CROFTS" | "Sam crofts" | "SamCrofts" | " Sam Crofts" ~ "Sam Crofts",
                             Fieldworker == "KEITH MCMAHON" | "Keith Mc Mahon" | "Keith McMhon" | "KeithMcMahon" | " Keith McMahon" ~ "Keith McMahon",
                             Fieldworker == "Ada Grabowska" | "Ada Grabowska Zhang" | " Ada Grabowska" ~ "Ada Grabowska-Zhang",
                             Fieldworker == "Nico Alioravainenn" ~ "Nico Alioravainen",
                             Fieldworker == "STEPHEN LANG" | "Stephen lang" ~ "Stephen Lang",
                             Fieldworker == "NICOLE MILLIGAN" ~ "Nicole Milligan",
                             Fieldworker == " Will Smith" ~ "Will Smith",
                             Fieldworker ==  "JOSH FIRTH" ~ "Josh Firth",
                             Fieldworker == "Joshua Evans" ~ "Josh Evans",
                             Fieldworker == "EMILY SIMMONDS" | "Emily Simmond" ~ "Emily Simmonds",
                             Fieldworker == "MICHELE LEVEQUE-SHAW" | "Michele leveque Shaw" | "Michele Shaw" | "Michele leveque-shaw" ~ "Michele Leveque-Shaw",
                             Fieldworker == " Andy Gosler" | "AndyGosler" ~ "Andy Gosler",
                             Fieldworker == "LUCY APLIN" ~ "Lucy Aplin",
                             Fieldworker == "ERIK SANDVIG" ~ "Erik Sandvig",
                             Fieldworker == "FREDDY HILLEMAN" | "Freddy hillemann" | "freddy Hillemann" | "Friederike Hillermann" | "Friederike Hillemann" ~ "Freddy Hillemann",
                             Fieldworker == "DAMIEN FARINE" | "Damien Fairne" ~ "Damien Farine",
                             Fieldworker == "Ashley Cook" | "Ashley cook" | "Ashley Sindell Price" | "Ash Sindell Price" | "Ashley sindell Price" |
                               "Ashley Sendell Price" | "Ash Sendell-Price" ~ "Ashley Sendell-Price",
                             Fieldworker == "koosje Lamers" | "KOOSJE LAMERS" ~ "Koosje Lamers",
                             Fieldworker == "Martyna syposz" ~ "Martina Syposz",
                             Fieldworker == "Michal Jeziersk" ~ "Michal Jezierski",
                             Fieldworker == "kanerva Korhonen" | "KAnerva Korhonen" ~ "Kanerva Korhonen",
                             Fieldworker == "JuliaHaynes" | " Julia Haynes" ~ "Julia Haynes",
                             Fieldworker == "Nilo Merino Recalde" | "Nilo Recalde" | " Nilo Merino Recalde" | "NiloMerinoRecalde" |
                               " Nilo Merino-Recalde" ~ "Nilo Merino-Recalde",
                             Fieldworker == " Andrea Estandia" | "AndreaEstandia" ~ "Andrea Estandia",
                             Fieldworker == "Peter santema" | "Petersantema" |"PeterSantema" | " Peter Santema" ~ "Peter Santema",
                             Fieldworker == "CharlotteRegan" | " Charlotte Regan" ~ "Charlotte Regan",
                             Fieldworker == "KristinaBeck" | " Kristina Beck" ~ "Kristina Beck",
                             Fieldworker == "CarysJones" | " Carys Jones" ~ "Carys Jones",
                             Fieldworker == "AnettKiss" | " Anett Kiss" ~ "Anett Kiss",
                             Fieldworker == "JoeWoodman" | " Joe Woodman" ~ "Joe Woodman",
                             Fieldworker == "DeniseWawman" | " Denise Wawman" ~ "Denise Wawman"))

I get Error in `mutate()`: ! Problem while computing `Fieldworker = case_when(...)`. Caused by error in `Fieldworker == "SAM CROFTS" | "Sam crofts"`: ! operations are possible only for numeric, logical or complex types Run `rlang::last_error()` to see where the error occurred.

I have tried using & instead of |, but I don’t think that is correct, and I get the same error anyway.

What am I missing here, or is there a better way to do this? Hopefully one that I don’t have to type out all this code again!

>Solution :

To use or (|) statements, you need to pass a logical value to both sides. This is wrong:

Fieldworker == "SAM CROFTS" | "Sam crofts"

it should be:

Fieldworker == "SAM CROFTS" | Fieldworker == "Sam crofts"

But, you can make a more concise operation with %in%:

Fieldworker %in% c("SAM CROFTS", "Sam crofts")

You can easily modify your code using find-and-replace (Ctrl+F) in RStudio: substitute `==` with `%in% c(`, substitute `|` with `,`, and substitute `~` with `) ~`. This yields:

# Standardise the spelling variants in `Fieldworker` to one canonical name each.
# Every branch tests membership with `%in%`, which yields a logical vector
# (unlike `Fieldworker == "a" | "b"`, which applies `|` to a bare string).
Unique_names %>%
  mutate(Fieldworker = case_when(
    Fieldworker %in% c("SAM CROFTS", "Sam crofts", "SamCrofts", " Sam Crofts") ~ "Sam Crofts",
    Fieldworker %in% c("KEITH MCMAHON", "Keith Mc Mahon", "Keith McMhon", "KeithMcMahon", " Keith McMahon") ~ "Keith McMahon",
    Fieldworker %in% c("Ada Grabowska", "Ada Grabowska Zhang", " Ada Grabowska") ~ "Ada Grabowska-Zhang",
    Fieldworker %in% c("Nico Alioravainenn") ~ "Nico Alioravainen",
    Fieldworker %in% c("STEPHEN LANG", "Stephen lang") ~ "Stephen Lang",
    Fieldworker %in% c("NICOLE MILLIGAN") ~ "Nicole Milligan",
    Fieldworker %in% c(" Will Smith") ~ "Will Smith",
    Fieldworker %in% c("JOSH FIRTH") ~ "Josh Firth",
    Fieldworker %in% c("Joshua Evans") ~ "Josh Evans",
    Fieldworker %in% c("EMILY SIMMONDS", "Emily Simmond") ~ "Emily Simmonds",
    # The data contains "Michele Leveque-shaw" (capital L, lower-case s), so it
    # is listed alongside the all-lower-case spelling.
    Fieldworker %in% c("MICHELE LEVEQUE-SHAW", "Michele leveque Shaw", "Michele Shaw", "Michele leveque-shaw",
                       "Michele Leveque-shaw") ~ "Michele Leveque-Shaw",
    Fieldworker %in% c(" Andy Gosler", "AndyGosler") ~ "Andy Gosler",
    Fieldworker %in% c("LUCY APLIN") ~ "Lucy Aplin",
    Fieldworker %in% c("ERIK SANDVIG") ~ "Erik Sandvig",
    Fieldworker %in% c("FREDDY HILLEMAN", "Freddy hillemann", "freddy Hillemann", "Friederike Hillermann",
                       "Friederike Hillemann") ~ "Freddy Hillemann",
    Fieldworker %in% c("DAMIEN FARINE", "Damien Fairne") ~ "Damien Farine",
    Fieldworker %in% c("Ashley Cook", "Ashley cook", "Ashley Sindell Price", "Ash Sindell Price", "Ashley sindell Price",
                       "Ashley Sendell Price", "Ash Sendell-Price") ~ "Ashley Sendell-Price",
    Fieldworker %in% c("koosje Lamers", "KOOSJE LAMERS") ~ "Koosje Lamers",
    # Map to "Martyna Syposz", the spelling already present in the data, rather
    # than introducing a third variant ("Martina").
    Fieldworker %in% c("Martyna syposz") ~ "Martyna Syposz",
    Fieldworker %in% c("Michal Jeziersk") ~ "Michal Jezierski",
    Fieldworker %in% c("kanerva Korhonen", "KAnerva Korhonen") ~ "Kanerva Korhonen",
    Fieldworker %in% c("JuliaHaynes", " Julia Haynes") ~ "Julia Haynes",
    Fieldworker %in% c("Nilo Merino Recalde", "Nilo Recalde", " Nilo Merino Recalde", "NiloMerinoRecalde",
                       " Nilo Merino-Recalde") ~ "Nilo Merino-Recalde",
    Fieldworker %in% c(" Andrea Estandia", "AndreaEstandia") ~ "Andrea Estandia",
    Fieldworker %in% c("Peter santema", "Petersantema", "PeterSantema", " Peter Santema") ~ "Peter Santema",
    Fieldworker %in% c("CharlotteRegan", " Charlotte Regan") ~ "Charlotte Regan",
    Fieldworker %in% c("KristinaBeck", " Kristina Beck") ~ "Kristina Beck",
    Fieldworker %in% c("CarysJones", " Carys Jones") ~ "Carys Jones",
    Fieldworker %in% c("AnettKiss", " Anett Kiss") ~ "Anett Kiss",
    Fieldworker %in% c("JoeWoodman", " Joe Woodman") ~ "Joe Woodman",
    Fieldworker %in% c("DeniseWawman", " Denise Wawman") ~ "Denise Wawman",
    # Keep every name not listed above unchanged. Without a default branch,
    # case_when() returns NA for rows that match no condition.
    TRUE ~ Fieldworker
  ))

Leave a ReplyCancel reply