I'm playing around with some CDC data while looking to formulate a research question. I’ve been running some basic statistics and analyses, along with making some simple plots to better visualize the data.
One of the plots I created is here:

Is it possible to make this clearer (in the sense that we can better visualize the genera by trap site and the number of male mosquitoes counted)?
Here was my code:
# Scatter plot of the trap records: male counts on the x axis, trap type on
# the y axis; colour encodes the trap site and point shape encodes the genus.
ggplot(data = Hawaii.cdc, mapping = aes(x = MalesCollected, y = TrapType)) +
  geom_point(mapping = aes(colour = TrapSite, shape = Genus)) +
  theme_bw()
Here are my (randomly sampled) data:
> sampled_df<- sample_n(Hawaii.cdc, 50)
> dput(sampled_df)
structure(list(TrapType = c("BGS Trap", "BGS Trap", "BGS Trap",
"BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap",
"BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap", "Larval/Pupal Collection",
"BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap", "UV Light Trap",
"BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap",
"BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap",
"BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap", "UV Light Trap",
"BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap", "BGS Trap",
"BGS Trap", "BGS Trap", "BGS Trap", "Larval/Pupal Collection",
"BGS Trap", "BGS Trap"), AttractantsUsed = c("Lure", "Lure",
"None", "Lure", "Lure", "Lure", "Lure", "Lure", "Lure", "Lure",
"CO2 and Lure", "Lure", "Lure", "Lure and UV Led Light", "None",
"CO2 and Lure", "Lure", "Lure", "Lure", "Lure", "CO2", "Lure",
"None", "Lure", "CO2 and Lure", "Hay or grass infusion", "Lure",
"Lure", "Lure", "Lure", "Lure", "Lure", "Lure", "Lure", "Lure",
"Lure", "Lure", "BG", "Lure", "Lure", "Lure", "Lure", "Lure",
"None", "Lure", "Lure", "Lure", "None", "CO2 and Lure and Led Uv Light",
"Lure"), TrapID = c("Parking #1", "Air strip #4", "Parking #3",
"#2", "#4", "Parking #2", "Air Strip #1", "Air strip #3", "Air strip #2",
"Parking #4", "Air Strip #1", "Air Strip #1", "Parking #4",
"Air strip #3", "0-6", "Baggage #1", "#4", "Parking #3", "Sewage #2",
"Parking #1", "Sewage #3", "Baggage #4", "Air Strip #4", "Air strip #2",
"Air Strip #4", "Air Strip #1", "Parking #3", "Air strip #2",
"Air strip #4", "Air strip #1", "Parking #2", "#3", "Parking #2",
"Parking #3", "Parking #1", "Parking #3", "Parking #1", "Air strip #1",
"Air Strip #1", "Air Strip #1", "Parking #2", "Air strip #2",
"#2", "Parking #3", "Air Strip #1", "Parking #2", "Air strip #4",
"HOVE Container", "Parking #1", "Air Strip #3"), Latitude = c(19.71314,
19.712502, 19.71311, 20.03724, 20.03977, 19.71312, 19.7121, 19.7124,
19.71224, 19.71311, 19.711245, 19.711245, 19.71311, 19.712048,
20.21925, 19.71502, 20.03977, 19.71311, 19.71238, 19.71314, 19.71239,
19.715, 19.7125, 19.7125, 19.7125, 19.711245, 19.71311, 19.7125,
19.7125, 19.71212, 19.71312, 20.03938, 19.71312, 19.71311, 19.71314,
19.71311, 19.71314, 19.71212, 19.7121, 19.71212, 19.71312, 19.7125,
20.03724, 19.71311, 19.71212, 19.71312, 19.7125, 19.088978, 19.71314,
19.71224), Longitude = c(-155.0396, -155.057462, -155.03906,
-155.82648, -155.83147, -155.03934, -155.05975, -155.05923, -155.05942,
-155.03876, -155.059803, -155.059803, -155.03876, -155.057852,
-155.75585, -155.04094, -155.83147, -155.03906, -155.03697, -155.0396,
-155.03725, -155.04027, -155.05894, -155.05942, -155.05894, -155.059803,
-155.03906, -155.05942, -155.05894, -155.05975, -155.03934, -155.82956,
-155.03934, -155.03906, -155.0396, -155.03906, -155.0396, -155.05975,
-155.05975, -155.05975, -155.03934, -155.05942, -155.82648, -155.03906,
-155.05975, -155.03934, -155.05894, -155.762254, -155.0396, -155.05923
), Address = c("Kekuanaoa Street", "Airport Road", "Kekuanaoa Street",
"Kawaihae Rd.", "Kawaihae Rd.", "Kekuanaoa Street", "Airport Road",
"Airport Road", "Airport Rd.", "Kekuanaoa Street", "Airport Road",
"Airport Road", "Kekuanaoa St.", "Airport Rd.", "Akoni Pule Hwy.Kohala",
"Kukuanaoa Street", "Kawaihae Road", "Kekuanaoa Street", "Kekuanaoa Street",
"Kekuanaoa St.", "Kekuanaoa St.", "Kekuanaoa St.", "Airport Road",
"Airport Road", "Airport Road", "Airport Road", "Kekuanaoa Street",
"Airport Road", "Airport Road", "Airport Road", "Kekuanaoa Street",
"Kawaihae Rd.", "Kekuanaoa St.", "Kekuanaoa St.", "Kekuanaoa St.",
"Kekuanaoa St.", "Kekuanaoa St.", "Airport Rd.", "Airport Road",
"Airport Rd.", "Kekuanaoa Street", "Airport Road", "Kawaihae Rd.",
"Kekuanaoa St.", "Airport Road", "Kekuanaoa Street", "Airport Road",
"Keaka Blvd.", "Kekuanaoa Street", "Airport Road"), Town = c("Hilo",
"Hilo", "Hilo", "Kawaihae", "Kawaihae", "Hilo", "Hilo", "Hilo",
"Hilo", "Hilo", "Hilo", "Hilo", "Hilo", "Hilo", "Kohala", "Hilo",
"Kawaihae", "Hilo", "Hilo", "Hilo", "Hilo", "Hilo", "Hilo", "Hilo",
"Hilo", "Hilo", "Hilo", "Hilo", "Hilo", "Hilo", "Hilo", "Kawaihae",
"Hilo", "Hilo", "Hilo", "Hilo", "Hilo", "Hilo", "Hilo", "Hilo",
"Hilo", "Hilo", "Kawaihae", "Hilo", "Hilo", "Hilo", "Hilo", "Ocean View",
"Hilo", "Hilo"), State = c("HI", "HI", "HI", "HI", "HI", "HI",
"HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI",
"HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI",
"HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI",
"HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI", "HI"
), County = c("Hawaii County", "Hawaii County", "Hawaii County",
"Hawaii County", "Hawaii County", "Hawaii County", "Hawaii County",
"Hawaii County", "Hawaii County", "Hawaii County", "Hawaii County",
"Hawaii County", "Hawaii County", "Hawaii County", "Hawaii County",
"Hawaii County", "Hawaii County", "Hawaii County", "Hawaii County",
"Hawaii County", "Hawaii County", "Hawaii County", "Hawaii County",
"Hawaii County", "Hawaii County", "Hawaii County", "Hawaii County",
"Hawaii County", "Hawaii County", "Hawaii County", "Hawaii County",
"Hawaii County", "Hawaii County", "Hawaii County", "Hawaii County",
"Hawaii County", "Hawaii County", "Hawaii County", "Hawaii County",
"Hawaii County", "Hawaii County", "Hawaii County", "Hawaii County",
"Hawaii County", "Hawaii County", "Hawaii County", "Hawaii County",
"Hawaii County", "Hawaii County", "Hawaii County"), TrapSite = c("Airport",
"Airport", "Airport", "Business", "Business", "Airport", "Airport",
"Airport", "Business", "Airport", "Airport", "Airport", "Business",
"Airport", "Residential", "Airport", "Business", "Airport", "Airport",
"Airport", "Business", "Airport", "Airport", "Airport", "Airport",
"Airport", "Airport", "Airport", "Airport", "Airport", "Airport",
"Business", "Business", "Business", "Business", "Business", "Airport",
"Airport", "Airport", "Business", "Airport", "Airport", "Business",
"Business", "Airport", "Airport", "Airport", "Residential", "Airport",
"Airport"), TrapSet = c("05/23/2017", "01/06/2020", "09/25/2017",
"08/29/2017", "07/04/2017", "02/05/2018", "01/28/2019", "04/02/2018",
"12/10/2018", "03/09/2020", "07/29/2019", "03/25/2019", "12/16/2018",
"01/21/2020", "07/12/2017", "06/21/2017", "11/20/2017", "06/06/2017",
"06/19/2017", "12/19/2017", "06/21/2017", "08/07/2017", "06/19/2017",
"08/27/2018", "06/22/2017", "07/08/2019", "06/25/2018", "04/02/2018",
"09/03/2018", "03/12/2018", "10/29/2018", "07/12/2017", "01/21/2019",
"01/14/2019", "01/07/2019", "12/10/2018", "09/06/2017", "12/06/2017",
"12/03/2018", "07/10/2017", "07/09/2018", "07/02/2018", "06/13/2017",
"06/21/2017", "07/26/2017", "10/16/2017", "10/01/2018", "10/17/2017",
"09/16/2019", "11/26/2018"), SetTimeOfDay = c("Morning", "Morning",
"Morning", "Afternoon", "Afternoon", "Morning", "Morning", "Morning",
"Morning", "Morning", "Morning", "Morning", "Morning", "Morning",
"Afternoon", "Morning", "Morning", "Morning", "Morning", "Afternoon",
"Afternoon", "Afternoon", "Morning", "Morning", "Morning", "Morning",
"Morning", "Morning", "Morning", "Morning", "Morning", "Afternoon",
"Morning", "Morning", "Morning", "Morning", "Afternoon", "Afternoon",
"Morning", "Afternoon", "Morning", "Morning", "Afternoon", "Afternoon",
"Morning", "Morning", "Morning", "Afternoon", "Morning", "Morning"
), TrapCollect = c("05/25/2017", "01/10/2020", "09/26/2017",
"08/31/2017", "07/05/2017", "02/08/2018", "02/04/2019", "04/06/2018",
"12/14/2018", "03/13/2020", "08/02/2019", "03/29/2019", "12/20/2018",
"01/24/2020", "07/12/2017", "06/22/2017", "11/22/2017", "06/08/2017",
"06/20/2017", "12/20/2017", "06/22/2017", "08/09/2017", "06/20/2017",
"08/31/2018", "06/23/2017", "07/12/2019", "06/29/2018", "04/06/2018",
"09/07/2018", "03/16/2018", "11/02/2018", "07/14/2017", "01/28/2019",
"01/18/2019", "01/14/2019", "12/14/2018", "09/08/2017", "12/07/2017",
"12/07/2018", "07/11/2017", "07/13/2018", "07/06/2018", "06/15/2017",
"06/22/2017", "07/28/2017", "10/20/2017", "10/05/2018", "10/17/2017",
"09/20/2019", "11/30/2018"), CollectTimeOfDay = c("Morning",
"Morning", "Morning", "Afternoon", "Afternoon", "Morning", "Morning",
"Morning", "Morning", "Morning", "Morning", "Morning", "Morning",
"Morning", "Afternoon", "Morning", "Morning", "Morning", "Morning",
"Afternoon", "Afternoon", "Afternoon", "Morning", "Morning",
"Morning", "Morning", "Morning", "Morning", "Morning", "Morning",
"Morning", "Afternoon", "Morning", "Morning", "Morning", "Morning",
"Afternoon", "Afternoon", "Morning", "Afternoon", "Morning",
"Morning", "Afternoon", "Afternoon", "Morning", "Morning", "Morning",
"Afternoon", "Morning", "Morning"), Genus = c("Aedes", "Aedes",
"Aedes", "Aedes", "Aedes", "Culex", "Culex", "Culex", "Aedes",
"Aedes", "Aedes", "Aedes", "Aedes", "Aedes", "Aedes", "Aedes",
"Aedes", "Aedes", "Aedes", "Aedes", "Culex", "Culex", "Aedes",
"Aedes", "Culex", "Culex", "Aedes", "Culex", "Culex", "Culex",
"Aedes", "Aedes", "Aedes", "Culex", "Aedes", "Aedes", "Culex",
"Culex", "Aedes", "Aedes", "Aedes", "Aedes", "Aedes", "Aedes",
"Aedes", "Aedes", "Aedes", "Aedes", "Aedes", "Aedes"), Species = c("albopictus",
"albopictus", "albopictus", "aegypti", "albopictus", "quinquefasciatus",
"quinquefasciatus", "quinquefasciatus", "albopictus", "albopictus",
"albopictus", "albopictus", "albopictus", "albopictus", "albopictus",
"albopictus", "aegypti", "albopictus", "albopictus", "vexans",
"quinquefasciatus", "quinquefasciatus", "albopictus", "albopictus",
"quinquefasciatus", "quinquefasciatus", "albopictus", "quinquefasciatus",
"quinquefasciatus", "quinquefasciatus", "albopictus", "aegypti",
"albopictus", "quinquefasciatus", "albopictus", "albopictus",
"quinquefasciatus", "quinquefasciatus", "albopictus", "albopictus",
"albopictus", "albopictus", "albopictus", "albopictus", "albopictus",
"albopictus", "albopictus", "aegypti", "albopictus", "albopictus"
), LifeStage = c("Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Larvae/pupae", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Larvae/pupae", "Adult", "Adult"), EggsCollected = c("No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No"), LarvaeCollected = c("No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "Yes", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "Yes", "No", "No"), PupaeCollected = c("No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "Yes", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "Yes", "No", "No"), FemalesCollected = c(32, 1, 10,
5, 1, 7, 2, 4, 3, 7, 33, 19, 0, 8, 15, 1, 2, 8, 1, 3, 12, 1,
1, 1, 1, 2, 3, 1, 10, 2, 15, 1, 18, 2, 29, 2, 3, 1, 9, 3, 6,
5, 4, 2, 0, 4, 6, 1, 22, 2), MalesCollected = c(54, 0, 9, 3,
1, 0, 0, 0, 0, 0, 4, 5, 2, 0, 12, 0, 15, 4, 0, 2, 0, 0, 5, 1,
0, 0, 0, 0, 0, 0, 6, 0, 3, 0, 6, 0, 0, 1, 4, 3, 1, 2, 3, 1, 1,
1, 3, 0, 5, 0), UnknownCollected = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
), ReportDate = c("08/01/2017 8:31 PM", "05/07/2020 4:13 PM",
"04/19/2018 8:44 PM", "04/19/2018 8:44 PM", "08/01/2017 8:31 PM",
"01/31/2019 8:05 PM", "07/30/2019 9:38 PM", "10/05/2018 9:15 PM",
"07/30/2019 9:38 PM", "05/07/2020 4:13 PM", "05/07/2020 4:13 PM",
"05/07/2020 4:13 PM", "07/30/2019 9:38 PM", "05/07/2020 4:13 PM",
"04/19/2018 8:44 PM", "08/01/2017 8:31 PM", "04/19/2018 8:44 PM",
"08/01/2017 8:31 PM", "08/01/2017 8:31 PM", "04/19/2018 8:44 PM",
"08/01/2017 8:31 PM", "04/19/2018 8:44 PM", "08/01/2017 8:31 PM",
"10/05/2018 9:15 PM", "08/01/2017 8:31 PM", "05/07/2020 4:13 PM",
"10/05/2018 9:15 PM", "10/05/2018 9:15 PM", "10/05/2018 9:15 PM",
"10/05/2018 9:15 PM", "01/31/2019 8:45 PM", "08/01/2017 8:31 PM",
"07/30/2019 9:38 PM", "07/30/2019 9:38 PM", "07/30/2019 9:38 PM",
"07/30/2019 9:38 PM", "04/19/2018 8:44 PM", "04/19/2018 8:44 PM",
"07/30/2019 9:38 PM", "08/01/2017 8:31 PM", "01/31/2019 8:22 PM",
"10/05/2018 9:15 PM", "08/01/2017 8:31 PM", "08/01/2017 8:31 PM",
"04/19/2018 8:44 PM", "04/19/2018 8:44 PM", "11/20/2018 7:37 PM",
"04/19/2018 8:44 PM", "05/07/2020 4:13 PM", "07/30/2019 9:38 PM"
)), row.names = c(NA, -50L), spec = structure(list(cols = list(
TrapType = structure(list(), class = c("collector_character",
"collector")), AttractantsUsed = structure(list(), class = c("collector_character",
"collector")), TrapID = structure(list(), class = c("collector_character",
"collector")), Latitude = structure(list(), class = c("collector_double",
"collector")), Longitude = structure(list(), class = c("collector_double",
"collector")), Address = structure(list(), class = c("collector_character",
"collector")), Town = structure(list(), class = c("collector_character",
"collector")), State = structure(list(), class = c("collector_character",
"collector")), County = structure(list(), class = c("collector_character",
"collector")), TrapSite = structure(list(), class = c("collector_character",
"collector")), TrapSet = structure(list(), class = c("collector_character",
"collector")), SetTimeOfDay = structure(list(), class = c("collector_character",
"collector")), TrapCollect = structure(list(), class = c("collector_character",
"collector")), CollectTimeOfDay = structure(list(), class = c("collector_character",
"collector")), Genus = structure(list(), class = c("collector_character",
"collector")), Species = structure(list(), class = c("collector_character",
"collector")), LifeStage = structure(list(), class = c("collector_character",
"collector")), EggsCollected = structure(list(), class = c("collector_character",
"collector")), LarvaeCollected = structure(list(), class = c("collector_character",
"collector")), PupaeCollected = structure(list(), class = c("collector_character",
"collector")), FemalesCollected = structure(list(), class = c("collector_double",
"collector")), MalesCollected = structure(list(), class = c("collector_double",
"collector")), UnknownCollected = structure(list(), class = c("collector_double",
"collector")), ReportDate = structure(list(), class = c("collector_character",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), problems = <pointer: 0x600002e98b20>, class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
>Solution :
This is my suggestion:
- Swap the axes (trap type on x, males collected on y)
- Transform y to a log scale
- Use `position_jitter` to spread out overlapping points
library(tidyverse)
# Jittered scatter of male mosquito counts per trap type; colour encodes the
# trap site and point shape encodes the genus.
#
# log1p() (i.e. log(1 + x)) is used instead of log() because many records have
# MalesCollected == 0 and log(0) is -Inf, which has no finite position on the
# axis and cannot be jittered apart. log1p(0) is 0, so zero counts stay visible.
ggplot(Hawaii.cdc, aes(x = TrapType, y = log1p(MalesCollected))) +
  geom_point(
    aes(color = TrapSite, shape = Genus),
    # Jitter in both directions to reduce overplotting of identical counts.
    position = position_jitter(width = 0.2, height = 0.5),
    size = 2
  ) +
  # Label states the transform so the axis is not read as raw counts.
  ylab("log(1 + Males Collected)") +
  theme_bw()
