Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

R: How to subset the string in a dataframe column?

Samples are either wild-type or mutant. I want to find the number of samples that are wild-type wt versus mutant mut with respect to the respondents Responder in the Group1 column of the res.sig dataframe. In the n_mutated_group1 column, the first letter indicates the number of mutant and the last two digits (after or) indicates the total number for that group. wt is the difference between total and mut.

# Number of samples in wild-type versus mutant with respect to the most significantly enriched genes 
total <- sum(as.numeric(res.sig %>%
                       filter(Group1=="Responder") %>%
                       mutate(last_letter = substr(n_mutated_group1,-2,-1)) %>%
                       pull(last_letter)))

mut <- sum(as.numeric(res.sig %>%
                        filter(Group1=="Responder") %>%
                        mutate(first_letter = substr(n_mutated_group1,1,1)) %>%
                        pull(first_letter)))

wt <- total-mut

res.sig

> dput(res.sig)
structure(list(Hugo_Symbol = c("ERCC2", "ERCC2", "AKAP9", "AKAP9", 
"HERC1", "HERC1", "HECTD1", "HECTD1", "MACF1", "MACF1", "MROH2B", 
"MROH2B", "KMT2C", "KMT2C"), Group1 = c("Non-Responder", "Responder", 
"Non-Responder", "Responder", "Non-Responder", "Responder", "Non-Responder", 
"Responder", "Non-Responder", "Responder", "Non-Responder", "Responder", 
"Non-Responder", "Responder"), Group2 = c("Rest", "Rest", "Rest", 
"Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest", 
"Rest", "Rest", "Rest"), n_mutated_group1 = c("0 of 25", "9 of 25", 
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25", "6 of 25", 
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "1 of 25", "7 of 25"
), n_mutated_group2 = c("9 of 25", "0 of 25", "6 of 25", "0 of 25", 
"6 of 25", "0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25", 
"6 of 25", "0 of 25", "7 of 25", "1 of 25"), p_value = c(0.00163083541184905, 
0.00163083541184905, 0.022289766970618, 0.022289766970618, 0.022289766970618, 
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.022289766970618, 
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.0487971536957187, 
0.0487971536957187), OR = c(0, Inf, 0, Inf, 0, Inf, 0, Inf, 0, 
Inf, 0, Inf, 0.111488645279478, 8.96952328636894), OR_low = c(0, 
2.56647319276964, 0, 1.33358819424024, 0, 1.33358819424024, 0, 
1.33358819424024, 0, 1.33358819424024, 0, 1.33358819424024, 0.00228988507629356, 
1.0079479819766), OR_high = c(0.38963976043749, Inf, 0.749856668137133, 
Inf, 0.749856668137133, Inf, 0.749856668137133, Inf, 0.749856668137133, 
Inf, 0.749856668137133, Inf, 0.992114690322592, 436.703138665198
), fdr = c(0.109265972593886, 0.109265972593886, 0.248902397838568, 
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568, 
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568, 
0.248902397838568, 0.467058471087594, 0.467058471087594)), row.names = c(NA, 
-14L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x000002adab171ef0>)

Problem:

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

Here, I’m getting total as NA_real_ instead of the total number.

>Solution :

You can extract for total everything after the of string in your column and for mut you can extract everything before of. You can use this code:

library(tidyverse)
  total <- sum(as.numeric(res.sig %>%
                          filter(Group1=="Responder") %>%
                          mutate(first_letter = sub('.+of(.+)', '\\1', n_mutated_group1)) %>%
                          pull(first_letter)))
  
  mut <- sum(as.numeric(res.sig %>%
                            filter(Group1 == "Responder") %>%
                            mutate(last_letter = sub("\\of.*", "", n_mutated_group1)) %>%
                            pull(last_letter)))
  
  wt <- total-mut
  wt

Output wt:

[1] 129
Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading