Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

R: Split a column into multiple columns in r

I have a data frame with one column that contains a list of factors in each row.

Each row follows the same formatting:

ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Transurethral cystoscopy (procedure)', cui='C0010707', semtypes='[diap]', trigger='[\"Cystourethroscopy\"-tx-1-\"cystourethroscopy\"-noun-0]', location='TX', pos_info='8/17', tree_codes=''). The factor value follows the factor name and an equal sign (e.g., mm = ‘MMI’).

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

I would like to split this column and create one column per factor, i.e., the result should have index, mm, score, preferred_name, cui, … columns, with each row containing the value that appears inside the single quotation marks. Is there an efficient way to split these lists into columns? I am attaching the sample dataset below.

Sample dataset

  # Sample data: a 37-row data frame with two columns. The first column (0:36)
  # is unnamed; `o3.word` holds one "ConceptMMI(...)" string per row, each made
  # of comma-separated name='value' pairs. The \" sequences below are escaped
  # double quotes inside the R string literals.
  d <- 
    structure(list(0:36, o3.word = c("ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Transurethral cystoscopy (procedure)', cui='C0010707', semtypes='[diap]', trigger='[\"Cystourethroscopy\"-tx-1-\"cystourethroscopy\"-noun-0]', location='TX', pos_info='8/17', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='0%', cui='C3842591', semtypes='[qnco]', trigger='[\"0%\"-tx-1-\"0\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='16.30', preferred_name='Insertion Mutation', cui='C1512796', semtypes='[genf]', trigger='[\"Insertion\"-tx-1-\"insertion\"-noun-0]', location='TX', pos_info='16/9', tree_codes='E05.393.420.601.550;G05.365.590.575;G05.558.550')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Clinical act of insertion', cui='C0441587', semtypes='[hlca]', trigger='[\"Insertion\"-tx-1-\"insertion\"-noun-0]', location='TX', pos_info='16/9', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Implantation procedure', cui='C0021107', semtypes='[topp]', trigger='[\"Insertion\"-tx-1-\"insertion\"-noun-0]', location='TX', pos_info='16/9', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Insert (object)', cui='C1883719', semtypes='[ftcn]', trigger='[\"Insertion\"-tx-1-\"insertion\"-noun-0]', location='TX', pos_info='16/9', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='1+', cui='C3816745', semtypes='[fndg]', trigger='[\"1+\"-tx-1-\"1\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='1+ Score', cui='C2827734', semtypes='[qnco]', trigger='[\"1+\"-tx-1-\"1\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='1+ Score, WHO', cui='C2981698', semtypes='[clas]', trigger='[\"1+\"-tx-1-\"1\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='Greater than one', cui='C4280965', semtypes='[qnco]', trigger='[\">1\"-tx-1-\"1\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Indwelling (qualifier value)', cui='C0439848', semtypes='[ftcn]', trigger='[\"Indwelling\"-tx-1-\"indwelling\"-adj-0]', location='TX', pos_info='15/10', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Indwelling Device', cui='C3694424', semtypes='[medd]', trigger='[\"Indwelling\"-tx-1-\"indwelling\"-adj-0]', location='TX', pos_info='15/10', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='+2', cui='C0740116', semtypes='[qnco]', trigger='[\"+2\"-tx-1-\"2\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='2+', cui='C3833492', semtypes='[fndg]', trigger='[\"2+\"-tx-1-\"2\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='2+ Score', cui='C2827735', semtypes='[qnco]', trigger='[\"2+\"-tx-1-\"2\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='2+ Score, WHO', cui='C2981700', semtypes='[clas]', trigger='[\"2+\"-tx-1-\"2\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='PSA Level Less than Two', cui='C4526661', semtypes='[lbtr]', trigger='[\"<2\"-tx-1-\"2\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='9.99', preferred_name='Ureter', cui='C0041951', semtypes='[bpoc]', trigger='[\"Ureteral\"-tx-1-\"ureteral\"-adj-0]', location='TX', pos_info='17/8', tree_codes='A05.810.776')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Ureteral Route of Drug Administration', cui='C1522613', semtypes='[ftcn]', trigger='[\"URETERAL\"-tx-1-\"ureteral\"-adj-0]', location='TX', pos_info='17/8', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='3+ Answer to Question', cui='C3838680', semtypes='[fndg]', trigger='[\"3+\"-tx-1-\"3\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='3+ Score', cui='C2827736', semtypes='[qnco]', trigger='[\"3+\"-tx-1-\"3\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='3+ Score, WHO', cui='C2981702', semtypes='[clas]', trigger='[\"3+\"-tx-1-\"3\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='<3 (qualifier value)', cui='C0439086', semtypes='[qnco]', trigger='[\"<3\"-tx-1-\"3\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='9.99', preferred_name='Stent, device', cui='C0038257', semtypes='[medd]', trigger='[\"Stent\"-tx-1-\"stent\"-noun-0]', location='TX', pos_info='20/5', tree_codes='E07.695.750')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='4+ Answer to question', cui='C3838679', semtypes='[fndg]', trigger='[\"4+\"-tx-1-\"4\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='17.80', preferred_name='Name', cui='C0027365', semtypes='[inpr]', trigger='[\"Name\"-tx-1-\"Name\"-noun-0]', location='TX', pos_info='0/4', tree_codes='L01.559.598.400.556')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='12.96', preferred_name='Ozone', cui='C0030106', semtypes='[chem]', trigger='[\"O3\"-tx-1-\"o3\"-noun-0]', location='TX', pos_info='6/2', tree_codes='D01.362.670.600;x.x.x.x')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='CDISC ADaM Derivation Type Terminology', cui='C2825527', semtypes='[inpr]', trigger='[\"DTYPE\"-tx-1-\"dtype\"-noun-0]', location='TX', pos_info='29/5', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='Computer Programming Object', cui='C1704861', semtypes='[cnce]', trigger='[\"Object\"-tx-1-\"object\"-noun-0]', location='TX', pos_info='36/6', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='Length', cui='C1444754', semtypes='[qnco]', trigger='[\"Length\"-tx-1-\"Length\"-noun-0]', location='TX', pos_info='15/6', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='Length of Trial', cui='C1706316', semtypes='[resa]', trigger='[\"LENGTH\"-tx-1-\"Length\"-noun-0]', location='TX', pos_info='15/6', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='Name (property) (qualifier value)', cui='C4522128', semtypes='[qlco]', trigger='[\"Name\"-tx-1-\"Name\"-noun-0]', location='TX', pos_info='0/4', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='Object Class', cui='C1518526', semtypes='[cnce]', trigger='[\"Object\"-tx-1-\"object\"-noun-0]', location='TX', pos_info='36/6', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='Person Name', cui='C1547383', semtypes='[inpr]', trigger='[\"Name\"-tx-1-\"Name\"-noun-0]', location='TX', pos_info='0/4', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='Physical object', cui='C0347997', semtypes='[phob]', trigger='[\"Object\"-tx-1-\"object\"-noun-0]', location='TX', pos_info='36/6', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Term (lexical)', cui='C1705313', semtypes='[idcn]', trigger='[\"Word\"-tx-1-\"word\"-noun-0]', location='TX', pos_info='9/4', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='United States Military Commissioned Officer O3', cui='C4521399', semtypes='[clas]', trigger='[\"O3\"-tx-1-\"o3\"-noun-0]', location='TX', pos_info='6/2', tree_codes='')"
    )), class = "data.frame", row.names = c(NA, -37L))
  

>Solution :

You could convert each entry into a list (with parse and eval) and then bind them together:

library(tidyverse)

# Turn every "ConceptMMI(name='value', ...)" string in `d$o3.word` into a
# one-row tibble (one column per name) and stack the rows into one tibble.
#
# Each string is rewritten to an R `list(...)` call and evaluated. Because that
# executes text taken from the data, evaluation is confined to an environment
# that exposes nothing except `list()`: a row containing any other call (for
# example `system(...)`) fails with "could not find function" instead of
# running. Rows made only of name='value' string pairs behave as before.
concept_env <- list2env(list(list = base::list), parent = emptyenv())

str_replace(d$o3.word, "ConceptMMI", "list") |>
  map(\(x) as_tibble(eval(parse(text = x), envir = concept_env))) |>
  list_rbind()

Output

# A tibble: 37 × 10
   index       mm    score preferred_name                       cui    semty…¹ trigger locat…² pos_i…³ tree_…⁴
   <chr>       <chr> <chr> <chr>                                <chr>  <chr>   <chr>   <chr>   <chr>   <chr>  
 1 tmppplgwffe MMI   3.68  Transurethral cystoscopy (procedure) C0010… [diap]  "[\"Cy… TX      8/17    ""     
 2 tmppplgwffe MMI   3.50  0%                                   C3842… [qnco]  "[\"0%… TX      0/1     ""     
 3 tmppplgwffe MMI   16.30 Insertion Mutation                   C1512… [genf]  "[\"In… TX      16/9    "E05.3…
 4 tmppplgwffe MMI   3.68  Clinical act of insertion            C0441… [hlca]  "[\"In… TX      16/9    ""     
 5 tmppplgwffe MMI   3.68  Implantation procedure               C0021… [topp]  "[\"In… TX      16/9    ""     
 6 tmppplgwffe MMI   3.68  Insert (object)                      C1883… [ftcn]  "[\"In… TX      16/9    ""     
 7 tmppplgwffe MMI   3.50  1+                                   C3816… [fndg]  "[\"1+… TX      0/1     ""     
 8 tmppplgwffe MMI   3.50  1+ Score                             C2827… [qnco]  "[\"1+… TX      0/1     ""     
 9 tmppplgwffe MMI   3.50  1+ Score, WHO                        C2981… [clas]  "[\"1+… TX      0/1     ""     
10 tmppplgwffe MMI   3.50  Greater than one                     C4280… [qnco]  "[\">1… TX      0/1     ""     
# … with 27 more rows, and abbreviated variable names ¹​semtypes, ²​location, ³​pos_info, ⁴​tree_codes
# ℹ Use `print(n = ...)` to see more rows
Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading