Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

split a dataframe into equal parts and store the results

I’m relatively new to R.
I have a large dataframe which I would like to split into multiple dataframes around different values.

structure(list(country = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("Bangladesh", "Barbados", "Benin", "Burundi", 
"Cameroon", "Chile", "Cyprus", "Ecuador", "Equatorial Guinea", 
"Gabon", "Ghana", "Guatemala", "Guinea", "Guyana", "Haiti", "India", 
"Jordan", "Lebanon", "Liberia", "Madagascar", "Mali", "Mexico", 
"Morocco", "Mozambique", "Nepal", "Nicaragua", "Niger", "Oman", 
"Pakistan", "Panama", "Peru", "Rwanda", "Senegal", "Seychelles", 
"Sierra Leone", "Singapore", "Sri Lanka", "Sudan", "Togo", "Tunisia", 
"Turkey", "Uganda", "Zambia"), class = c("pseries", "factor")), 
    date = structure(12:36, .Label = c("1965", "1966", "1967", 
    "1968", "1969", "1970", "1971", "1972", "1973", "1974", "1975", 
    "1976", "1977", "1978", "1979", "1980", "1981", "1982", "1983", 
    "1984", "1985", "1986", "1987", "1988", "1989", "1990", "1991", 
    "1992", "1993", "1994", "1995", "1996", "1997", "1998", "1999", 
    "2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", 
    "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", 
    "2016", "2017", "2018"), class = c("pseries", "factor")), 
    oda_gdp = c(0.15080885502447, 0.1744123099839, 0.199176897551553, 
    0.193616875061556, 0.186942991013889, 0.164744452026834, 
    0.192609744294439, 0.13752013069625, 0.156186721262664, 0.137192335225767, 
    0.131167382827501, 0.139945790928319, 0.112553104508006, 
    0.1172188903714, 0.120991133274215, 0.0940867931618562, 0.0857724612850372, 
    0.0653099752359248, 0.0714189688493898, 0.0470115150264598, 
    0.0446068588203229, 0.0414522297087586, 0.0450866627292532, 
    0.0435203084091358, 0.0404623996092304), entry = c(0, 0, 
    1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0)), row.names = c(NA, 25L), class = "data.frame")

I would like to split this into multiple dataframes around the points where entry==1. More concretely, I would like each new dataframe to contain 2 rows before entry==1 and 7 rows after entry==1.

Output should look like this

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

df1=structure(list(country = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L), .Label = c("Bangladesh", "Barbados", "Benin", 
"Burundi", "Cameroon", "Chile", "Cyprus", "Ecuador", "Equatorial Guinea", 
"Gabon", "Ghana", "Guatemala", "Guinea", "Guyana", "Haiti", "India", 
"Jordan", "Lebanon", "Liberia", "Madagascar", "Mali", "Mexico", 
"Morocco", "Mozambique", "Nepal", "Nicaragua", "Niger", "Oman", 
"Pakistan", "Panama", "Peru", "Rwanda", "Senegal", "Seychelles", 
"Sierra Leone", "Singapore", "Sri Lanka", "Sudan", "Togo", "Tunisia", 
"Turkey", "Uganda", "Zambia"), class = c("pseries", "factor")), 
    date = structure(12:21, .Label = c("1965", "1966", "1967", 
    "1968", "1969", "1970", "1971", "1972", "1973", "1974", "1975", 
    "1976", "1977", "1978", "1979", "1980", "1981", "1982", "1983", 
    "1984", "1985", "1986", "1987", "1988", "1989", "1990", "1991", 
    "1992", "1993", "1994", "1995", "1996", "1997", "1998", "1999", 
    "2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", 
    "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", 
    "2016", "2017", "2018"), class = c("pseries", "factor")), 
    oda_gdp = c(0.15080885502447, 0.1744123099839, 0.199176897551553, 
    0.193616875061556, 0.186942991013889, 0.164744452026834, 
    0.192609744294439, 0.13752013069625, 0.156186721262664, 0.137192335225767
    ), entry = c(0, 0, 1, 0, 0, 0, 1, 0, 0, 0)), row.names = c(NA, 
10L), class = "data.frame")

df1 above is the expected result for the first row where entry==1.

df2=structure(list(country = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L), .Label = c("Bangladesh", "Barbados", "Benin", 
"Burundi", "Cameroon", "Chile", "Cyprus", "Ecuador", "Equatorial Guinea", 
"Gabon", "Ghana", "Guatemala", "Guinea", "Guyana", "Haiti", "India", 
"Jordan", "Lebanon", "Liberia", "Madagascar", "Mali", "Mexico", 
"Morocco", "Mozambique", "Nepal", "Nicaragua", "Niger", "Oman", 
"Pakistan", "Panama", "Peru", "Rwanda", "Senegal", "Seychelles", 
"Sierra Leone", "Singapore", "Sri Lanka", "Sudan", "Togo", "Tunisia", 
"Turkey", "Uganda", "Zambia"), class = c("pseries", "factor")), 
    date = structure(16:25, .Label = c("1965", "1966", "1967", 
    "1968", "1969", "1970", "1971", "1972", "1973", "1974", "1975", 
    "1976", "1977", "1978", "1979", "1980", "1981", "1982", "1983", 
    "1984", "1985", "1986", "1987", "1988", "1989", "1990", "1991", 
    "1992", "1993", "1994", "1995", "1996", "1997", "1998", "1999", 
    "2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", 
    "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", 
    "2016", "2017", "2018"), class = c("pseries", "factor")), 
    oda_gdp = c(0.186942991013889, 0.164744452026834, 0.192609744294439, 
    0.13752013069625, 0.156186721262664, 0.137192335225767, 0.131167382827501, 
    0.139945790928319, 0.112553104508006, 0.1172188903714), entry = c(0, 
    0, 1, 0, 0, 0, 0, 0, 0, 0)), row.names = 5:14, class = "data.frame")

df2 above is the expected result for the second row where entry==1.

I’m guessing split might be the way to go, but I can’t figure out a way to include the conditions.
Notice that all newly created dataframes should be the same size.
Any help is much appreciated!

Solution:

I don’t think split is the right tool here. Instead, you can do:

# Rows where entry == 1 are the window centres. Each window runs from 2 rows
# before the centre to 7 rows after it (10 rows in total).
# Centres whose window does not fit inside df are skipped: near the top,
# (i - 2):(i + 7) would contain 0 or negative indices (a silently shorter
# piece or a subscript error); near the bottom it would run past nrow(df)
# and pad the piece with all-NA rows. Skipping keeps every piece the same size.
centres <- which(df$entry == 1)
centres <- centres[centres - 2 >= 1 & centres + 7 <= nrow(df)]
lapply(centres, function(i) df[(i - 2):(i + 7), ])
#> [[1]]
#>       country date   oda_gdp entry
#> 1  Bangladesh 1976 0.1508089     0
#> 2  Bangladesh 1977 0.1744123     0
#> 3  Bangladesh 1978 0.1991769     1
#> 4  Bangladesh 1979 0.1936169     0
#> 5  Bangladesh 1980 0.1869430     0
#> 6  Bangladesh 1981 0.1647445     0
#> 7  Bangladesh 1982 0.1926097     1
#> 8  Bangladesh 1983 0.1375201     0
#> 9  Bangladesh 1984 0.1561867     0
#> 10 Bangladesh 1985 0.1371923     0
#> 
#> [[2]]
#>       country date   oda_gdp entry
#> 5  Bangladesh 1980 0.1869430     0
#> 6  Bangladesh 1981 0.1647445     0
#> 7  Bangladesh 1982 0.1926097     1
#> 8  Bangladesh 1983 0.1375201     0
#> 9  Bangladesh 1984 0.1561867     0
#> 10 Bangladesh 1985 0.1371923     0
#> 11 Bangladesh 1986 0.1311674     0
#> 12 Bangladesh 1987 0.1399458     0
#> 13 Bangladesh 1988 0.1125531     0
#> 14 Bangladesh 1989 0.1172189     0
#> 
#> [[3]]
#>       country date    oda_gdp entry
#> 13 Bangladesh 1988 0.11255310     0
#> 14 Bangladesh 1989 0.11721889     0
#> 15 Bangladesh 1990 0.12099113     1
#> 16 Bangladesh 1991 0.09408679     0
#> 17 Bangladesh 1992 0.08577246     0
#> 18 Bangladesh 1993 0.06530998     0
#> 19 Bangladesh 1994 0.07141897     0
#> 20 Bangladesh 1995 0.04701152     0
#> 21 Bangladesh 1996 0.04460686     0
#> 22 Bangladesh 1997 0.04145223     0

Created on 2022-02-08 by the reprex package (v2.0.1)

Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading