Using column name in loop with dplyr pipes

May 30, 2022

I have a simple question, but I can’t find the answer. I will use a reproducible example to explain my problem: I have a dataset ("aus_production", from the tsibbledata package) and I want to run an ARIMA for each variable – "Beer", "Tobacco", "Bricks", "Cement", "Electricity" and "Gas".

library(tidyverse)
library(tsibble)
library(fable)
library(tsibbledata)

# Importing dataset
aus_production <- tsibbledata::aus_production

# Running ARIMA for each variable
# The loop walks over the column names from the second column onward.
# NOTE: `i` holds each name as a character string (e.g. "Beer"), so ARIMA(i)
# receives a string instead of a bare column name; this is the call that
# raises the error discussed below.
for (i in colnames(aus_production)[2:length(aus_production)]){
  fit <- aus_production %>% model(arima = ARIMA(i))
  print(fit)
}

My question is: how can I loop over column names (strings) and use one at a time inside the dplyr pipes? The problem lies in the fact that i = "Beer", not Beer (without quotation marks).

Inside the loop, the code that is run is fit <- aus_production %>% model(arima = ARIMA("Beer")) and it gives an error. The correct form would be fit <- aus_production %>% model(arima = ARIMA(Beer)) (without the quotation marks).

I have tried a couple things, like:

# Attempt 1: subset the data by the string in `i` and hand the result to
# ARIMA(), i.e. a data object is passed where a column name is expected.
fit <- aus_production %>% model(arima = ARIMA(aus_production[,i]))

# Attempt 2: `$` takes the name after it literally, so this asks for a column
# called "i" rather than the column whose name is stored in `i`.
fit <- aus_production %>% model(arima = ARIMA(aus_production$i))

My problem is not inherently from the fable package, I’m just using it as an example.

>Solution :

You could do it with a pivot:

library(tidyverse)
library(tsibble)
#> 
#> Attaching package: 'tsibble'
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, union
library(fable)
#> Loading required package: fabletools
library(tsibbledata)
library(broom)

# Reshape to long format (one row per quarter and series) so that a single
# model() call fits the same ARIMA specification to every series.
out <- aus_production %>%
  pivot_longer(-Quarter, names_to = "var", values_to = "vals") %>%
  group_by(var) %>%
  # Name the model column inside model() instead of renaming the result by
  # position afterwards; the resulting columns are still `var` and `model`.
  model(model = ARIMA(vals ~ 0 + pdq(0, 1, 1) + PDQ(0, 1, 1)))

# Print the mable: one fitted model per series.
out
#> # A mable: 6 x 2
#> # Key:     var [6]
#>   var                            model
#>   <chr>                        <model>
#> 1 Beer        <ARIMA(0,1,1)(0,1,1)[4]>
#> 2 Bricks      <ARIMA(0,1,1)(0,1,1)[4]>
#> 3 Cement      <ARIMA(0,1,1)(0,1,1)[4]>
#> 4 Electricity <ARIMA(0,1,1)(0,1,1)[4]>
#> 5 Gas         <ARIMA(0,1,1)(0,1,1)[4]>
#> 6 Tobacco     <ARIMA(0,1,1)(0,1,1)[4]>

# tidy() can be applied to the whole mable: it returns one row per estimated
# coefficient and keeps the key column (`var`), so the
# rowwise()/summarise()/unnest() detour is not needed. `.model` is dropped
# because the mable has a single model column, leaving
# var, term, estimate, std.error, statistic and p.value.
out %>%
  tidy() %>%
  select(-.model)
#> # A tibble: 12 × 6
#>    var         term  estimate std.error statistic  p.value
#>    <chr>       <chr>    <dbl>     <dbl>     <dbl>    <dbl>
#>  1 Beer        ma1     -0.741    0.0411    -18.1  7.89e-45
#>  2 Beer        sma1    -0.695    0.0569    -12.2  2.47e-26
#>  3 Bricks      ma1      0.147    0.0694      2.12 3.49e- 2
#>  4 Bricks      sma1    -0.859    0.0381    -22.5  5.89e-56
#>  5 Cement      ma1     -0.258    0.0633     -4.07 6.57e- 5
#>  6 Cement      sma1    -0.832    0.0408    -20.4  6.57e-52
#>  7 Electricity ma1     -0.556    0.0771     -7.22 9.22e-12
#>  8 Electricity sma1    -0.731    0.0396    -18.5  4.57e-46
#>  9 Gas         ma1     -0.311    0.0714     -4.35 2.09e- 5
#> 10 Gas         sma1    -0.557    0.0501    -11.1  6.08e-23
#> 11 Tobacco     ma1     -0.807    0.0576    -14.0  4.38e-31
#> 12 Tobacco     sma1    -0.749    0.0606    -12.4  4.13e-26

Created on 2022-05-30 by the reprex package (v2.0.1)

To answer your original question, you can use !!sym() around your character variable string:

# Running ARIMA for each variable
# measured_vars() (tsibble) returns every column that is neither the index
# nor a key, so the loop does not rely on the index being the first column.
for (i in measured_vars(aus_production)) {
  # !!sym(i) turns the string held in `i` into a column symbol and injects
  # it into the model formula.
  fit <- aus_production %>%
    model(arima = ARIMA(!!sym(i) ~ 0 + pdq(1, 1, 1)))

  # One coefficient table per variable, tagged with the variable name.
  fit %>%
    tidy() %>%
    mutate(var = i) %>%
    print()
}
#> # A tibble: 5 × 7
#>   .model term  estimate std.error statistic  p.value var  
#>   <chr>  <chr>    <dbl>     <dbl>     <dbl>    <dbl> <chr>
#> 1 arima  ar1     -0.337    0.0810     -4.16 4.55e- 5 Beer 
#> 2 arima  ma1     -0.597    0.0663     -9.00 1.26e-16 Beer 
#> 3 arima  sar1    -0.814    0.115      -7.08 2.00e-11 Beer 
#> 4 arima  sma1     0.194    0.101       1.91 5.74e- 2 Beer 
#> 5 arima  sma2    -0.678    0.0671    -10.1  6.70e-20 Beer 
#> # A tibble: 5 × 7
#>   .model term  estimate std.error statistic  p.value var    
#>   <chr>  <chr>    <dbl>     <dbl>     <dbl>    <dbl> <chr>  
#> 1 arima  ar1      0.264    0.0819     3.22  1.50e- 3 Tobacco
#> 2 arima  ma1     -0.908    0.0379   -24.0   3.71e-59 Tobacco
#> 3 arima  sar1     0.450    0.414      1.09  2.79e- 1 Tobacco
#> 4 arima  sma1    -1.04     0.433     -2.40  1.73e- 2 Tobacco
#> 5 arima  sma2     0.178    0.307      0.579 5.63e- 1 Tobacco
#> # A tibble: 5 × 7
#>   .model term  estimate std.error statistic   p.value var   
#>   <chr>  <chr>    <dbl>     <dbl>     <dbl>     <dbl> <chr> 
#> 1 arima  ar1    0.293       0.323    0.907  0.366     Bricks
#> 2 arima  ma1   -0.137       0.330   -0.415  0.678     Bricks
#> 3 arima  sar1  -0.830       0.236   -3.51   0.000553  Bricks
#> 4 arima  sma1   0.00262     0.220    0.0119 0.991     Bricks
#> 5 arima  sma2  -0.742       0.184   -4.03   0.0000792 Bricks
#> # A tibble: 3 × 7
#>   .model term  estimate std.error statistic  p.value var   
#>   <chr>  <chr>    <dbl>     <dbl>     <dbl>    <dbl> <chr> 
#> 1 arima  ar1    -0.239     0.228     -1.05  2.96e- 1 Cement
#> 2 arima  ma1    -0.0382    0.232     -0.164 8.70e- 1 Cement
#> 3 arima  sma1   -0.823     0.0426   -19.3   1.09e-48 Cement
#> # A tibble: 5 × 7
#>   .model term  estimate std.error statistic  p.value var        
#>   <chr>  <chr>    <dbl>     <dbl>     <dbl>    <dbl> <chr>      
#> 1 arima  ar1      0.245    0.137       1.80 7.39e- 2 Electricity
#> 2 arima  ma1     -0.739    0.107      -6.91 5.68e-11 Electricity
#> 3 arima  sar1     0.893    0.0851     10.5  5.09e-21 Electricity
#> 4 arima  sma1    -1.73     0.0941    -18.4  6.94e-46 Electricity
#> 5 arima  sma2     0.791    0.0752     10.5  4.33e-21 Electricity
#> # A tibble: 3 × 7
#>   .model term  estimate std.error statistic  p.value var  
#>   <chr>  <chr>    <dbl>     <dbl>     <dbl>    <dbl> <chr>
#> 1 arima  ar1      0.700    0.0779      8.99 1.34e-16 Gas  
#> 2 arima  ma1     -0.936    0.0444    -21.1  4.67e-54 Gas  
#> 3 arima  sma1    -0.518    0.0586     -8.84 3.64e-16 Gas

Created on 2022-05-30 by the reprex package (v2.0.1)