R aggregate by label difference

I have a large data frame containing import and export data of 28 EU countries, among other details. I want to calculate the trade balance (exports minus imports) of each country.

I managed to aggregate imports and exports for each country using this code:

x = aggregate( value_in_euros ~ as_factor(flow) + as_factor(declarant), data, sum)

And the resulting data set looks like this:

    as_factor(flow) as_factor(declarant) value_in_euros
1          Imports                   FR   1.019259e+12
2          Exports                   FR   8.539884e+11
3          Imports                   BE   6.936985e+11
4          Exports                   BE   7.350930e+11
5          Imports                   NL   1.043084e+12
6          Exports                   NL   1.180904e+12
7          Imports                   DE   2.048670e+12
8          Exports                   DE   2.415090e+12
9          Imports                   IT   7.399388e+11
10         Exports                   IT   8.671186e+11
11         Imports                   GB   9.705866e+10
12         Exports                   GB   6.669265e+10
13         Imports                   IE   1.725745e+11
14         Exports                   IE   3.150999e+11
15         Imports                   DK   1.707441e+11
16         Exports                   DK   1.897421e+11

Now I need an aggregate way to calculate the trade balance (Exports minus Imports) for each country. Can anyone provide me with some hint as to how to do it? Thanks.


We may use split to split the dataframe by declarant and then compute the difference between flows.

set.seed(1)
df <- data.frame(flow=rep(c('Imports','Exports'), 8),
                 declarant=rep(c('FR','BE','NL','DE','IT','GB','IE','DK'), each=2),
                 value_in_euros=rnorm(n=16L,mean=1e12,sd=1e4))
out <- sapply(split(df, df$declarant), function(x) diff(x$value_in_euros))
# ------------------------------------------
> out[1:4]
        BE         DE         DK         FR 
 24309.094   2508.957 -11698.645   8100.971 

Consider reshaping after aggregation. Then, run the difference.

agg_df <- aggregate(
    value_in_euros ~ flow + declarant, data=data, FUN=sum
) |> reshape(
  idvar = "declarant",
  v.names = "value_in_euros",
  timevar = "flow",
  direction = "wide",
  sep = "_"
) |> transform(
  balance = value_in_euros_Exports - value_in_euros_Imports
)

agg_df
   declarant value_in_euros_Imports value_in_euros_Exports       balance
1         FR           1.019259e+12           8.539884e+11 -165270600000
3         BE           6.936985e+11           7.350930e+11   41394500000
5         NL           1.043084e+12           1.180904e+12  137820000000
7         DE           2.048670e+12           2.415090e+12  366420000000
9         IT           7.399388e+11           8.671186e+11  127179800000
11        GB           9.705866e+10           6.669265e+10  -30366010000
13        IE           1.725745e+11           3.150999e+11  142525400000
15        DK           1.707441e+11           1.897421e+11   18998000000

data

txt = '       flow              declarant value_in_euros
1          Imports                   FR   1.019259e+12
2          Exports                   FR   8.539884e+11
3          Imports                   BE   6.936985e+11
4          Exports                   BE   7.350930e+11
5          Imports                   NL   1.043084e+12
6          Exports                   NL   1.180904e+12
7          Imports                   DE   2.048670e+12
8          Exports                   DE   2.415090e+12
9          Imports                   IT   7.399388e+11
10         Exports                   IT   8.671186e+11
11         Imports                   GB   9.705866e+10
12         Exports                   GB   6.669265e+10
13         Imports                   IE   1.725745e+11
14         Exports                   IE   3.150999e+11
15         Imports                   DK   1.707441e+11
16         Exports                   DK   1.897421e+11'

agg_df <- read.table(text=txt)