R join two dataframes but keep only one of duplicate columns

Solution 1:

We can use {powerjoin}:

data

df1 <- iris[c(1,51),4:5]
df2 <- iris[c(6,56),2:5]

df1
#>    Petal.Width    Species
#> 1          0.2     setosa
#> 51         1.4 versicolor

df2
#>    Sepal.Width Petal.Length Petal.Width    Species
#> 6          3.9          1.7         0.4     setosa
#> 56         2.8          4.5         1.3 versicolor

solution

keep version from left table

library(powerjoin)
power_full_join(df1, df2, by = "Species", conflict = coalesce_xy)
#>      Species Sepal.Width Petal.Length Petal.Width
#> 1     setosa         3.9          1.7         0.2
#> 2 versicolor         2.8          4.5         1.4

keep version from right table

power_full_join(df1, df2, by = "Species", conflict = coalesce_yx)
#>      Species Sepal.Width Petal.Length Petal.Width
#> 1     setosa         3.9          1.7         0.4
#> 2 versicolor         2.8          4.5         1.3

combine them

power_full_join(df1, df2, by = "Species", conflict = `+`)
#>      Species Sepal.Width Petal.Length Petal.Width
#> 1     setosa         3.9          1.7         0.6
#> 2 versicolor         2.8          4.5         2.7

power_full_join(df1, df2, by = "Species", conflict = pmin)
#>      Species Sepal.Width Petal.Length Petal.Width
#> 1     setosa         3.9          1.7         0.2
#> 2 versicolor         2.8          4.5         1.3

If you don't want to resolve the conflict but want to fail if it happens, to avoid creating extra columns or desperately looking for columns renamed automatically, use the check argument (you can also "warn" or "inform")

power_full_join(df1, df2, by = "Species", check = check_specs(column_conflict = "abort"))
#> Error in `check_column_conflict()`:
#> ! The following columns are conflicted and their conflicts are not handled:  'Petal.Width'