count how many times a row appears in a different dataset
Solution 1:
You may try
# For every row of df1, count the rows of df2 it matches when the NAs in that
# row are read either all as "A" or all as "B".
# df1 / df2 are expected to exist already (they are defined outside this snippet).

# Convert df2 once; apply() would otherwise coerce it to a matrix on every call.
df2_rows <- as.matrix(df2)

row_appears <- vapply(
  seq_len(nrow(df1)), # unlike 1:nrow(df1), this is empty for a zero-row df1
  function(i) {
    as_a <- df1[i, ]
    as_b <- df1[i, ]
    as_a[is.na(as_a)] <- "A"
    as_b[is.na(as_b)] <- "B"
    hit_a <- apply(df2_rows, 1, function(t) all(as_a == t))
    hit_b <- apply(df2_rows, 1, function(t) all(as_b == t))
    # `|` instead of `+`: when the row has no NA the two candidates are
    # identical and adding the hits would count every match twice. For rows
    # with an NA the candidates differ, so both forms give the same total.
    sum(hit_a | hit_b)
  },
  integer(1)
)
row_appears
[1] 2 1 2 2
Solution 2:
Alternatively, we can treat it as a character-matching problem: convert both data frames into character vectors (one string per row) and treat each NA
as possibly A or B.
# Collapse each row of df1 into a regex and each row of df2 into a string,
# then count the regex hits per df1 row.
# Work on a copy so that df1 itself keeps its NAs for any code that runs later.
df1_patterns <- df1
df1_patterns[is.na(df1_patterns)] <- "(A|B)" # regex talk for "might be A or B"
x <- do.call(paste0, df1_patterns) # one pattern per row of df1
y <- do.call(paste0, df2) # one string per row of df2
# NOTE(review): the patterns are unanchored and cell values are not escaped;
# this presumably relies on every cell being a single plain letter -- confirm.
x |>
  vapply(
    \(pattern) sum(stringi::stri_count_regex(y, pattern)),
    integer(1),
    USE.NAMES = FALSE
  )
#> [1] 2 1 2 2
Or for R versions older than 4.1.0:
# Same count without the native pipe or the `\(x)` lambda shorthand.
# x holds one regex per df1 row, y one collapsed string per df2 row.
vapply(
  x,
  function(pattern) sum(stringi::stri_count_regex(y, pattern)),
  numeric(1),
  USE.NAMES = FALSE # spelled out: `F` is an ordinary variable and can be reassigned
)
Solution 3:
You can also use {tidyverse} or {data.table}.
library(tidyverse)
# Build both candidate versions of df1 (every NA read as "A", then as "B")
# and stack them.
# A lambda is used because passing extra arguments through across(...) is
# deprecated in current dplyr.
df3 <- bind_rows(
  df1 |> mutate(across(everything(), \(col) replace_na(col, "A"))),
  df1 |> mutate(across(everything(), \(col) replace_na(col, "B")))
)
# Count each distinct row of df2, attach the counts to the candidates, and
# report 0 for candidates that never occur in df2.
df2 |>
  group_by(across(everything())) |> # replaces the superseded group_by_all()
  summarise(N = n(), .groups = "drop") |>
  right_join(df3, by = paste0("P", 1:5)) |>
  mutate(N = replace_na(N, 0))
# # A tibble: 8 x 6
# P1 P2 P3 P4 P5 N
# <chr> <chr> <chr> <chr> <chr> <dbl>
# 1 A B A A B 2
# 2 A B A B B 1
# 3 A B B B B 1
# 4 B B A B B 1
# 5 B B B B B 2
# 6 A B A A A 0
# 7 A A B B A 0
# 8 B B B A A 0
library(data.table)
# Convert both inputs to data.tables by reference.
setDT(df1)
setDT(df2)
# Two independent copies of df1: one with every NA read as "A", one as "B".
# Separate copy() calls avoid binding both names to the same object.
df1_a <- copy(df1)
df1_b <- copy(df1)
df1_a[is.na(df1_a)] <- "A"
df1_b[is.na(df1_b)] <- "B"
df3 <- rbindlist(list(df1_a, df1_b))
# Count each distinct row of df2, then look up every candidate row of df3;
# candidates that are absent from df2 come back with N = NA.
df4 <-
  df2[, .N, by = eval(paste0("P", 1:5))
  ][df3, on = paste0("P", 1:5)]
# Replace the missing counts by reference. The integer 0L matches the integer
# counts produced by .N, so no double/integer mixing is involved.
df4[is.na(N), N := 0L][]
# P1 P2 P3 P4 P5 N
# 1: A B A A A 0
# 2: A A B B A 0
# 3: B B B A A 0
# 4: A B A B B 1
# 5: A B A A B 2
# 6: A B B B B 1
# 7: B B B B B 2
# 8: B B A B B 1