How can I match duplicated values in a column, examine the condition and output a final result in R?
Maybe something like this with tidy verse approach (tidyr::separate)
However it's a quick answer and I'm not sure to understand your data relations as you want.
library(tidyverse)
one <- tibble::tribble(
~ID, ~Position, ~Region,
"a", "1-50", "D1a",
"a", "80-100", "D2a",
"a", "250-300", "D3a",
"b", "50-100", "D1b",
"b", "150-180", "D2b",
"c", "1-20", "D1c",
"c", "50-80", "D2c",
"c", "100-200", "D3c",
"c", "250-300", "D4c"
)
two <- tibble::tribble(
~ID, ~Position,
"a", 28L,
"a", 85L,
"a", 320L,
"b", 55L,
"b", 100L,
"c", 18L,
"c", 45L,
"c", 180L,
"c", 270L
)
one_ <- one %>%
tidyr::separate(Position, c('p_min', 'p_max'), sep = "-") %>%
mutate_at(vars(starts_with('p_')), as.integer)
two %>%
mutate(rn = row_number()) %>%
left_join(one_) %>%
mutate(in_between = (Position >= p_min & Position <= p_max)) %>%
filter(in_between) %>%
distinct(rn, .keep_all = TRUE)
Joining, by = "ID"
# A tibble: 7 × 7
ID Position rn p_min p_max Region in_between
<chr> <int> <int> <int> <int> <chr> <lgl>
1 a 28 1 1 50 D1a TRUE
2 a 85 2 80 100 D2a TRUE
3 b 55 4 50 100 D1b TRUE
4 b 100 5 50 100 D1b TRUE
5 c 18 6 1 20 D1c TRUE
6 c 180 8 100 200 D3c TRUE
7 c 270 9 250 300 D4c TRUE