Function to filter rows under certain conditions in a data set
I want to delete all rows in this dataset where coder = 6
, under the condition that for the same id and arm the value coder 99 is present. In the picture these are the lines 5, 7, 11, 15 and 17. Does anyone know how I can proceed?
df = structure(list(id = c("001", "003", "003", "006", "007", "007",
"012", "012", "017", "020", "022", "022", "025", "026", "030",
"030", "030", "030", "034", "034", "036", "036", "038", "040",
"040", "042", "042", "047", "047", "049", "049", "051", "051",
"052", "052", "054", "054", "059", "061", "062", "062", "063",
"063", "063", "063", "064", "064", "065", "065", "065", "065",
"066", "066", "067", "067", "068", "068", "069", "069", "073",
"075", "076", "076", "077", "081", "081", "085", "093", "093",
"095", "095", "096", "096", "096", "096", "101", "102", "102",
"104", "104", "106", "106", "107", "107", "107", "108", "110",
"114", "114", "115", "115", "116", "116", "116", "120", "201",
"201", "201", "202", "202", "202", "203", "204", "205", "206",
"207"), arm = c(1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1,
1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1,
1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2,
1, 2, 1, 2, 1, 2, 3, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 3, 1,
2, 3, 1, 1, 1, 1, 1), coder = c(6, 6, 6, 6, 6, 99, 6, 99, 6,
6, 6, 99, 6, 6, 6, 99, 6, 99, 6, 6, 6, 99, 6, 6, 99, 6, 99, 6,
99, 6, 6, 6, 6, 6, 6, 6, 99, 6, 6, 6, 99, 6, 99, 6, 99, 6, 99,
6, 99, 6, 99, 6, 99, 6, 99, 6, 99, 6, 6, 6, 6, 6, 99, 6, 6, 6,
6, 6, 99, 6, 99, 6, 99, 6, 99, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 99, 6, 99, 6, 6, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99)), row.names = c(NA, -106L), class = c("tbl_df", "tbl",
"data.frame"))
Solution 1:
This should work
library(dplyr)
df %>%
group_by(id, arm) %>%
filter(!(any(coder == 99) & coder == 6))
# A tibble: 80 x 3
# Groups: id, arm [80]
id arm coder
<chr> <dbl> <dbl>
1 001 1 6
2 003 1 6
3 003 2 6
4 006 1 6
5 007 1 99
6 012 1 99
7 017 1 6
8 020 1 6
9 022 1 99
10 025 1 6
# ... with 70 more rows
Solution 2:
A base R option using subset
+ ave
subset(
df,
!(ave(coder == 99, id, arm, FUN = any) & coder == 6)
)
gives
id arm coder
<chr> <dbl> <dbl>
1 001 1 6
2 003 1 6
3 003 2 6
4 006 1 6
5 007 1 99
6 012 1 99
7 017 1 6
8 020 1 6
9 022 1 99
10 025 1 6
# ... with 70 more rows