Replacing zeros with ones in R conditional on the values before and after

I have a data frame as follows:

# Example data: `flag` is the input column, `flag_new` is the desired result.
df_ex <- tibble(
  id = c(1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2), # group identifier
  # Observed 0/1 flag, in row order within each id.
  flag = c(1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0),
  # Expected output: zeros enclosed by ones (runs of up to 4) are set to 1.
  flag_new = c(1,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,0,0)
)

Let's assume I have a variable called flag. For each id, I want to check the number of zeros between ones in that variable. If there is a one, followed by up to 4 zeros, and then another one, I would like to replace those zeros (the ones between the two ones) with ones. I have included a variable called flag_new, which is the result I am trying to generate.

Thanks for your help in advance.


Solution 1:

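An option is to create a function with rle that sets runs of 0s to 1 when the run has length at most n and is neither the first nor the last run (i.e. it is enclosed by 1s), and to apply it on the 'flag' column after grouping by 'id'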

library(dplyr)
  
#' Fill short interior runs of zeros with ones
#'
#' Run-length encodes `x` and sets every run of 0s whose length is at most
#' `n` to 1, provided the run is neither the first nor the last run. For 0/1
#' input this means the run is enclosed by 1s on both sides.
#'
#' @param x A vector of 0/1 flags.
#' @param n Maximum length of a run of zeros that is filled. Defaults to 4.
#' @return A vector of the same length as `x` with the qualifying zeros
#'   replaced by 1.
f1 <- function(x, n = 4) {
  runs <- rle(x)
  # The first and last runs are not enclosed on both sides, so never fill them.
  is_interior <- !seq_along(runs$values) %in% c(1, length(runs$values))
  # Compare against `n`; a hard-coded 4 here would silently ignore the argument.
  to_fill <- runs$lengths <= n & runs$values == 0 & is_interior
  runs$values[to_fill] <- 1
  inverse.rle(runs)
}
# Run the helper separately within each id, then drop the grouping.
df_ex %>%
  group_by(id) %>%
  mutate(flag_new2 = f1(flag, n = 4)) %>%
  ungroup()

-output

# A tibble: 18 × 4
      id  flag flag_new flag_new2
   <dbl> <dbl>    <dbl>     <dbl>
 1     1     1        1         1
 2     1     0        1         1
 3     1     0        1         1
 4     1     1        1         1
 5     1     1        1         1
 6     1     0        0         0
 7     1     0        0         0
 8     1     0        0         0
 9     2     1        1         1
10     2     1        1         1
11     2     0        1         1
12     2     0        1         1
13     2     1        1         1
14     2     0        0         0
15     2     0        0         0
16     2     0        0         0
17     2     0        0         0
18     2     0        0         0

-testing when the number of 0s between 1s is greater than 4

> df_ex$flag[10:16] <- 0
> df_ex$flag[17] <- 1
> df_ex %>% 
+    group_by(id) %>% 
+    mutate(flag_new2 = f1(flag, 4)) %>%
+    ungroup
# A tibble: 18 × 4
      id  flag flag_new flag_new2
   <dbl> <dbl>    <dbl>     <dbl>
 1     1     1        1         1
 2     1     0        1         1
 3     1     0        1         1
 4     1     1        1         1
 5     1     1        1         1
 6     1     0        0         0
 7     1     0        0         0
 8     1     0        0         0
 9     2     1        1         1
10     2     0        1         0
11     2     0        1         0
12     2     0        1         0
13     2     0        1         0
14     2     0        0         0
15     2     0        0         0
16     2     0        0         0
17     2     1        0         1
18     2     0        0         0

Solution 2:

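Here is an alternative approach for the provided data (note that it fills every run of zeros that is followed by a 1 within the group, regardless of the run's length, so it does not enforce the limit of 4 zeros):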

  1. group
  2. replace 0 with NA
  3. fill up
  4. replace NA with 0
library(tidyr)
library(dplyr)

# Within each id, back-fill 1s over the zeros that precede them.
df_ex %>% 
  group_by(id) %>%
  # Turn 0 into NA so that fill() treats the zeros as gaps.
  mutate(flag_new1 = na_if(flag, 0)) %>% 
  # Copy each 1 upward over the NAs before it (no limit on the run length).
  fill(flag_new1, .direction = "up") %>% 
  # NAs that remain (no later 1 in the group) are turned back into 0.
  mutate(flag_new1 = replace_na(flag_new1, 0)) %>% 
  ungroup()
      id  flag flag_new flag_new1
   <dbl> <dbl>    <dbl>     <dbl>
 1     1     1        1         1
 2     1     0        1         1
 3     1     0        1         1
 4     1     1        1         1
 5     1     1        1         1
 6     1     0        0         0
 7     1     0        0         0
 8     1     0        0         0
 9     2     1        1         1
10     2     1        1         1
11     2     0        1         1
12     2     0        1         1
13     2     1        1         1
14     2     0        0         0
15     2     0        0         0
16     2     0        0         0
17     2     0        0         0
18     2     0        0         0