How to groupby and back-fill only certain groups

Solution 1:

One option would be to create a second column, duplicating only the groups that you wanted to fill. Then, I use coalesce to combine the two columns together.

library(tidyverse)

df %>% 
  mutate(return2 = ifelse(firms %in% c("B", "C"), return, NA)) %>% 
  group_by(firms) %>% 
  fill(return2, .direction="up") %>% 
  mutate(return = coalesce(return, return2)) %>% 
  select(-return2)

Another option is to create a new dataframe with the groups that you want to fill, then join the data back to the original dataframe. Then, I apply coalesce to the two columns that start with "return".

df %>% 
  filter(firms != "A") %>% 
  group_by(firms) %>% 
  fill(return, .direction="up") %>% 
  left_join(df, ., by = c("date", "firms")) %>% 
  mutate(return = coalesce(!!!select(., starts_with("return")))) %>% 
  select(-c(return.x, return.y))

Another option is to split the dataframe by groups into a list of tibbles. Then, I select the groups to fill, then bind back together.

df %>%
    group_split(firms, .keep = TRUE) %>% 
    map_at(c(2:3), fill, return, .direction="up") %>% 
    map_dfr(., bind_rows)

Output

   date firms return
  <int> <chr>  <int>
1  1999 A          5
2  2000 A         NA
3  2001 A          6
4  1999 B          9
5  2000 B         10
6  2001 B         10
7  1999 C          8
8  2000 C          3
9  2001 C          3

Solution 2:

This is a simple and intuitive solution:

library(data.table)
df %>% group_by(firms) %>% 
mutate(return = ifelse(firms %in% c("B", "C"), nafill(return, type ="locf"), return))

>
  date firms return
1 1999     A      5
2 2000     A     NA
3 2001     A      6
4 1999     B      9
5 2000     B      9
6 2001     B     10
7 1999     C      8
8 2000     C      8
9 2001     C      3
>

Solution 3:

# Function to fill value down: fill_down => function
fill_down <- function(vec){
  # Stop if vec isn't a vector:
  stopifnot(
    is.vector(vec)
  )
  # Explicitly define returned object: vector => env
  return(
    na.omit(vec)[cumsum(Negate(is.na)(vec))]
  )
}

# Function to negate logical %in% test: not_in => function
not_in <- function(x, y){
  # Stop if x and y arent vectors: 
  stopifnot(
    all(
      vapply(
        list(x, y), 
        is.vector, 
        logical(1), 
        USE.NAMES = FALSE
      )
    )
  )
  # Explicitly define the returned object: logical vector => env
  return(
    Negate(`%in%`)(x, y)
  )
}

# Function to assign index: assign_idx => function
assign_idx <- function(df){
  # Function to avoid namespace collision: .resolve_unused_vec_name => function 
  .resolve_unused_vec_name <- function(df, vec_name = "idx", i = 1){
    # Check if any data.frame vector names are named the idx_vec_name
    if(Negate(hasName)(df, vec_name)){
      # If not; use the idx_vec_name => charact scalar env
      return(vec_name)
      # Otherwise:
    }else{
      # Increment i: i => integer scalar
      i <- i + 1
      # If this is the first iteration:
      if(i == 1){
        # Create the name of the index vector: vec_name => character scalar
        vec_name <- paste(
          vec_name, 
          as.character(i),
          sep = "_"
        )
        # Otherwise
      }else{
        # Rename the index vector: vec_name => character scalar
        vec_name <- gsub(
          "\\_\\d+$",
          paste0(
            "_", 
            as.character(i)
          ),
          vec_name
        )
      }
      # Re-apply the function with the adjusted vector name: 
      return(
        .resolve_unused_vec_name(
          df, 
          vec_name = vec_name,
          i = i
        )
      )
    }
  }
  # Resolve an unused vector name: idx_vec_name => character scalar
  idx_vec_name <- .resolve_unused_vec_name(df)
  # Assign index to data.frame: 
  df[,idx_vec_name] <- seq_len(
    nrow(
      df
    )
  )
  # Return the data.frame and index vector name: 
  # list of data.frame and character scalar => env
  return(
    list(
      df, 
      idx_vec_name
    )
  )
}

# Function to stop if not list of data.frames: 
# stop_if_not_list_of_data.frames => function
stop_if_not_list_of_data.frames <- function(df_lst){
  # Stop if df_lst is not a list of data.frames: 
  return(
    stopifnot(
      all(
        c(
          vapply(
            df_lst, 
            is.data.frame, 
            logical(1), 
            USE.NAMES = FALSE
          ),
          is.list(df_lst)
        )    
      ) 
    )
  )
}

# Function to stop if the vectors aren't present in a given data.frame:
# stop_if_not_vectors_in_df => function
stop_if_not_vectors_in_df <- function(df, vec_to_check){
  # If the names aren't present as vector names in the data.frame 
  # stop execution: 
  return(
    stopifnot(
      all(
        vec_to_check %in% colnames(df)
      )
    )
  )
}

# Function create list of data.frames based on logical condition: 
# df_2_list_of_dfs => function
logically_split_df_2_list_of_dfs <- function(df, logical_vec){
  # Stop if df isn't a data.frame and logical_vector isn't of type logical
  stopifnot(
    all(
      c(
        is.data.frame(df),
        is.logical(logical_vec)
      )
    )
  )
  # Allocate some memory: df_lst => empty list
  df_lst <- vector(
    "list", 
    2
  )
  # Split the data.frame into a list: df_lst => list of data.frames
  df_lst <- with(
    df, 
    split(
      df, 
      logical_vec
    )
  )
  # Explicitly define the returned object: list of data.frames => env
  return(df_lst)
}

# Function to apply function on list of data.frames and combine result: 
# combine_list_2_df => function
combine_list_2_df <- function(
  df_lst, 
  combination_func = c(rbind, cbind)){
  # Stop if not a list of data.frames: 
  stop_if_not_list_of_data.frames(df_lst)
  # Resolve the desired funciton to combine the list: 
  # cmbn_func_resolved => function
  cmbn_func_resolved <- match.fun(combination_func)
  # Combine list data.frame: res => data.frame
  res <- data.frame(
    do.call(
      cmbn_func_resolved, 
      df_lst
    ),
    stringsAsFactors = FALSE,
    row.names = NULL
  )
  # Explicitly define the returned object: data.frame => env
  return(res)
}

# Function to parse ellipses: parse_ellipsis => function
parse_ellipsis <- function(..., truncate_arg_names = TRUE){
  # Test if all arguments are characters: arg_vec_is_char => logical scalar 
  arg_vec_is_char <- all(
    vapply(
      list(...), 
      is.character,
      logical(1), 
      USE.NAMES = FALSE
    )
  )
  # If the args provided are a character vector
  if(arg_vec_is_char){
    # Coerce to a character vector: vec_str => character vector
    vec_str <- as.character(
      unlist(
        list(...)
      )
    )
  }else{
    # Substitute the optional args: order_vecs => list of calls
    vecs <- substitute(
      list(...)
    )[-1]
    # Parse the calls: vec_str => character vector
    vec_str <- vapply(
      vecs, 
      deparse,
      character(1)
    )
  }
  # If we want to truncate the provided arguments: 
  if(truncate_arg_names){
    # Drop any prefixes of data.frame name etc: 
    # vec_str_wo_prefix => character vector
    vec_str <- gsub(
      ".*\\$(\\w+|\\d+|\\s+)$", 
      "\\1", 
      vec_str
    )
  }
  # Explicitly define the returned object: character vector => env
  return(vec_str)
}

# Function to order a data.frame by a vector: order_by => function
order_by <- function(df, ..., decreasing = FALSE){
  # Stop if a data.frame hasnt been provided: 
  stopifnot(
    is.data.frame(df)
  )
  # Parse the ellipsis arguments: order_vec_str_wo_prefix => character vector
  order_vec_str_wo_prefix <- parse_ellipsis(...)
  # Stop if vectors aren't in data.frame: 
  stop_if_not_vectors_in_df(
    df, 
    order_vec_str_wo_prefix
  )
  # If order by descending: 
  if(decreasing){
    # Sort the data.frame by the given vectors: sort_order => integer vector
    sort_order <- rev(
      do.call(
        "order", 
        df[, order_vec_str_wo_prefix, drop = FALSE]
      )
    )
    # Otherwise: 
  }else{
    # Sort the data.frame by the given vectors: sort_order => integer vector
    sort_order <- do.call(
      "order", 
      df[, order_vec_str_wo_prefix, drop = FALSE]
    )
  }
  # Order the data.frame by the sort order: ordered_df => data.frame
  ordered_df <- df[sort_order,]
  # Explicitly define the returned object: data.frame => env
  return(ordered_df)
}

# Drop vectors in data.frame: drop_columns => function()
drop_columns <- function(df, ...){
  # Stop if a data.frame hasn't been provided: 
  stopifnot(
    is.data.frame(df)
  )
  # Parse the ellipsis argument: col_names_to_drop => character vector
  col_names_to_drop <- parse_ellipsis(...)
  # Stop if the vectors aren't in the data.frame:
  stop_if_not_vectors_in_df(
    df, 
    col_names_to_drop
  )
  # Drop the columns: df => data.frame
  df[,col_names_to_drop] <- NULL
  # Return the data.frame: data.frame => env
  return(df)
}

# Function to apply function to data.frame meeting logical test:
# lapply_to_true_df => function() 
lapply_to_true_df <- function(df_lst, function_to_apply = fill_down, ...){
  # Resolve the function to be applied to vectors: 
  function_resolved <- match.fun(function_to_apply)
  # Resolve the vectors apply the function to: 
  # vector_names_resolved => character vector
  vector_names_resolved <- parse_ellipsis(...)
  # Apply function to partition of data.frame in list that met logical 
  # condition: res => list of data.frames
  res <- lapply(
    seq_along(df_lst),
    function(i){
      if(names(df_lst)[i] == "TRUE"){
        df_lst[[i]][,vector_names_resolved] <- lapply(
          vector_names_resolved,
          function(x){
            function_resolved(
              as.vector(
                df_lst[[i]][, x, drop = TRUE]
              )
            )
          }
        )
      }
      df_lst[[i]]
    }
  )
  # Explicitly define the returned object: data.frame => env
  res
}

# Define the main function: main => function
main <- function(){
  # Input data: df => data.frame
  df <- structure(list(date = c(1999L, 2000L, 2001L, 1999L, 2000L, 2001L, 
    1999L, 2000L, 2001L), firms = c("A", "A", "A", "B", "B", "B", 
    "C", "C", "C"), return = c(5L, NA, 6L, 9L, NA, 10L, 8L, NA, 3L
    )), class = "data.frame", row.names = c(NA, -9L))
  # Allocate some memory for the index assigment: tmp_lst => empty list
  tmp_lst <- vector("list", 2)
  # Assign the index to the data.frame: tmp_lst => list of data.frame and character
  # scalar denoting the name of the index vector
  tmp_lst <- assign_idx(df)
  # Split the data.frame based on logical test: df_lst => list of data.frames
  df_lst <- with(
    tmp_lst[[1]], 
    logically_split_df_2_list_of_dfs(
      tmp_lst[[1]], 
      not_in(
        firms, 
        "A"
      )
    )
  )
  # Apply-Combine function and combine back to data.frame: res => data.frame
  res <- drop_columns(
    order_by(
      combine_list_2_df(
        lapply_to_true_df(
          df_lst,
          fill_down, 
          return
        ),
        rbind
      ),
      tmp_lst[[2]]
    ),
    tmp_lst[[2]]
  )
  # Output the result to the console: data.frame => stdout(console)
  return(res)
}

# Execute main if called:
if (sys.nframe() == 0){
  # Execute the main function: data.frame => stdout(console)
  main()
}

Solution 4:

You can subset observations and then mutate columns

library(data.table)
df <- data.table(df)
df[firms %in% c("B", "C"), return := nafill(return, type = "nocb"), by = firms]

Can not set path variable in mac for KobWeb

Python/Pandas - If Column A equals X or Y, then assign value from Col B. If not, then assign Col C. How to write in Python?

Is it possible to turn off all CSS animations in Chrome?

how to display digits in a number printed, separated by two spaces? in python

Why does C++ automatically rounds float [duplicate]

Run test in phpunit with specific php version

How to convert a matrix dataframe into three column table in Python [duplicate]

Speech Recognition API in Microsoft Edge (Not defined)

How to explicitly comment empty methods?

Create directory for App available to Files app [duplicate]

Using Django-modeltranslation in combination with PostgreSQL SearchVector

Narrowing down types of co-dependent, union-based, generic function arguments

How to groupby and back-fill only certain groups

Solution 1:

Solution 2:

Solution 3:

Solution 4:

Related

Recent Posts