Sorting data to get size, mean and SD for each group in R

Solution 1:

library(dplyr)
library(tidyr)
dat %>% 
    pivot_longer(cols = matches("Tall|Group"), names_to = c(".value", "key"), names_pattern = "([A-Za-z]*)(\\d+)$") %>% 
    group_by(key, Group) %>% 
    summarise(No=n(), Mean = mean(Tall), SD = sd(Tall), .groups = "drop") %>% 
    pivot_wider(names_from = Group, values_from = c(No, Mean, SD), names_sep = "")

# A tibble: 4 × 7
  key     NoM   NoN MeanM MeanN   SDM   SDN
  <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
1 1         2     3  25    33.7   0    7.51
2 2         3     2  20    18     4    8.49
3 3         2     3  33.5  37    14.8 12.1 
4 4         2     3  72.5  45.3  24.7 13.7 

Solution 2:

This should work. I couldn't think of a good way to get the pairs of columns together, so had to kind of force it.

library(dplyr)
library(tidyr)

dat_value<-dat %>% select(contains("Tall")) %>% pivot_longer(cols=1:4,names_to = "category",values_to="values")

dat_group<-dat %>% select(contains("Group")) %>% pivot_longer(cols=1:4,names_to = "category",values_to="group") %>%
  select(group)

new_dat<-dat_value %>%
  bind_cols(dat_group) %>%
  group_by(category) %>%
  summarize(NoM=sum(group=="M"),
            MeanM=mean(values[group=="M"]),
            SDM=sd(values[group=="M"]),
            NoN=sum(group=="N"),
            MeanN=mean(values[group=="N"]),
            SDN=sd(values[group=="N"])
            ) %>%
  ungroup()

new_dat
#> # A tibble: 4 × 7
#>   category   NoM MeanM   SDM   NoN MeanN   SDN
#>   <chr>    <int> <dbl> <dbl> <int> <dbl> <dbl>
#> 1 Tall1        2  25     0       3  33.7  7.51
#> 2 Tall2        3  20     4       2  18    8.49
#> 3 Tall3        2  33.5  14.8     3  37   12.1 
#> 4 Tall4        2  72.5  24.7     3  45.3 13.7

Created on 2022-01-14 by the reprex package (v2.0.1)