Sorting data to get size, mean and SD for each group in R
Solution 1:
library(dplyr)
library(tidyr)
dat %>%
pivot_longer(cols = matches("Tall|Group"), names_to = c(".value", "key"), names_pattern = "([A-Za-z]*)(\\d+)$") %>%
group_by(key, Group) %>%
summarise(No=n(), Mean = mean(Tall), SD = sd(Tall), .groups = "drop") %>%
pivot_wider(names_from = Group, values_from = c(No, Mean, SD), names_sep = "")
# A tibble: 4 × 7
key NoM NoN MeanM MeanN SDM SDN
<chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
1 1 2 3 25 33.7 0 7.51
2 2 3 2 20 18 4 8.49
3 3 2 3 33.5 37 14.8 12.1
4 4 2 3 72.5 45.3 24.7 13.7
Solution 2:
This should work. I couldn't think of a good way to get the pairs of columns together, so had to kind of force it.
library(dplyr)
library(tidyr)
dat_value<-dat %>% select(contains("Tall")) %>% pivot_longer(cols=1:4,names_to = "category",values_to="values")
dat_group<-dat %>% select(contains("Group")) %>% pivot_longer(cols=1:4,names_to = "category",values_to="group") %>%
select(group)
new_dat<-dat_value %>%
bind_cols(dat_group) %>%
group_by(category) %>%
summarize(NoM=sum(group=="M"),
MeanM=mean(values[group=="M"]),
SDM=sd(values[group=="M"]),
NoN=sum(group=="N"),
MeanN=mean(values[group=="N"]),
SDN=sd(values[group=="N"])
) %>%
ungroup()
new_dat
#> # A tibble: 4 × 7
#> category NoM MeanM SDM NoN MeanN SDN
#> <chr> <int> <dbl> <dbl> <int> <dbl> <dbl>
#> 1 Tall1 2 25 0 3 33.7 7.51
#> 2 Tall2 3 20 4 2 18 8.49
#> 3 Tall3 2 33.5 14.8 3 37 12.1
#> 4 Tall4 2 72.5 24.7 3 45.3 13.7
Created on 2022-01-14 by the reprex package (v2.0.1)