Split data frame string column into multiple columns
Use stringr::str_split_fixed
library(stringr)
str_split_fixed(before$type, "_and_", 2)
Another option is to use the new tidyr package.
library(dplyr)
library(tidyr)
before <- data.frame(
attr = c(1, 30 ,4 ,6 ),
type = c('foo_and_bar', 'foo_and_bar_2')
)
before %>%
separate(type, c("foo", "bar"), "_and_")
## attr foo bar
## 1 1 foo bar
## 2 30 foo bar_2
## 3 4 foo bar
## 4 6 foo bar_2
5 years later adding the obligatory data.table
solution
library(data.table) ## v 1.9.6+
setDT(before)[, paste0("type", 1:2) := tstrsplit(type, "_and_")]
before
# attr type type1 type2
# 1: 1 foo_and_bar foo bar
# 2: 30 foo_and_bar_2 foo bar_2
# 3: 4 foo_and_bar foo bar
# 4: 6 foo_and_bar_2 foo bar_2
We could also both make sure that the resulting columns will have correct types and improve performance by adding type.convert
and fixed
arguments (since "_and_"
isn't really a regex)
setDT(before)[, paste0("type", 1:2) := tstrsplit(type, "_and_", type.convert = TRUE, fixed = TRUE)]
Yet another approach: use rbind
on out
:
before <- data.frame(attr = c(1,30,4,6), type=c('foo_and_bar','foo_and_bar_2'))
out <- strsplit(as.character(before$type),'_and_')
do.call(rbind, out)
[,1] [,2]
[1,] "foo" "bar"
[2,] "foo" "bar_2"
[3,] "foo" "bar"
[4,] "foo" "bar_2"
And to combine:
data.frame(before$attr, do.call(rbind, out))
Notice that sapply with "[" can be used to extract either the first or second items in those lists so:
before$type_1 <- sapply(strsplit(as.character(before$type),'_and_'), "[", 1)
before$type_2 <- sapply(strsplit(as.character(before$type),'_and_'), "[", 2)
before$type <- NULL
And here's a gsub method:
before$type_1 <- gsub("_and_.+$", "", before$type)
before$type_2 <- gsub("^.+_and_", "", before$type)
before$type <- NULL