Reshape a dataframe to long format with multiple sets of measure columns [duplicate]

Using the new feature in melt from data.table v1.9.5+:

require(data.table) # v1.9.5+
melt(setDT(df), measure = patterns("^year", "^pop"), value.name = c("year", "pop"))

You can find the rest of the vignettes here.

If the 'year', 'pop', columns are alternating, we can subset with c(TRUE, FALSE) to get the columns 1, 3, 5,..etc. and c(FALSE, TRUE) to get 2, 4, 6,.. due to the recycling. Then, we unlist the columns and create a new 'data.frame.

 df2 <- data.frame(year=unlist(df1[c(TRUE, FALSE)]), 
                  pop=unlist(df1[c(FALSE, TRUE)]))
 row.names(df2) <- NULL
 head(df2)
 #   year    pop
 #1            
 #2 16XX 4675,0
 #3 17XX 4739,3
 #4 17XX 4834,0
 #5 180X 4930,0
 #6 180X 5029,0

Or another option is

library(splitstackshape)
merged.stack(transform(df1, id=1:nrow(df1)), var.stubs=c('year', 'pop'), 
        sep='var.stubs')[order(.time_1), 3:4, with=FALSE]

data

df1 <- structure(list(year1 = c("", "16XX", "17XX", "17XX", "180X", 
"180X", "181X", "181X", "182X", "182X"), pop1 = c("", "4675,0", 
"4739,3", "4834,0", "4930,0", "5029,0", "5129,0", "5231,9", "5297,0", 
"5362,0"), year2 = c(NA, 1900L, 1901L, 1902L, 1903L, 1904L, 1905L, 
1906L, 1907L, 1908L), pop2 = c("", "6453,0", "6553,5", "6684,0", 
"6818,0", "6955,0", "7094,0", "7234,7", "7329,0", "7422,0"), 
year3 = c(NA, 1930L, 1931L, 1932L, 1933L, 1934L, 1935L, 1936L, 
1937L, 1938L), pop3 = c("", "9981,2", "", "", "", "", "", 
"", "", "")), .Names = c("year1", "pop1", "year2", "pop2", 
"year3", "pop3"), class = "data.frame", row.names = c(NA, -10L))

Another option is to use split.default to split the dataframe in a list of dataframes and then bind them together:

lst <- lapply(split.default(df1, sub('.*(\\d)', '\\1', names(df1))),
              setNames, c('year','pop'))

do.call(rbind, lst)

which gives the desired result:

    year     pop
1.1 16XX  4675,0
1.2 17XX  4739,3
1.3 17XX  4834,0
1.4 180X  4930,0
1.5 180X  5029,0
1.6 181X  5129,0
1.7 181X  5231,9
1.8 182X  5297,0
1.9 182X  5362,0
2.1 1900  6453,0
2.2 1901  6553,5
2.3 1902  6684,0
2.4 1903  6818,0
2.5 1904  6955,0
2.6 1905  7094,0
2.7 1906  7234,7
2.8 1907  7329,0
2.9 1908  7422,0
3.1 1930  9981,2
3.2 1931 10583,5
3.3 1932  8671,0
3.4 1933  9118,0
3.5 1934  9625,0
3.6 1935  8097,0
3.7 1936  7984,7
3.8 1937  8729,0
3.9 1938 10462,0

You could also use rbindlist from the data.table package for the last step:

library(data.table)
rbindlist(lst)

Used data:

df1 <- structure(list(year1 = c("16XX", "17XX", "17XX", "180X", "180X", "181X", "181X", "182X", "182X"),
                      pop1 = c("4675,0", "4739,3", "4834,0", "4930,0", "5029,0", "5129,0", "5231,9", "5297,0", "5362,0"),
                      year2 = c(1900L, 1901L, 1902L, 1903L, 1904L, 1905L, 1906L, 1907L, 1908L),
                      pop2 = c("6453,0", "6553,5", "6684,0", "6818,0", "6955,0", "7094,0", "7234,7", "7329,0", "7422,0"), 
                      year3 = c(1930L, 1931L, 1932L, 1933L, 1934L, 1935L, 1936L, 1937L, 1938L),
                      pop3 = c("9981,2", "10583,5", "8671,0", "9118,0", "9625,0", "8097,0", "7984,7", "8729,0", "10462,0")),
                 .Names = c("year1", "pop1", "year2", "pop2", "year3", "pop3"), class = "data.frame", row.names = c(NA, -9L))

Reshape a dataframe to long format with multiple sets of measure columns [duplicate]

data

Related

Recent Posts