r - Reshape a large matrix with missing values and multiple vars of interest -
I need to reorganize a large dataset into a specific format for further analysis. Right now the data are in long format, with multiple records through time for each point. I need to reshape the data so that each point has a single record, which will add many new columns of time-specific data. I've looked at previous similar posts, but I need to convert several of the current variables into columns, and I can't find an example of that. Is there a way to accomplish this in a single reshape, or do I have to do several and then concatenate the new columns together? One more wrinkle before I post the example: not all points were sampled at each time-step, so I need those values to show up as NA. For example (see data below), sitepoint a1 was not sampled at all in 2012, sitepoint a10 was not sampled during the first round in 2012, but k83 was sampled all 9 times.
# Example input in long format: one row per (sitepoint, year_rotation) sample.
# Integer literals use the `L` suffix and factor levels / column names are
# passed through structure()'s special `.Label` / `.Names` arguments.
mydatain <- structure(list(
  sitepoint = structure(
    c(1L, 1L, 1L, 1L, 1L, 1L,
      2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
      3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
      4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
      5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
      6L, 6L),
    .Label = c("a1", "a10", "k145", "k83", "t15", "t213"),
    class = "factor"
  ),
  year_rotation = structure(
    c(1L, 2L, 3L, 4L, 5L, 6L,
      1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L,
      1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L,
      1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
      1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
      1L, 7L),
    .Label = c("2010_1", "2010_2", "2010_3", "2011_1", "2011_2", "2011_3",
               "2012_1", "2012_2", "2012_3"),
    class = "factor"
  ),
  mr_fire = structure(
    c(5L, 6L, 6L, 2L, 9L, 9L,
      5L, 6L, 6L, 2L, 9L, 9L, 7L, 8L,
      16L, 17L, 21L, 22L, 23L, 25L, 3L, 4L,
      10L, 11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L,
      1L, 2L, 2L, 5L, 6L, 6L, 11L, 11L, 12L,
      7L, 24L),
    .Label = c("0", "1", "10", "11", "12", "13", "14", "15", "2", "23", "24",
               "25", "35", "36", "37", "39", "40", "47", "48", "49", "51",
               "52", "53", "8", "9"),
    class = "factor"
  ),
  fire_seas = structure(
    c(2L, 2L, 2L, 2L, 2L, 2L,
      2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
      1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L,
      2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
      2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
      1L, 3L),
    .Label = c("dry", "fire", "wet"),
    class = "factor"
  ),
  opttsf = c(
    1L, 1L, 1L, 1L, 1L, 1L,
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
    0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L,
    1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
    1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L,
    1L, 1L
  )
),
.Names = c("sitepoint", "year_rotation", "mr_fire", "fire_seas", "opttsf"),
row.names = c(
  31L, 32L, 33L, 34L, 35L, 36L,
  67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L,
  10543L, 10544L, 10545L, 10546L, 10547L, 10548L, 10549L, 10550L,
  14988L, 14989L, 14990L, 14991L, 14992L, 14993L, 14994L, 14995L, 14996L,
  17370L, 17371L, 17372L, 17373L, 17374L, 17375L, 17376L, 17377L, 17378L,
  19353L, 19354L
),
class = "data.frame")

# Ultimately I need this:
# Desired wide-format result: one row per sitepoint and one column per
# (variable, year_rotation) pair, with NA where a point was not sampled.
myfinal <- structure(list(
  sitepoint = structure(
    1:6,
    .Label = c("a1", "a10", "k145", "k83", "t15", "t213"),
    class = "factor"
  ),
  mr_fire_2010_1 = c(12L, 12L, 39L, 23L, 0L, 14L),
  mr_fire_2010_2 = c(13L, 13L, 40L, 24L, 1L, NA),
  mr_fire_2010_3 = c(13L, 13L, NA, 25L, 1L, NA),
  mr_fire_2011_1 = c(1L, 1L, 51L, 35L, 12L, NA),
  mr_fire_2011_2 = c(2L, 2L, 52L, 36L, 13L, NA),
  mr_fire_2011_3 = c(2L, 2L, 53L, 37L, 13L, NA),
  mr_fire_2012_1 = c(NA, NA, 9L, 47L, 24L, 8L),
  mr_fire_2012_2 = c(NA, 14L, 10L, 48L, 24L, NA),
  mr_fire_2012_3 = c(NA, 15L, 11L, 49L, 25L, NA),
  season_2010_1 = structure(c(2L, 2L, 1L, 2L, 2L, 1L),
                            .Label = c("dry", "fire"), class = "factor"),
  season_2010_2 = structure(c(2L, 2L, 1L, 2L, 2L, NA),
                            .Label = c("dry", "fire"), class = "factor"),
  season_2010_3 = structure(c(1L, 1L, NA, 1L, 1L, NA),
                            .Label = "fire", class = "factor"),
  season_2011_1 = structure(c(2L, 2L, 1L, 2L, 2L, NA),
                            .Label = c("dry", "fire"), class = "factor"),
  season_2011_2 = structure(c(2L, 2L, 1L, 2L, 2L, NA),
                            .Label = c("dry", "fire"), class = "factor"),
  season_2011_3 = structure(c(2L, 2L, 1L, 2L, 2L, NA),
                            .Label = c("dry", "fire"), class = "factor"),
  season_2012_1 = structure(c(NA, NA, 2L, 1L, 1L, 2L),
                            .Label = c("fire", "wet"), class = "factor"),
  season_2012_2 = structure(c(NA, 1L, 2L, 1L, 1L, NA),
                            .Label = c("fire", "wet"), class = "factor"),
  season_2012_3 = structure(c(NA, 1L, 2L, 1L, 1L, NA),
                            .Label = c("fire", "wet"), class = "factor"),
  opttsf_2010_1 = c(1L, 1L, 0L, 1L, 1L, 1L),
  opttsf_2010_2 = c(1L, 1L, 0L, 1L, 1L, NA),
  opttsf_2010_3 = c(1L, 1L, NA, 1L, 1L, NA),
  opttsf_2011_1 = c(1L, 1L, 0L, 0L, 1L, NA),
  opttsf_2011_2 = c(1L, 1L, 0L, 0L, 1L, NA),
  opttsf_2011_3 = c(1L, 1L, 0L, 0L, 1L, NA),
  opttsf_2012_1 = c(NA, NA, 1L, 0L, 0L, 1L),
  opttsf_2012_2 = c(NA, 1L, 1L, 0L, 0L, NA),
  opttsf_2012_3 = c(NA, 1L, 1L, 0L, 0L, NA)
),
.Names = c(
  "sitepoint",
  "mr_fire_2010_1", "mr_fire_2010_2", "mr_fire_2010_3",
  "mr_fire_2011_1", "mr_fire_2011_2", "mr_fire_2011_3",
  "mr_fire_2012_1", "mr_fire_2012_2", "mr_fire_2012_3",
  "season_2010_1", "season_2010_2", "season_2010_3",
  "season_2011_1", "season_2011_2", "season_2011_3",
  "season_2012_1", "season_2012_2", "season_2012_3",
  "opttsf_2010_1", "opttsf_2010_2", "opttsf_2010_3",
  "opttsf_2011_1", "opttsf_2011_2", "opttsf_2011_3",
  "opttsf_2012_1", "opttsf_2012_2", "opttsf_2012_3"
),
class = "data.frame",
row.names = c(NA, -6L))

# The actual dataset is 23656 records x 15 variables, so doing this by hand
# would cause major headaches and potential mistakes. Any help or suggestions
# would be appreciated. If this has been answered elsewhere, apologies: I
# couldn't find anything directly applicable; everything seemed to be related
# to just 3 columns, with 1 of them being extracted into new variables. Thanks.
sp
`dcast` from the devel version of data.table, i.e. v1.9.5, can cast multiple columns simultaneously. It can be installed from here.
# Cast several value columns from long to wide in a single call.
# Passing more than one column in `value.var` needs data.table v1.9.5+.
library(data.table)

# setDT() converts `mydatain` to a data.table by reference (no copy).
# Each sitepoint becomes one row; every value column is spread across the
# year_rotation levels, and combinations that were never sampled are NA.
dcast(
  setDT(mydatain),
  sitepoint ~ year_rotation,
  value.var = c('mr_fire', 'fire_seas', 'opttsf')
)
Comments
Post a Comment