thr3ads.net - R help - [R] : Quantile and rowMean from multiple files in a folder [Apr 2014]

If this information is useful, please help other people find it:
Share via:
arun
2014-Apr-15 03:04 UTC
[R] : Quantile and rowMean from multiple files in a folder

Hi,
It is because of the different dimensions of the Simulation data within each Site.
Try:
# Create the output folder for the per-site quantile tables. Warnings are
# suppressed so the script can be re-run when the folder already exists.
dir.create("final", showWarnings = FALSE)

# List the input files once. The pattern is a regular expression, so the dot is
# escaped and the match is anchored to the end of the file name.
csv_files <- list.files(pattern = "\\.csv$")

# Group the files by site: the site ID is the part of the file name before the
# first underscore.
lst1 <- split(csv_files, sub("_.*", "", csv_files))

# Number of files per site
vapply(lst1, length, integer(1))
#G100 G101 G102 G103 G104 G105 G106 G107 G108 G109 G110 G111 G112 G113 G114 G115
# 100  100  100  100  100  100  100  100  100  100  100  100  100  100  100  100
#G116 G117 G118 G119 G120 GG10 GG11 GG12 GG13 GG14 GG15 GG16 GG17 GG18 GG19 GG20
# 100  100  100  100  100  100  100  100  100  100  100  100  100  100  100  100
#GG21 GG22 GG23 GG24 GG25 GG26 GG27 GG28
# 100  100  100  100  100  100  100  100

# Read every file of every site into a nested list:
# lst2[[site]][[k]] is the data frame read from the k-th file of that site.
lst2 <- lapply(lst1, function(x1) lapply(x1, function(x2) {
  lines1 <- readLines(x2)
  # The first two lines of each file together form the column header
  header1 <- lines1[1:2]
  dat1 <- read.table(text = lines1, header = FALSE, sep = ",",
                     stringsAsFactors = FALSE, skip = 2)
  # Paste the two header rows element-wise (separated by a space) to build
  # one name per column
  colnames(dat1) <- Reduce(paste, strsplit(header1, ","))
  # Drop the last two rows of the table
  dat1[-c(nrow(dat1), nrow(dat1) - 1), ]
}))

## The number of columns differs between files of the same site
## (shown: files 1-6 of sites 5-8)
sapply(lst2, function(site) unlist(lapply(site, ncol)))[1:6, 5:8]
#     G104 G105 G106 G107
#[1,]  258  257  258  258
#[2,]  258  258  258  258
#[3,]  258  258  258  258
#[4,]  258  257  258  258
#[5,]  258  258  258  258
#[6,]  258  258  258  258

## The number of rows is consistent: no site has a file whose row count
## is different from 9
sapply(lst2, function(site) !all(unlist(lapply(site, nrow)) == 9))
# G100  G101  G102  G103  G104  G105  G106  G107  G108  G109  G110  G111  G112
#FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# G113  G114  G115  G116  G117  G118  G119  G120  GG10  GG11  GG12  GG13  GG14
#FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# GG15  GG16  GG17  GG18  GG19  GG20  GG21  GG22  GG23  GG24  GG25  GG26  GG27
#FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# GG28
#FALSE
# Union of all column names (excluding each table's first column) found in
# any file of any site
names1 <- unique(unlist(
  lapply(lst2, function(site) {
    lapply(site, function(dat) names(dat)[-1])
  })
))
length(names1)
#[1] 257


# lstYear <- lapply(lst2,function(x) lapply(x, function(y)
# y[,1,drop=FALSE])[[1]])

library(plyr)

# For each site:
#   1. pad every table of that site to the full set of columns in `names1`,
#   2. compute the 0%, 1%, ..., 100% quantiles of every column,
#   3. average those quantiles across the site's tables,
#   4. write the result to "final/<site>_Quantile.csv".
# `numcolwise()` comes from plyr. The list of NULLs returned by `write.csv()`
# is wrapped in `invisible()` so it is not printed.
invisible(lapply(seq_along(lst2), function(i) {
  lstN <- lapply(lst2[[i]], function(x) {
    # Numeric NA template: columns a file does not provide stay NA but are
    # still numeric, so `numcolwise()` keeps them and every table yields a
    # quantile table of the same shape.
    datN <- as.data.frame(matrix(NA_real_, nrow = nrow(x),
                                 ncol = length(names1),
                                 dimnames = list(NULL, names1)))
    # Fill by column name rather than by position, so a file that lacks a
    # column cannot shift its values under the wrong names.
    datN[, names(x)[-1]] <- x[, -1, drop = FALSE]
    datN
  })

  probs1 <- seq(0, 1, by = 0.01)
  lstQ1 <- lapply(lstN, function(x) {
    numcolwise(function(y) quantile(y, probs1, na.rm = TRUE))(x)
  })

  # Stack the quantile tables into a (quantile x column x table) array
  arr1 <- array(unlist(lstQ1),
                dim = c(dim(lstQ1[[1]]), length(lstQ1)),
                dimnames = list(NULL, names(lstQ1[[1]]), NULL))

  # Mean over the third dimension, ignoring tables where a column is missing
  res <- rowMeans(arr1, dims = 2, na.rm = TRUE)

  # Replace the space inside each column name by an underscore
  colnames(res) <- gsub(" ", "_", colnames(res))

  res1 <- data.frame(Percentiles = paste0(seq(0, 100, by = 1), "%"), res,
                     stringsAsFactors = FALSE)
  write.csv(res1,
            file.path(getwd(), "final",
                      paste0(names(lst1)[[i]], "_Quantile.csv")),
            row.names = FALSE, quote = FALSE)
}))



## Quantile files found below the working directory
grep("Quantile", list.files(recursive = TRUE), value = TRUE)
#[1] "final/G100_Quantile.csv" "final/G101_Quantile.csv"
#[3] "final/G102_Quantile.csv" "final/G103_Quantile.csv"
#[5] "final/G104_Quantile.csv" "final/G105_Quantile.csv"
#[7] "final/G106_Quantile.csv" "final/G107_Quantile.csv"
#[9] "final/G108_Quantile.csv" "final/G109_Quantile.csv"
#[11] "final/G110_Quantile.csv" "final/G111_Quantile.csv"
#[13] "final/G112_Quantile.csv" "final/G113_Quantile.csv"
#[15] "final/G114_Quantile.csv" "final/G115_Quantile.csv"
#[17] "final/G116_Quantile.csv" "final/G117_Quantile.csv"
#[19] "final/G118_Quantile.csv" "final/G119_Quantile.csv"
#[21] "final/G120_Quantile.csv" "final/GG10_Quantile.csv"
#[23] "final/GG11_Quantile.csv" "final/GG12_Quantile.csv"
#[25] "final/GG13_Quantile.csv" "final/GG14_Quantile.csv"
#[27] "final/GG15_Quantile.csv" "final/GG16_Quantile.csv"
#[29] "final/GG17_Quantile.csv" "final/GG18_Quantile.csv"
#[31] "final/GG19_Quantile.csv" "final/GG20_Quantile.csv"
#[33] "final/GG21_Quantile.csv" "final/GG22_Quantile.csv"
#[35] "final/GG23_Quantile.csv" "final/GG24_Quantile.csv"
#[37] "final/GG25_Quantile.csv" "final/GG26_Quantile.csv"
#[39] "final/GG27_Quantile.csv" "final/GG28_Quantile.csv"


# Read the quantile files back in: one data frame per file whose path
# contains "Quantile"
ReadOut1 <- lapply(
  grep("Quantile", list.files(recursive = TRUE), value = TRUE),
  function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE)
)

# Dimensions of every table: row 1 = number of rows, row 2 = number of columns
sapply(ReadOut1, function(x) dim(x))
#     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#[1,]  101  101  101  101  101  101  101  101  101   101   101   101   101   101
#[2,]  258  258  258  258  258  258  258  258  258   258   258   258   258   258
#     [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
#[1,]   101   101   101   101   101   101   101   101   101   101   101   101
#[2,]   258   258   258   258   258   258   258   258   258   258   258   258
#     [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
#[1,]   101   101   101   101   101   101   101   101   101   101   101   101
#[2,]   258   258   258   258   258   258   258   258   258   258   258   258
#     [,39] [,40]
#[1,]   101   101
#[2,]   258   258

# Top-left corner of the first table
ReadOut1[[1]][1:3, 1:3]
#  Percentiles  txav_DJF txav_MAM
#1          0% -12.56619 6.795429
#2          1% -12.45888 6.864886
#3          2% -12.35157 6.934344

### Q2: write one file per column of the quantile tables, holding that
### column for every table in ReadOut1
dir.create("Indices", showWarnings = FALSE)
names1 <- names(ReadOut1[[1]])

# With tables of equal width, simplify2array() returns a list-matrix with one
# row per column name and one column per table.
lstNew <- simplify2array(ReadOut1)

# If the tables differ in their number of columns, simplify2array() cannot
# build a matrix and nrow() is NULL; fail with a clear message instead of
# "Error in 2:nrow(lstNew) : argument of length 0".
if (!is.matrix(lstNew)) {
  stop("The tables in ReadOut1 do not all have the same number of columns",
       call. = FALSE)
}
nrow(lstNew)
#[1] 258

# Row 1 is the "Percentiles" column, so the data columns start at row 2.
# seq_len(...)[-1] is empty (rather than c(2, 1)) when there is only one row.
invisible(lapply(seq_len(nrow(lstNew))[-1], function(i) {
  # Percentiles column of the first table, then the i-th column of each table
  dat1 <- data.frame(lstNew[1], do.call(cbind, lstNew[i, ]),
                     stringsAsFactors = FALSE)
  # Column names: "<first column name>", then "<site>_<column name>"
  colnames(dat1) <- c(rownames(lstNew)[1],
                      paste(names(lst1), rownames(lstNew)[i], sep = "_"))
  write.csv(dat1,
            file.path(getwd(), "Indices",
                      paste0(rownames(lstNew)[i], ".csv")),
            row.names = FALSE, quote = FALSE)
}))

## Output2: read the files back in, one data frame per file whose path
## contains "Indices"
ReadOut2 <- lapply(
  grep("Indices", list.files(recursive = TRUE), value = TRUE),
  function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE)
)
length(ReadOut2)
#[1] 257

head(ReadOut2[[1]], 2)
#  Percentiles G100_pav_ANN G101_pav_ANN G102_pav_ANN G103_pav_ANN G104_pav_ANN
#1          0%     0.978451    0.9517680    0.9383280    0.8519280    0.9438790
#2          1%     0.992648    0.9638816    0.9480754    0.8625262    0.9548512
#  G105_pav_ANN G106_pav_ANN G107_pav_ANN G108_pav_ANN G109_pav_ANN G110_pav_ANN
#1    0.9303260    0.7484670    0.9757010     1.049533    0.9841290    0.7778830
#2    0.9417438    0.7594563    0.9868968     1.063668    0.9968095    0.7882509
#  G111_pav_ANN G112_pav_ANN G113_pav_ANN G114_pav_ANN G115_pav_ANN G116_pav_ANN
#1     0.737651    0.8813010    0.9155330     0.829001    0.6778760    0.5463310
#2     0.746934    0.8924871    0.9265448     0.838534    0.6880397    0.5527359
#  G117_pav_ANN G118_pav_ANN G119_pav_ANN G120_pav_ANN GG10_pav_ANN GG11_pav_ANN
#1    0.7191360    0.7470170    0.7859380    0.7774590    0.6303150    0.5200200
#2    0.7278231    0.7556053    0.7975213    0.7852408    0.6381671    0.5258248
#  GG12_pav_ANN GG13_pav_ANN GG14_pav_ANN GG15_pav_ANN GG16_pav_ANN GG17_pav_ANN
#1    0.6672890     0.851834    0.5209710    0.6445290    0.5874320    0.7263650
#2    0.6761913     0.861177    0.5282514    0.6520456    0.5948674    0.7365299
#  GG18_pav_ANN GG19_pav_ANN GG20_pav_ANN GG21_pav_ANN GG22_pav_ANN GG23_pav_ANN
#1    0.6642220    0.5385440    0.5043320    0.7484140    0.6436940     0.541165
#2    0.6729234    0.5454527    0.5120815    0.7575216    0.6502167     0.549040
#  GG24_pav_ANN GG25_pav_ANN GG26_pav_ANN GG27_pav_ANN GG28_pav_ANN
#1    0.5067010    0.7082260    0.6447260    0.6197480    0.9163480
#2    0.5136588    0.7160864    0.6545266    0.6278891    0.9284303


Also, attached is the script in case the email mangles the code.

A.K.



On Monday, April 14, 2014 6:26 PM, Zilefac Elvis <zilefacelvis at
yahoo.com> wrote:

Hi AK,
I have another request for help.
Attached is a larger file (~27MB) for sample.zip. All files are the same as
before, except that I am using more sites to do the same thing that you did with
sample.zip.

When generalizing Quantilecode.R to many sites, I receive an error when I run:

# Code as run by the original poster (whitespace restored, logic unchanged):
# writes one CSV per row of lstNew into the "Indices" folder.
dir.create("Indices")
names1 <- lapply(ReadOut1, function(x) names(x))[[1]]
lstNew <- simplify2array(ReadOut1)

lapply(2:nrow(lstNew), function(i) {
  dat1 <- data.frame(lstNew[1], do.call(cbind, lstNew[i, ]),
                     stringsAsFactors = FALSE)
  colnames(dat1) <- c(rownames(lstNew)[1],
                      paste(names(lst1),
                            rep(rownames(lstNew)[i], length(lst1)),
                            sep = "_"))
  write.csv(dat1,
            paste0(paste(getwd(), "Indices", rownames(lstNew)[i], sep = "/"),
                   ".csv"),
            row.names = FALSE, quote = FALSE)
})

and I get this:
Error in 2:nrow(lstNew) : argument of length 0


I have tried a few tricks but could not overcome the error message.

Please help!
Atem.
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: Quantilecode.txt
URL:
<https://stat.ethz.ch/pipermail/r-help/attachments/20140414/2628a0b7/attachment-0002.txt>
R help - Apr 2014 - : Quantile and rowMean from multiple files in a folder

[R] : Quantile and rowMean from multiple files in a folder