# Hi,
#
# The script below reads the "MSMS_<number>PepInfo.txt" files found under
# `directory`, keeps the files selected by `FacGroup`, and counts, per folder,
# how many comma-separated `Pro` entries each (Seq, Mod, z) combination has.

directory <- "/home/arunksa111/NewData"

# List the sub-directories of `directory` and the PepInfo files below it.
#
# Args:
#   directory: path of the top-level data directory.
#   number:    value pasted between "MSMS_" and "PepInfo.txt" to build the
#              file-name pattern.
#
# Returns:
#   An unnamed list of length 2:
#     [[1]] names of the directories located directly inside `directory`;
#     [[2]] paths (`directory`/<relative path>) of every file below
#           `directory` whose name matches the pattern, or NULL if none match.
GetFileList <- function(directory, number) {
  # The directory listing below is relative to the working directory, so we
  # still switch to `directory`, but put the caller's working directory back
  # when the function exits instead of leaving it changed.
  old.wd <- setwd(directory)
  on.exit(setwd(old.wd), add = TRUE)

  entries <- dir()
  filelist1 <- entries[file.info(entries)$isdir]

  direct <- dir(directory,
                pattern = paste0("MSMS_", number, "PepInfo.txt"),
                full.names = FALSE, recursive = TRUE)
  direct <- lapply(direct, function(x) paste0(directory, "/", x))

  lista <- unlist(direct)
  list(filelist1, lista)
}

# Scan the directory once and reuse the result for both pieces.
file.lists <- GetFileList(directory, 23)
file.list.names <- file.lists[[1]]
lista <- file.lists[[2]]

FacGroup <- c(0, 1, 1, 1, 0, 2, 2, 2)

# Read the tab-separated files whose `FacGroup` entry is not 0.
#
# Args:
#   FacGroup:     numeric vector, one entry per file; entries equal to 0 mark
#                 files to skip.
#   files:        paths of the candidate files (defaults to the global
#                 `lista`).
#   folder.names: names given to the resulting list elements (defaults to
#                 the global `file.list.names`).
#
# Returns:
#   A named list of data frames, one per selected file.
ReadDir <- function(FacGroup, files = lista, folder.names = file.list.names) {
  keep <- FacGroup != 0
  list.new <- files[keep]
  read.list <- lapply(list.new, function(x) {
    read.table(x, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
  })
  names(read.list) <- folder.names[keep]
  read.list
}

ListFacGroup <- ReadDir(FacGroup)
ListFacGroupSub <- lapply(ListFacGroup, head)

# Count the `Pro` entries per (Seq, Mod, z) combination in every folder.
#
# Args:
#   lista: named list of data frames, each with at least the columns
#          "FDR", "Seq", "Mod", "z" and "Pro"; the list names become the
#          count columns of the result.
#   FDR_k: rows are kept only when their FDR is strictly below this value.
#
# Returns:
#   A data frame with the columns Seq, Mod, z followed by one count column per
#   list name; combinations missing from a folder get a count of 0.
Pro <- function(lista, FDR_k) {
  library(plyr)
  library(data.table)

  # Full outer join on the peptide key, used to combine the per-folder tables.
  merge.by.key <- function(...) {
    merge(..., by = c("Seq", "Mod", "z"), all = TRUE)
  }

  split.list <- split(lista, names(lista))

  # Keep the rows passing the FDR threshold and only the columns needed later.
  seq.mod.z <- lapply(seq_along(split.list), function(i) {
    lapply(split.list[[i]], function(x) {
      x[x[["FDR"]] < FDR_k, c("Seq", "Mod", "z", "Pro")]
    })
  })
  names(seq.mod.z) <- names(split.list)

  # Prepend a `folder_name` column holding the name of the list element.
  folder.name <- lapply(seq.mod.z, function(x) {
    lapply(names(x), function(i) {
      do.call(rbind, lapply(x[i], function(x) cbind(folder_name = i, x)))
    })
  })

  # Within each (Seq, Mod, z) group, collapse all Pro values into one
  # comma-separated string.
  merge.data <- lapply(folder.name, function(x) {
    lapply(x, function(x1) {
      x1 <- data.table(x1)
      x1[, Pro := paste(Pro, collapse = ","), by = c("Seq", "Mod", "z")]
    })
  })

  count.Pro <- lapply(merge.data, function(x) {
    lapply(x, function(x1) {
      # Number of comma-separated entries in the collapsed Pro string.
      # vapply (rather than sapply) guarantees an integer vector even when
      # the table has no rows.
      x1$counts <- vapply(x1$Pro, function(x2) {
        length(gsub("\\s", "", unlist(strsplit(x2, ","))))
      }, integer(1))
      x3 <- as.data.frame(x1)
      # Column 6 is `counts`: rename it after the folder, then drop
      # `folder_name` (column 1) and `Pro` (column 5).
      names(x3)[6] <- as.character(unique(x3$folder_name))
      x3[, -c(1, 5)]
    })
  })
  count.ProUnique <- lapply(count.Pro, function(x) lapply(x, unique))

  # count Pro by group (2-columns)
  Pro.group <- lapply(count.ProUnique, function(x) Reduce(merge.by.key, x))
  # Pro.group1 <- Pro.group[lapply(Pro.group, length) != 0]

  res <- Reduce(merge.by.key, Pro.group)
  res[is.na(res)] <- 0
  res <- as.data.frame(res, stringsAsFactors = FALSE)
  res
}

Pro(ListFacGroupSub, 0.05)
#                         Seq                 Mod z c2 c3 c4 t2 t3 t4
#1     aAAAAAAAAAAAAAATATAGPR          1-n_acPro/ 2  0  0  1  1  0  1
#2      aAAAAAAAAAAASSPVGVGQR          1-n_acPro/ 2  0  0  1  1  0  1
#3           aAAAAAAAAAGAAGGR          1-n_acPro/ 2  2  0  2  2  2  2
#4      aAAAAAAAGAAGGRGSGPGRR          1-n_acPro/ 2  0  0  2  0  0  2
#5                AAAAAAAkAAK             8-K_ac/ 2  1  0  0  0  0  0
#6                AAAAAAALQAK                     2  0  2  0  2  0  0
#7             aAAAAAGAGPEMVR          1-n_acPro/ 2  2  2  2  2  2  2
#8            aAAAAATAAAAASIR          1-n_acPro/ 2  0  0  0  0  1  0
#9  aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2  0  0  0 18  0  0
#10 aAAAAEQQQFYLLLGNLLSPDNVVR          1-n_acPro/ 2 18  0  0  0  0  0
#11 aAAAAEQQQFYLLLGNLLSPDNVVR          1-n_acPro/ 3  0 18  0  0  0  0
#12           aAAAAVGNAVPCGAR          1-n_acPro/ 2  0  1  0  0  0  0

ProCt <- Pro(ListFacGroup, 0.05)
dim(ProCt)
#[1] 29429     9

# A.K.
#
# ________________________________
# From: Vera Costa <veracosta.rt at gmail.com>
# To: arun <smartpink111 at yahoo.com>
# Sent: Thursday, May 16, 2013 1:37 PM
# Subject: Re: question
#
# Hi. Another thing that I need (and I sent a new format data) is to count
# data like function spec, but for the variable "pro". The idea is exactly the
# same, but I'm having some difficulties because the format of the data...
# The Pro is like ">sp|Q86U42|PABP2_HUMAN,>sp|Q86U42-2|PABP2_HUMAN". The comma
# split 2 pro's.