I'm trying to analyze the following data set (sample):
"ID" "adj.P.Val" "logFC"
"Gene.symbol"
"1419156_at" "5.32e-12" "2.6462565"
"Sox4"
"1433575_at" "5.32e-12" "3.9417089"
"Sox4"
"1428942_at" "2.64e-11" "3.9163618"
"Mt2"
"1454699_at" "2.69e-10" "1.8654677"
"LOC100047324///Sesn1"
"1416926_at" "3.19e-10" "2.172342"
"Trp53inp1"
"1422557_s_at" "1.58e-09" "2.9569254"
"Mt1"
etc.
using the following code:
muscle <- read.table(file = "/Users/bob/Desktop/Muscle/musclesmall.txt",
                     header = TRUE, colClasses = "character", fill = TRUE)

## colClasses = "character" makes every column text, so convert the two
## numeric columns before comparing; otherwise R compares them as strings.
muscle$adj.P.Val <- as.numeric(muscle$adj.P.Val)
muscle$logFC     <- as.numeric(muscle$logFC)

upregulated_list   <- c()
downregulated_list <- c()
nochange           <- c()
p_thresh <- 6.51e-06

x <- 1
while (x <= nrow(muscle)) {
  this_pval <- muscle[x, "adj.P.Val"]
  this_M    <- muscle[x, "logFC"]
  if (muscle[x, "Gene.symbol"] != "") {
    if (this_M >= 1 && this_pval <= p_thresh) {
      ## Significant and up at least two-fold.
      upregulated_list <- c(upregulated_list, muscle[x, "Gene.symbol"])
    } else if (this_M <= -1 && this_pval <= p_thresh) {
      ## Significant and down at least two-fold.
      downregulated_list <- c(downregulated_list, muscle[x, "Gene.symbol"])
    } else if (this_M > -1 && this_M < 1) {
      ## Fold change too small in either direction.
      nochange <- c(nochange, muscle[x, "Gene.symbol"])
    }
  }
  ## Increment unconditionally: my first version incremented only inside
  ## each branch, so a row with |logFC| >= 1 but p above the threshold
  ## matched no branch and the loop never advanced.
  x <- x + 1
}
This loop, however, processes the data one row at a time, and the full file
has 22,000 rows, so a run takes an enormous amount of time. Is there any way
for me to do the analysis faster?
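My guess is that the answer involves replacing the loop with vectorized
comparisons. Below is an untested sketch of what I have in mind (it assumes
adj.P.Val and logFC have already been converted to numeric, as above); is
this the right direction?

## Untested sketch: classify all 22,000 rows at once with logical indexing
## instead of looping row by row.
has_symbol <- muscle$Gene.symbol != ""
sig        <- muscle$adj.P.Val <= p_thresh

upregulated_list   <- muscle$Gene.symbol[has_symbol & sig & muscle$logFC >= 1]
downregulated_list <- muscle$Gene.symbol[has_symbol & sig & muscle$logFC <= -1]
nochange           <- muscle$Gene.symbol[has_symbol & abs(muscle$logFC) < 1]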