Tuexy
2011-Apr-07 07:46 UTC
[R] Two questions about metacharacter in regexprs and function return
for the script, please kindly see the script below. At line 10 and line 13,
my problems occurs.
The first one is I try to retrieve the gene official name from a column of a
table. The pattern of official name is something starting with gene_name.
For detail problems, please see the according lines.
Any suggestions are appreciated
example of matching source (extract the Nnat, sometime it would be the
character "N/A"):
"AB004048|MM8;NCBI Build 36|transcript|chr2|157251580|157253958|ExemplarFor
'AB004048'; gene_id '18111'; transcript_id 'AB004048';
gene_name 'Nnat'; alt
'5730414I02Rik|AW107673|Peg5';
neuronatin|http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=full_report&list_uids=18111"
#obtain the exprs matrix for cluster analysis
#ask questions
DEG_files <- grep("bak", dir());#pay attention to the filenames
exprs_files <- grep("copy", dir());
protein <- c();
assign_exprs <- function(files, protein) { #use to find the DEGs or exprs
for cmeans clustering
for(i in 1:length(files)) {
microarray_data <- read.csv(file = files[i], header = T, sep
"\t");
microarray_data[, 7] <- gsub("([\\s\\S]+gene_name
'(\\w*)';.+)",
"\\2", microarray_data[, 7], perl = T);#why [\\w]* cannot workable?
also the
[(\\w*)(N/A)] cannot be workable.
assign(files[i], microarray_data, envir=.GlobalEnv); #get(dir()[i]()
can obtain the data of interest.`variable_names` can also work
protein <- c(protein, get(files[i])[, 7]); #used for obtain all
the DEGs only
}
#return protein; #why this line is not workable?
assign("all_protein", protein, envir=.GlobalEnv);
}
exist_to_cluster_exprs <- function(x, cluster_exprs, all) {
if(exists("all", x[1])){ #exists function
cluster_exprs <- cbind(cluster_exprs, x);
}
#return cluster_exprs;
}
assign_exprs(dir()[DEG_files], protein);
all_protein <- unique(all_protein);
assign_exprs(dir()[exprs_files], protein);
for(i in 1:2) {
apply(get(dir()[exprs_files[i]]), 1, exist_to_cluster_exprs,
cluster_exprs, all);
#assign(paste(exprs_files()[i], "exprs_data"), cluster_exprs[,
c(2,
3, 5, 7)];
exprs_data <- cbind(exprs_data, cluster_exprs[, 3]);
}
exprs_data;
--
View this message in context:
http://r.789695.n4.nabble.com/Two-questions-about-metacharacter-in-regexprs-and-function-return-tp3432692p3432692.html
Sent from the R help mailing list archive at Nabble.com.
Tuexy
2011-Apr-07 13:04 UTC
[R] Two questions about metacharacter in regexprs and function return
for the script, please kindly see the script below. At line 10 and line 13,
my problems occurs.
The first one is I try to retrieve the gene official name from a column of a
table. The pattern of official name is something starting with gene_name.
For detail problems, please see the according lines.
Any suggestions are appreciated
example of matching source (extract the Nnat, sometime it would be the
character "N/A"):
"AB004048|MM8;NCBI Build 36|transcript|chr2|157251580|157253958|ExemplarFor
'AB004048'; gene_id '18111'; transcript_id 'AB004048';
gene_name 'Nnat'; alt
'5730414I02Rik|AW107673|Peg5';
neuronatin|http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=full_report&list_uids=18111"
#obtain the exprs matrix for cluster analysis
#ask questions
DEG_files <- grep("bak", dir());#pay attention to the filenames
exprs_files <- grep("copy", dir());
protein <- c();
assign_exprs <- function(files, protein) { #use to find the DEGs or exprs
for cmeans clustering
for(i in 1:length(files)) {
microarray_data <- read.csv(file = files[i], header = T, sep
"\t");
microarray_data[, 7] <- gsub("([\\s\\S]+gene_name
'(\\w*)';.+)",
"\\2", microarray_data[, 7], perl = T);#why [\\w]* cannot workable?
also the
[(\\w*)(N/A)] cannot be workable.
assign(files[i], microarray_data, envir=.GlobalEnv); #get(dir()[i]()
can obtain the data of interest.`variable_names` can also work
protein <- c(protein, get(files[i])[, 7]); #used for obtain all
the DEGs only
}
#return protein; #why this line is not workable?
assign("all_protein", protein, envir=.GlobalEnv);
}
exist_to_cluster_exprs <- function(x, cluster_exprs, all) {
if(exists("all", x[1])){ #exists function
cluster_exprs <- cbind(cluster_exprs, x);
}
#return cluster_exprs;
}
assign_exprs(dir()[DEG_files], protein);
all_protein <- unique(all_protein);
assign_exprs(dir()[exprs_files], protein);
for(i in 1:2) {
apply(get(dir()[exprs_files[i]]), 1, exist_to_cluster_exprs,
cluster_exprs, all);
#assign(paste(exprs_files()[i], "exprs_data"), cluster_exprs[,
c(2,
3, 5, 7)];
exprs_data <- cbind(exprs_data, cluster_exprs[, 3]);
}
exprs_data;
--
View this message in context:
http://r.789695.n4.nabble.com/Two-questions-about-metacharacter-in-regexprs-and-function-return-tp3433342p3433342.html
Sent from the R help mailing list archive at Nabble.com.