Tuexy
2011-Apr-07 07:46 UTC
[R] Two questions about metacharacter in regexprs and function return
Please see the script below. My problems occur at line 10 and line 13. The first problem is that I am trying to retrieve the official gene name from a column of a table. The official name is the text that follows gene_name in each entry. For the detailed problems, please see the comments on the corresponding lines. Any suggestions are appreciated. Example of a source string to match (extract Nnat; sometimes the value would be the string "N/A"): "AB004048|MM8;NCBI Build 36|transcript|chr2|157251580|157253958|ExemplarFor 'AB004048'; gene_id '18111'; transcript_id 'AB004048'; gene_name 'Nnat'; alt '5730414I02Rik|AW107673|Peg5'; neuronatin|http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=full_report&list_uids=18111" #obtain the exprs matrix for cluster analysis #ask questions DEG_files <- grep("bak", dir());#pay attention to the filenames exprs_files <- grep("copy", dir()); protein <- c(); assign_exprs <- function(files, protein) { #use to find the DEGs or exprs for cmeans clustering for(i in 1:length(files)) { microarray_data <- read.csv(file = files[i], header = T, sep "\t"); microarray_data[, 7] <- gsub("([\\s\\S]+gene_name '(\\w*)';.+)", "\\2", microarray_data[, 7], perl = T);#why [\\w]* cannot workable? also the [(\\w*)(N/A)] cannot be workable. assign(files[i], microarray_data, envir=.GlobalEnv); #get(dir()[i]() can obtain the data of interest.`variable_names` can also work protein <- c(protein, get(files[i])[, 7]); #used for obtain all the DEGs only } #return protein; #why this line is not workable? 
assign("all_protein", protein, envir=.GlobalEnv); } exist_to_cluster_exprs <- function(x, cluster_exprs, all) { if(exists("all", x[1])){ #exists function cluster_exprs <- cbind(cluster_exprs, x); } #return cluster_exprs; } assign_exprs(dir()[DEG_files], protein); all_protein <- unique(all_protein); assign_exprs(dir()[exprs_files], protein); for(i in 1:2) { apply(get(dir()[exprs_files[i]]), 1, exist_to_cluster_exprs, cluster_exprs, all); #assign(paste(exprs_files()[i], "exprs_data"), cluster_exprs[, c(2, 3, 5, 7)]; exprs_data <- cbind(exprs_data, cluster_exprs[, 3]); } exprs_data; -- View this message in context: http://r.789695.n4.nabble.com/Two-questions-about-metacharacter-in-regexprs-and-function-return-tp3432692p3432692.html Sent from the R help mailing list archive at Nabble.com.
Tuexy
2011-Apr-07 13:04 UTC
[R] Two questions about metacharacter in regexprs and function return
Please see the script below. My problems occur at line 10 and line 13. The first problem is that I am trying to retrieve the official gene name from a column of a table. The official name is the text that follows gene_name in each entry. For the detailed problems, please see the comments on the corresponding lines. Any suggestions are appreciated. Example of a source string to match (extract Nnat; sometimes the value would be the string "N/A"): "AB004048|MM8;NCBI Build 36|transcript|chr2|157251580|157253958|ExemplarFor 'AB004048'; gene_id '18111'; transcript_id 'AB004048'; gene_name 'Nnat'; alt '5730414I02Rik|AW107673|Peg5'; neuronatin|http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=full_report&list_uids=18111" #obtain the exprs matrix for cluster analysis #ask questions DEG_files <- grep("bak", dir());#pay attention to the filenames exprs_files <- grep("copy", dir()); protein <- c(); assign_exprs <- function(files, protein) { #use to find the DEGs or exprs for cmeans clustering for(i in 1:length(files)) { microarray_data <- read.csv(file = files[i], header = T, sep "\t"); microarray_data[, 7] <- gsub("([\\s\\S]+gene_name '(\\w*)';.+)", "\\2", microarray_data[, 7], perl = T);#why [\\w]* cannot workable? also the [(\\w*)(N/A)] cannot be workable. assign(files[i], microarray_data, envir=.GlobalEnv); #get(dir()[i]() can obtain the data of interest.`variable_names` can also work protein <- c(protein, get(files[i])[, 7]); #used for obtain all the DEGs only } #return protein; #why this line is not workable? 
assign("all_protein", protein, envir=.GlobalEnv); } exist_to_cluster_exprs <- function(x, cluster_exprs, all) { if(exists("all", x[1])){ #exists function cluster_exprs <- cbind(cluster_exprs, x); } #return cluster_exprs; } assign_exprs(dir()[DEG_files], protein); all_protein <- unique(all_protein); assign_exprs(dir()[exprs_files], protein); for(i in 1:2) { apply(get(dir()[exprs_files[i]]), 1, exist_to_cluster_exprs, cluster_exprs, all); #assign(paste(exprs_files()[i], "exprs_data"), cluster_exprs[, c(2, 3, 5, 7)]; exprs_data <- cbind(exprs_data, cluster_exprs[, 3]); } exprs_data; -- View this message in context: http://r.789695.n4.nabble.com/Two-questions-about-metacharacter-in-regexprs-and-function-return-tp3433342p3433342.html Sent from the R help mailing list archive at Nabble.com.