Dear R users, A colleague of mine asked me how to write a script (an executable text file containing R code) in R. After I showed him, he said that after extensive searching of the R archives, he had not found anything like these techniques. He suggested that I share these methods to enable others to leverage R as a better alternative to bash/perl scripts. So in the interest of giving back to the R community, and with all humility, I offer the following small demonstration of one method for creating scripts of R code that are executable from the (at least Linux) command line. I don't make any warrantees that this will work for you, but if it helps somebody at least get starting utilizing R effectively in scripts, then great! Best regards, Jason -- Jason E. Aten, Ph.D. # file: scriptdemo.rsh #!/bin/bash exec R --vanilla -q --slave -e "source(file=pipe(\"tail -n +4 $0\"))" --args $@ #debug: exec R --vanilla --verbose -e "source(file=pipe(\"tail -n +4 $0\"))" --args $@ ### The above line starts R and then reads in this script, starting at line 4: # # scriptdemo.rsh : a simple filter script to demonstrate how to write a script in R that # reads stdin and utilizes command line argv. Also shows how to use ppp() to do # bash scripting like variable substitution, which is really just syntactic # sugar. But sugar can be sweet. # # NB: Only tested on Linux, YMMV, and you may have to adapt to your OS. If it breaks, you # get to keep both pieces. # 1st point of note: notice the exec R invocation above, with the pipe and tail combo. # This file becomes the program read into R. If it is set chmod+x then you can execute this file. pp=function(...) paste(sep="",...) script="scriptdemo.rsh" usage=pp(script,": put help info here") argv = commandArgs(trailingOnly=TRUE) # --help if(any(argv=="--help")) { cat(usage) quit(save="no",status=0) } # 2nd point of note: this is how to read stdin inside a script: # # slurp in all the input r=readLines("stdin") bad=grep("^#",r) # remove comments # write out lines that didn't start with # cat(r[setdiff(1:length(r),bad)],sep="\n") # 3rd point of note: if you want nice bash shell scripting string substitution and backticking # you can use my ppp() function. Note it's not well vectorized at the moment, so it will expect # variables that are substituted from the environment to be of length 1. # A bit hackish in places (sure the |@|@| and 34HERE43 stuff makes me wince), but it gets the job done, # as it's meant as a proof of concept. ########################## # utility functions leading up to final definition of ppp() : shell scripting like facilities for R # Skip to the end of this file to see what ppp() does for you. ########################## # delete one trailing whitespace chomp=function(x) { n=nchar(x) a=substr(x,n,n) w=which(a==" " | a == "\n" | a=="\t") if (length(w)) { x[w]=substr(x[w],1,n[w]-1) } x } # delete one leading whitespace prechomp=function(x) { n=nchar(x) a=substr(x,1,1) w=which(a==" " | a == "\n" | a == "\t") if (length(w)) { x[w]=substr(x[w],2,n[w]) } x } # eliminate whitespace leading/trailing from a string trim=function(x) { y=chomp(x) while(any(y!=x)) { x=y y=chomp(x) } y=prechomp(x) while(any(y!=x)) { x=y y=prechomp(x) } x } strsplit2=function(x,split,...) { # detect trailing split : and add "" afterwards, so we know if it was there. a=strsplit(pp(x,"|@|@|"),split,...) lapply(a,function(x) gsub("|@|@|","",x,fixed=TRUE)) } strsplit3=function(x,split,keepsplit=FALSE,...) { if (keepsplit) { repstring="34HERE43" if (length(grep(repstring,x))) { die(repstring, " repstring already found. Arg! Aborting") } # sanity check # note where we want to split, using \\1 backref to keep the original a=gsub(pattern=pp("(",split,")") ,replacement=pp(repstring,"\\1"),x) } else { a=x repstring=split } b=strsplit2(a,repstring,...) # split, keeping the original delimiters } pp=function(...) paste(...,sep="") # pp() must be defined in outermost scope for ppp() to work replacer=function(s,begin.string="${",end.string="}",keepend=FALSE,require.end=TRUE) { translate.env=function(x) { if (exists(x)) return(x) a=Sys.getenv(x) if (a!="") return(pp("\"",a,"\"")) x } parts=strsplit2(s,begin.string,fixed=T)[[1]] if (length(parts) < 2 || all(parts=="")) return(s) if (any(trim(parts[-1])=="")) { warning(pp("ppp::replacer(): found begin.string '",begin.string,"' in '",s,"' but had empty/blankspace/end of string following it.")) return(s) } collap=c() collap[1]=parts[1] for (i in 2:length(parts)) { tmp=strsplit3(parts[i],end.string,keepsplit=keepend)[[1]] if (length(tmp)==1) { if (require.end) { warning(pp("ppp::replacer(): could not find end.string '",end.string,"' in string '",s,"' and require.end=TRUE, so karping.")) collap[(i-1)*2]=parts[i] collap[(i-1)*2+1]="" } else { collap[(i-1)*2]=pp("\",",translate.env(tmp[1]),",\"") # allow newline to terminate as well, if end not required collap[(i-1)*2+1]="" } } else { collap[(i-1)*2]=pp("\",",translate.env(tmp[1]),",\"") # collect the rest of parts[i] following tmp[1] and the end.string (assumes end.string is only ever length 1) collap[(i-1)*2+1]=substr(parts[i],nchar(tmp[1])+1+(1-as.numeric(keepend)),nchar(parts[i])) } } text=pp("pp(\"",pp(collap,collapse=""),"\")") # sys.frame(-2) is necessary to get definitions from calling function before where we were defined. if (sys.nframe() > 1) { ftext=eval(parse(text=text),envir=sys.frame(-2)) } else { ftext=eval(parse(text=text)) } ftext } pp=function(...) paste(...,sep="") # must be defined in outermost scope for ppp() to work # shell like string interpolation... ppp("fill in ${myvar} here after `hostname` is $myvar") ppp=function(...) { sa=paste(sep="",...) res=c() for (j in 1:length(sa)) { s=sa[j] s2=replacer(s,"${","}") terminators="\t| |\\.|`|\\$|\\{|\\}|\\(|\\)|<|>|\\|" s3=replacer(s2,"$",terminators,keepend=TRUE,require.end=FALSE) # !require.end allows end of line termination res[j]=s3 } do.sys.expecting.output=function(cmd) { got=pp(system(intern=T,cmd),collapse="\n") if (got=="") die("do.sys() on '",cmd,"' returned no output.") got } # now check for backtick system call requests as well, *after* variable substitution is all finished. bt=grep("`",res) if(length(bt)) { sa=res[bt] for (j in 1:length(sa)) { s=sa[j] parts=strsplit2(s,"`",fixed=T)[[1]] if (length(parts) < 3) { res[bt[j]]=s; next; } collap=c() collap[1]=parts[1] for (i in seq(2,length(parts)-1,2)) { cmd=parts[i] collap[i]=do.sys.expecting.output(cmd) collap[i+1]=parts[i+1] } text=pp(collap,collapse="") res[bt[j]]=text } # for j } #end if length(bt) res } # # now demonstrate the use of ppp() in a scripting context: # today="date" month=3 year=2010 show=list() show$syntax = 43 Sys.setenv(AN_ENV_VAR="greetings (I'm an env var!)") demo=ppp("Getting $AN_ENV_VAR from the environment, on `$today`, substituting ${show$syntax} in named lists is also possible. `cal $month $year| head` ") cat("here's the demo output\n") cat(demo,sep="\n") ## # output of this demo script when run, show how to use stdin and ppp() ## ## ## me@host:~/uns/bin$ cat ~/tmp/test | template2.rsh ## not comment 1 ## not comment 2 ## not comment 3 ## not comment 4 ## not comment 5 ## here's the demo output ## Getting greetings (I'm an env var!) from the environment, on Mon Mar 29 10:23:49 CDT 2010, substituting 43 in named lists is also possible. March 2010 ## Su Mo Tu We Th Fr Sa ## 1 2 3 4 5 6 ## 7 8 9 10 11 12 13 ## 14 15 16 17 18 19 20 ## 21 22 23 24 25 26 27 ## 28 29 30 31 ## ## me@host:~/uns/bin$ cat ~/tmp/test ## # comment 1 ## not comment 1 ## not comment 2 ## # comment 2 ## not comment 3 ## not comment 4 ## not comment 5 ## # comment 3 [[alternative HTML version deleted]]
Hi Jason, Thanks for sharing your solution(s). For other alternatives for running R scripts, you (or your colleague) might want to look into: * Rscript (comes installed with R (these days)) * littler (http://code.google.com/p/littler/) Also, there are some libraries that deal with parsing command line arguments, such as: * http://cran.r-project.org/web/packages/getopt/index.html * http://cran.stat.auckland.ac.nz/web/packages/optparse/index.html -steve -- Steve Lianoglou Graduate Student: Computational Systems Biology | Memorial Sloan-Kettering Cancer Center | Weill Medical College of Cornell University Contact Info: http://cbio.mskcc.org/~lianos/contact
Thanks. You might want to repost it as a text attachment since many of the lines wrapped around. Another more permanent possibility would be to put it on the R wiki at http://rwiki.sciviews.org/doku.php Note that the gsubfn package has a facility for quasi-perl type string interpolation as well. Just preface any function with fn$ and the facility is applied to the arguments of the function (subject to certain heuristics which determine which args to apply it to).> library(gsubfn) > today <- format(Sys.Date()) > show <- list() > show$syntax <- 43 > Sys.setenv(AN_ENV_VAR="greetings (I'm an env var!)") > > fn$cat('Getting `Sys.getenv("AN_ENV_VAR")` from the environment, on $today,+ `show$syntax` is also possible.\n') Getting greetings (I'm an env var!) from the environment, on 2010-03-29, 43 is also possible. On Mon, Mar 29, 2010 at 11:41 AM, Jason E. Aten <j.e.aten at gmail.com> wrote:> Dear R users, > > A colleague of mine asked me how to write a script (an executable text file > containing R code) in R. After I showed > him, he said that after extensive searching of the R archives, he had not > found anything like these techniques. > > He suggested that I share these methods to enable others to leverage R as a > better alternative to bash/perl scripts. > > So in the interest of giving back to the R community, and with all humility, > I offer the > following small demonstration of one method for creating scripts of R code > that are > executable from the (at least Linux) command line. > > I don't make any warrantees that this will work for you, but if it helps > somebody at least > get starting utilizing R effectively in scripts, then great! > > Best regards, > > Jason > > -- > Jason E. Aten, Ph.D. > > > # file: scriptdemo.rsh > > #!/bin/bash > exec R --vanilla -q --slave -e "source(file=pipe(\"tail -n +4 $0\"))" --args > $@ > #debug: exec R --vanilla --verbose -e "source(file=pipe(\"tail -n +4 $0\"))" > --args $@ > ### The above line starts R and then reads in this script, starting at line > 4: > # > # scriptdemo.rsh : a simple filter script to demonstrate how to write a > script in R that > # ? ? ? ? ? ? ? ?reads stdin and utilizes command line argv. Also shows how > to use ppp() to do > # ? ? ? ? ? ? ? ?bash scripting like variable substitution, which is really > just syntactic > # ? ? ? ? ? ? ? ?sugar. But sugar can be sweet. > # > # NB: Only tested on Linux, YMMV, and you may have to adapt to your OS. If > it breaks, you > # ? ? get to keep both pieces. > > # 1st point of note: notice the exec R invocation above, with the pipe and > tail combo. > # ? This file becomes the program read into R. If it is set chmod+x then you > can execute this file. > > ?pp=function(...) paste(sep="",...) > ?script="scriptdemo.rsh" > ?usage=pp(script,": put help info here") > > ?argv = commandArgs(trailingOnly=TRUE) > > > ?# --help > ?if(any(argv=="--help")) { > ? ?cat(usage) > ? ?quit(save="no",status=0) > ?} > > # 2nd point of note: this is how to read stdin inside a script: > # > > ?# slurp in all the input > ?r=readLines("stdin") > > ?bad=grep("^#",r) # remove comments > > ?# write out lines that didn't start with # > ?cat(r[setdiff(1:length(r),bad)],sep="\n") > > > # 3rd point of note: if you want nice bash shell scripting string > substitution and backticking > # ?you can use my ppp() function. Note it's not well vectorized at the > moment, so it will expect > # ?variables that are substituted from the environment to be of length 1. > # ?A bit hackish in places (sure the |@|@| and 34HERE43 stuff makes me > wince), but it gets the job done, > # ?as it's meant as a proof of concept. > > ########################## > # utility functions leading up to final definition of ppp() : shell > scripting like facilities for R > # ?Skip to the end of this file to see what ppp() does for you. > ########################## > > # delete one trailing whitespace > chomp=function(x) { > ? n=nchar(x) > ? a=substr(x,n,n) > ? w=which(a==" " | a == "\n" | a=="\t") > ? if (length(w)) { > ? ? x[w]=substr(x[w],1,n[w]-1) > ? } > ? x > } > > # delete one leading whitespace > prechomp=function(x) { > ? n=nchar(x) > ? a=substr(x,1,1) > ? w=which(a==" " | a == "\n" | a == "\t") > ? if (length(w)) { > ? ? x[w]=substr(x[w],2,n[w]) > ? } > ? x > } > > > # eliminate whitespace leading/trailing from a string > trim=function(x) { > ? y=chomp(x) > ? while(any(y!=x)) { > ? ? x=y > ? ? y=chomp(x) > ? } > > ? y=prechomp(x) > ? while(any(y!=x)) { > ? ? x=y > ? ? y=prechomp(x) > ? } > > ?x > } > > strsplit2=function(x,split,...) { > ? ?# detect trailing split : and add "" afterwards, so we know if it was > there. > ? ?a=strsplit(pp(x,"|@|@|"),split,...) > ? ?lapply(a,function(x) gsub("|@|@|","",x,fixed=TRUE)) > } > > strsplit3=function(x,split,keepsplit=FALSE,...) { > ? ?if (keepsplit) { > ? ? ?repstring="34HERE43" > ? ? ?if (length(grep(repstring,x))) { die(repstring, " repstring already > found. Arg! Aborting") ?} # sanity check > ? ? ?# note where we want to split, using \\1 backref to keep the original > ? ? ?a=gsub(pattern=pp("(",split,")") ,replacement=pp(repstring,"\\1"),x) > ? ?} else { > ? ? ? a=x > ? ? ? repstring=split > ? ?} > ? ?b=strsplit2(a,repstring,...) # split, keeping the original delimiters > } > > pp=function(...) paste(...,sep="") # pp() must be defined in outermost scope > for ppp() to work > > replacer=function(s,begin.string="${",end.string="}",keepend=FALSE,require.end=TRUE) > { > > ? ? translate.env=function(x) { > ? ? ? if (exists(x)) return(x) > ? ? ? a=Sys.getenv(x) > ? ? ? if (a!="") return(pp("\"",a,"\"")) > ? ? ? x > ? ? } > > ? ? parts=strsplit2(s,begin.string,fixed=T)[[1]] > ? ? if (length(parts) < 2 || all(parts=="")) return(s) > ? ? if (any(trim(parts[-1])=="")) { > ? ? ? ?warning(pp("ppp::replacer(): found begin.string '",begin.string,"' > in '",s,"' but had empty/blankspace/end of string following it.")) > ? ? ? ?return(s) > ? ? } > > ? ? collap=c() > ? ? collap[1]=parts[1] > ? ? for (i in 2:length(parts)) { > ? ? ? tmp=strsplit3(parts[i],end.string,keepsplit=keepend)[[1]] > ? ? ? if (length(tmp)==1) { > ? ? ? ? if (require.end) { > ? ? ? ? ? warning(pp("ppp::replacer(): could not find end.string > '",end.string,"' in string '",s,"' and require.end=TRUE, so karping.")) > ? ? ? ? ? collap[(i-1)*2]=parts[i] > ? ? ? ? ? collap[(i-1)*2+1]="" > ? ? ? ? } else { > ? ? ? ? ? collap[(i-1)*2]=pp("\",",translate.env(tmp[1]),",\"") # allow > newline to terminate as well, if end not required > ? ? ? ? ? collap[(i-1)*2+1]="" > ? ? ? ? } > ? ? ? } else { > ? ? ? ? collap[(i-1)*2]=pp("\",",translate.env(tmp[1]),",\"") > ? ? ? ? # collect the rest of parts[i] following tmp[1] and the end.string > (assumes end.string is only ever length 1) > > collap[(i-1)*2+1]=substr(parts[i],nchar(tmp[1])+1+(1-as.numeric(keepend)),nchar(parts[i])) > ? ? ? } > ? ? } > ? ? text=pp("pp(\"",pp(collap,collapse=""),"\")") > > ? ? # sys.frame(-2) is necessary to get definitions from calling function > before where we were defined. > ? ? if (sys.nframe() > 1) { > ? ? ? ftext=eval(parse(text=text),envir=sys.frame(-2)) > ? ? } else { > ? ? ? ftext=eval(parse(text=text)) > ? ? } > ? ? ftext > } > > pp=function(...) paste(...,sep="") # must be defined in outermost scope for > ppp() to work > > # shell like string interpolation... ppp("fill in ${myvar} here after > `hostname` is $myvar") > ppp=function(...) { > > ?sa=paste(sep="",...) > ?res=c() > ?for (j in 1:length(sa)) { > ? ? s=sa[j] > ? ? s2=replacer(s,"${","}") > ? ? terminators="\t| |\\.|`|\\$|\\{|\\}|\\(|\\)|<|>|\\|" > ? ? s3=replacer(s2,"$",terminators,keepend=TRUE,require.end=FALSE) # > !require.end allows end of line termination > ? ? res[j]=s3 > ?} > > ?do.sys.expecting.output=function(cmd) { > ? ?got=pp(system(intern=T,cmd),collapse="\n") > ? ?if (got=="") die("do.sys() on '",cmd,"' returned no output.") > ? ?got > ?} > > ?# now check for backtick system call requests as well, *after* variable > substitution is all finished. > ?bt=grep("`",res) > ?if(length(bt)) { > ? ? sa=res[bt] > ? ? for (j in 1:length(sa)) { > ? ? ? ? s=sa[j] > ? ? ? ? parts=strsplit2(s,"`",fixed=T)[[1]] > ? ? ? ? if (length(parts) < 3) { res[bt[j]]=s; next; } > > ? ? ? ? collap=c() > ? ? ? ? collap[1]=parts[1] > ? ? ? ? for (i in seq(2,length(parts)-1,2)) { > ? ? ? ? ? ?cmd=parts[i] > ? ? ? ? ? ?collap[i]=do.sys.expecting.output(cmd) > ? ? ? ? ? ?collap[i+1]=parts[i+1] > ? ? ? ? } > ? ? ? ? text=pp(collap,collapse="") > ? ? ? ? res[bt[j]]=text > ? ? ? } # for j > ? ? } #end if length(bt) > ?res > } > > > > # > # now demonstrate the use of ppp() in a scripting context: > # > > today="date" > month=3 > year=2010 > > show=list() > show$syntax = 43 > > Sys.setenv(AN_ENV_VAR="greetings (I'm an env var!)") > > demo=ppp("Getting $AN_ENV_VAR from the environment, on `$today`, > substituting ${show$syntax} in named lists is also possible. `cal $month > $year| head` ") > > cat("here's the demo output\n") > cat(demo,sep="\n") > > > ## # output of this demo script when run, show how to use stdin and ppp() > ## > ## > ## me at host:~/uns/bin$ cat ~/tmp/test | template2.rsh > ## not comment 1 > ## not comment 2 > ## not comment 3 > ## not comment 4 > ## not comment 5 > ## here's the demo output > ## Getting greetings (I'm an env var!) from the environment, on Mon Mar 29 > 10:23:49 CDT 2010, substituting 43 in named lists is also possible. > ?March 2010 > ## Su Mo Tu We Th Fr Sa > ## ? ? 1 ?2 ?3 ?4 ?5 ?6 > ## ?7 ?8 ?9 10 11 12 13 > ## 14 15 16 17 18 19 20 > ## 21 22 23 24 25 26 27 > ## 28 29 30 31 > ## > ## me at host:~/uns/bin$ cat ~/tmp/test > ## # comment 1 > ## not comment 1 > ## not comment 2 > ## # comment 2 > ## not comment 3 > ## not comment 4 > ## not comment 5 > ## # comment 3 >
Okay, I'll try again with .txt extension. Thanks David. On Mon, Mar 29, 2010 at 12:50 PM, David Winsemius <dwinsemius at comcast.net>wrote:> I would have made it through the mail-server had you given it an extension > of .txt but not so with the .rsh extension. > > > > On Mar 29, 2010, at 12:31 PM, Jason E. Aten wrote: > > Thanks Gabor. I didn't realize you could. Here is the scriptdemo.rsh >> file >> as a text attachment, in case the line wraps made it hard to read/use. >> >> - Jason >> >> On Mon, Mar 29, 2010 at 11:19 AM, Gabor Grothendieck < >> ggrothendieck at gmail.com> wrote: >> >> Thanks. >>> >>> You might want to repost it as a text attachment since many of the >>> lines wrapped around. >>> >>> Another more permanent possibility would be to put it on the R wiki at >>> http://rwiki.sciviews.org/doku.php >>> >>> Note that the gsubfn package has a facility for quasi-perl type string >>> interpolation as well. Just preface any function with fn$ and the >>> facility is applied to the arguments of the function (subject to >>> certain heuristics which determine which args to apply it to). >>> >>> library(gsubfn) >>>> today <- format(Sys.Date()) >>>> show <- list() >>>> show$syntax <- 43 >>>> Sys.setenv(AN_ENV_VAR="greetings (I'm an env var!)") >>>> >>>> fn$cat('Getting `Sys.getenv("AN_ENV_VAR")` from the environment, on >>>> >>> $today, >>> + `show$syntax` is also possible.\n') >>> Getting greetings (I'm an env var!) from the environment, on 2010-03-29, >>> 43 is also possible. >>> >>> >>> >>> On Mon, Mar 29, 2010 at 11:41 AM, Jason E. Aten <j.e.aten at gmail.com> >>> wrote: >>> >>>> Dear R users, >>>> >>>> A colleague of mine asked me how to write a script (an executable text >>>> >>> file >>> >>>> containing R code) in R. After I showed >>>> him, he said that after extensive searching of the R archives, he had >>>> not >>>> found anything like these techniques. >>>> >>>> He suggested that I share these methods to enable others to leverage R >>>> as >>>> >>> a >>> >>>> better alternative to bash/perl scripts. >>>> >>>> So in the interest of giving back to the R community, and with all >>>> >>> humility, >>> >>>> I offer the >>>> following small demonstration of one method for creating scripts of R >>>> >>> code >>> >>>> that are >>>> executable from the (at least Linux) command line. >>>> >>>> I don't make any warrantees that this will work for you, but if it helps >>>> somebody at least >>>> get starting utilizing R effectively in scripts, then great! >>>> >>>> Best regards, >>>> >>>> Jason >>>> >>>> -- >>>> Jason E. Aten, Ph.D. >>>> >>>> >>>> # file: scriptdemo.rsh >>>> >>>> >>> ______________________________________________ >> R-help at r-project.org mailing list >> https://stat.ethz.ch/mailman/listinfo/r-help >> PLEASE do read the posting guide >> http://www.R-project.org/posting-guide.html >> and provide commented, minimal, self-contained, reproducible code. >> > > David Winsemius, MD > West Hartford, CT > >-- Jason E. Aten, Ph.D. (310) 429-4566 cell -------------- next part -------------- #!/bin/bash exec R --vanilla -q --slave -e "source(file=pipe(\"tail -n +4 $0\"))" --args $@ #debug: exec R --vanilla --verbose -e "source(file=pipe(\"tail -n +4 $0\"))" --args $@ ### The above line starts R and then reads in this script, starting at line 4: # # scriptdemo.rsh : a simple filter script to demostrate how to write a script in R that # reads stdin and utilizes command line argv. Also shows how to do # bash scripting like variable substitution. Really just syntactic # sugar. But sugar can be sweet. # # NB: Only tested on Linux, YMMV, and you may have to adapt to your OS. # 1st point of note: notice the exec R invocation above, with the pipe and tail combo. # This file becomes the program read into R. pp=function(...) paste(sep="",...) script="scriptdemo.rsh" usage=pp(script,": put help info here") argv = commandArgs(trailingOnly=TRUE) # --help if(any(argv=="--help")) { cat(usage) quit(save="no",status=0) } # 2nd point of note: this is how to read stdin inside a script: # # slurp in all the input r=readLines("stdin") bad=grep("^#",r) # remove comments # write out lines that didn't start with # cat(r[setdiff(1:length(r),bad)],sep="\n") # 3rd point of note: if you want nice bash shell scripting string substitution and backticking # you can use my ppp() function. Note it's not well vectorized at the moment, so it will expect # variables that are substituted from the environment to be of length 1. # A bit hackish in places (sure the |@|@| and 34HERE43 stuff makes me wince), but it gets the job done, # as it's meant as a proof of concept. ########################## # utility functions leading up to final definition of ppp() : shell scripting like facilities for R # Skip down to the end of this file to see what ppp() does, example output is there. ########################## # delete one trailing whitespace chomp=function(x) { n=nchar(x) a=substr(x,n,n) w=which(a==" " | a == "\n" | a=="\t") if (length(w)) { x[w]=substr(x[w],1,n[w]-1) } x } # delete one leading whitespace prechomp=function(x) { n=nchar(x) a=substr(x,1,1) w=which(a==" " | a == "\n" | a == "\t") if (length(w)) { x[w]=substr(x[w],2,n[w]) } x } # eliminate whitespace leading/trailing from a string trim=function(x) { y=chomp(x) while(any(y!=x)) { x=y y=chomp(x) } y=prechomp(x) while(any(y!=x)) { x=y y=prechomp(x) } x } strsplit2=function(x,split,...) { # detect trailing split : and add "" afterwards, so we know if it was there. a=strsplit(pp(x,"|@|@|"),split,...) lapply(a,function(x) gsub("|@|@|","",x,fixed=TRUE)) } strsplit3=function(x,split,keepsplit=FALSE,...) { if (keepsplit) { repstring="34HERE43" if (length(grep(repstring,x))) { stop(pp(repstring, " repstring already found. Arg! Aborting")) } # sanity check # note where we want to split, using \\1 backref to keep the original a=gsub(pattern=pp("(",split,")") ,replacement=pp(repstring,"\\1"),x) } else { a=x repstring=split } b=strsplit2(a,repstring,...) # split, keeping the original delimiters } pp=function(...) paste(...,sep="") # pp() must be defined in outermost scope for ppp() to work replacer=function(s,begin.string="${",end.string="}",keepend=FALSE,require.end=TRUE) { translate.env=function(x) { if (exists(x)) return(x) a=Sys.getenv(x) if (a!="") return(pp("\"",a,"\"")) x } parts=strsplit2(s,begin.string,fixed=T)[[1]] if (length(parts) < 2 || all(parts=="")) return(s) if (any(trim(parts[-1])=="")) { warning(pp("ppp::replacer(): found begin.string '",begin.string,"' in '",s,"' but had empty/blankspace/end of string following it.")) return(s) } collap=c() collap[1]=parts[1] for (i in 2:length(parts)) { tmp=strsplit3(parts[i],end.string,keepsplit=keepend)[[1]] if (length(tmp)==1) { if (require.end) { warning(pp("ppp::replacer(): could not find end.string '",end.string,"' in string '",s,"' and require.end=TRUE, so karping.")) collap[(i-1)*2]=parts[i] collap[(i-1)*2+1]="" } else { collap[(i-1)*2]=pp("\",",translate.env(tmp[1]),",\"") # allow newline to terminate as well, if end not required collap[(i-1)*2+1]="" } } else { collap[(i-1)*2]=pp("\",",translate.env(tmp[1]),",\"") # collect the rest of parts[i] following tmp[1] and the end.string (assumes end.string is only ever length 1) collap[(i-1)*2+1]=substr(parts[i],nchar(tmp[1])+1+(1-as.numeric(keepend)),nchar(parts[i])) } } text=pp("pp(\"",pp(collap,collapse=""),"\")") # sys.frame(-2) is necessary to get definitions from calling function before where we were defined. if (sys.nframe() > 1) { ftext=eval(parse(text=text),envir=sys.frame(-2)) } else { ftext=eval(parse(text=text)) } ftext } pp=function(...) paste(...,sep="") # must be defined in outermost scope for ppp() to work # shell like string interpolation... ppp("fill in ${myvar} here after `hostname` is $myvar") ppp=function(...) { sa=paste(sep="",...) res=c() for (j in 1:length(sa)) { s=sa[j] s2=replacer(s,"${","}") terminators="\t| |\\.|`|\\$|\\{|\\}|\\(|\\)|<|>|\\|" s3=replacer(s2,"$",terminators,keepend=TRUE,require.end=FALSE) # !require.end allows end of line termination res[j]=s3 } do.sys.expecting.output=function(cmd) { got=pp(system(intern=T,cmd),collapse="\n") if (got=="") die("do.sys() on '",cmd,"' returned no output.") got } # now check for backtick system call requests as well, *after* variable substitution is all finished. bt=grep("`",res) if(length(bt)) { sa=res[bt] for (j in 1:length(sa)) { s=sa[j] parts=strsplit2(s,"`",fixed=T)[[1]] if (length(parts) < 3) { res[bt[j]]=s; next; } collap=c() collap[1]=parts[1] for (i in seq(2,length(parts)-1,2)) { cmd=parts[i] collap[i]=do.sys.expecting.output(cmd) collap[i+1]=parts[i+1] } text=pp(collap,collapse="") res[bt[j]]=text } # for j } #end if length(bt) res } # # now demonstrate the use of ppp() in a scripting context: # today="date" month=3 year=2010 show=list() show$syntax = 43 Sys.setenv(AN_ENV_VAR="greetings (I'm an env var!)") demo=ppp("Getting $AN_ENV_VAR from the environment, on `$today`, substituting ${show$syntax} in named lists is also possible. `cal $month $year| head` ") cat("here's the demo output\n") cat(demo,sep="\n") ## # output of this demo script when run, show how to use stdin and ppp() ## ## ## me at host:~/uns/bin$ cat ~/tmp/test | scriptdemo.rsh ## not comment 1 ## not comment 2 ## not comment 3 ## not comment 4 ## not comment 5 ## here's the demo output ## Getting greetings (I'm an env var!) from the environment, on Mon Mar 29 10:23:49 CDT 2010, substituting 43 in named lists is also possible. March 2010 ## Su Mo Tu We Th Fr Sa ## 1 2 3 4 5 6 ## 7 8 9 10 11 12 13 ## 14 15 16 17 18 19 20 ## 21 22 23 24 25 26 27 ## 28 29 30 31 ## ## me at host:~/uns/bin$ cat ~/tmp/test ## # comment 1 ## not comment 1 ## not comment 2 ## # comment 2 ## not comment 3 ## not comment 4 ## not comment 5 ## # comment 3