# Dear Friends,
#
# I'm trying to run a bunch of tasks in parallel using 'Future' package and
# for some reason, it's not able to find the data frames that I want it to
# find. I've created the below sample program to show what I'm doing. Should
# I be exporting the Global data to each child process? I am not doing that
# currently because I read somewhere that it's automatically done when using
# the multisession plan. Any idea what I'm doing wrong?
#
# Thanks
# Ravi

# NOTE(review): why the original script could not find the data frames.
# future() discovers the globals to export by statically inspecting the R
# expression it is given. DATA_ASIA, DATA_EUROPE, ... only ever appear
# *inside SQL strings*, so that inspection cannot see them and they are never
# shipped to the multisession workers; sqldf() then fails to resolve the
# table names there. The fix below names those data frames explicitly via
# the `add` attribute of `globals`, on top of the automatic detection.

# Dependencies ----

# Install whatever is missing. data.table is included because the script
# calls data.table::rbindlist() further down.
required_packages <- c(
  "sqldf", "future", "doFuture", "future.apply", "data.table"
)
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library("sqldf")
library("future")
library("doFuture")
library("future.apply")

# Parallel backend ----

registerDoFuture()
# `globals = TRUE` (automatic detection) is already the default of future(),
# so it does not need to be repeated in plan().
plan(multisession, workers = 5)
options(future.globals.maxSize = +Inf)

# Sample data ----

DATA_ASIA <- data.frame(c("NAME1", "NAME2"))
DATA_EUROPE <- data.frame(c("NAME1", "NAME2", "NAME3"))
DATA_USA <- data.frame(c("NAME1", "NAME2", "NAME3", "NAME4"))
DATA_AFRICA <- data.frame(c("NAME1"))

LEVEL <- c("ASIA_LEVEL", "EUROPE_LEVEL", "USA_LEVEL", "AFRICA_LEVEL")
R_PROG <- c(
  "SELECT COUNT(*) as COUNT FROM DATA_ASIA",
  "SELECT COUNT(*) as COUNT FROM DATA_EUROPE",
  "SELECT COUNT(*) as COUNT FROM DATA_USA",
  "SELECT COUNT(*) as COUNT FROM DATA_AFRICA"
)

# Keep the SQL text as character: on R < 4.0 data.frame() would turn it into
# a factor, and a factor would then be handed to sqldf().
RULES_ALL <- data.frame(LEVEL, R_PROG, stringsAsFactors = FALSE)

# Helper ----

# Start one future that runs every SQL statement in `rules$R_PROG` through
# sqldf() and row-binds the individual results into a single table.
#
# rules       Data frame with a character column `R_PROG` holding one SQL
#             statement per row.
# data_names  Character vector with the names of the data frames that the
#             SQL statements refer to. They are looked up from the calling
#             environment and exported to the worker in addition to the
#             globals future() detects by itself.
#
# Returns the (unresolved) future; collect the result with value().
run_rules_async <- function(rules, data_names = character(0)) {
  # Iterating over the vector itself (instead of 1:nrow(rules)) is also
  # correct when `rules` has zero rows.
  queries <- rules$R_PROG
  future(
    data.table::rbindlist(
      lapply(queries, function(query) sqldf(query)),
      use.names = TRUE,
      fill = TRUE
    ),
    # TRUE keeps automatic detection; `add` appends the data frames that are
    # only mentioned inside the SQL strings.
    globals = structure(TRUE, add = data_names),
    # Make sure sqldf is attached on the worker as well.
    packages = "sqldf"
  )
}

# Launch all regions in parallel ----

RULES_ASIA <- subset(RULES_ALL, LEVEL == "ASIA_LEVEL")
RESULT_ASIA <- run_rules_async(RULES_ASIA, "DATA_ASIA")

RULES_EUROPE <- subset(RULES_ALL, LEVEL == "EUROPE_LEVEL")
RESULT_EUROPE <- run_rules_async(RULES_EUROPE, "DATA_EUROPE")

RULES_USA <- subset(RULES_ALL, LEVEL == "USA_LEVEL")
RESULT_USA <- run_rules_async(RULES_USA, "DATA_USA")

RULES_AFRICA <- subset(RULES_ALL, LEVEL == "AFRICA_LEVEL")
RESULTS_AFRICA <- run_rules_async(RULES_AFRICA, "DATA_AFRICA")

# Collect results (each value() blocks until its future is resolved) ----

RESULT_ASIA <- value(RESULT_ASIA)
RESULT_EUROPE <- value(RESULT_EUROPE)
RESULT_USA <- value(RESULT_USA)
RESULTS_AFRICA <- value(RESULTS_AFRICA)

# --
# This email has been checked for viruses by AVG.
# https://www.avg.com
#
# [[alternative HTML version deleted]]
Hi Ravi, Please read the ?future documentation, the answers to all your questions are explained there. Best, Ista On Thu, May 21, 2020 at 3:20 PM Ravi Jeyaraman <ravi76 at gmail.com> wrote:> > Dear Friends, > > > > I'm trying to run a bunch of tasks in parallel using 'Future' package and > for some reason, it's not able to find the data frames that I want it to > find. I've created the below sample program to show what I'm doing. Should > I be exporting the Global data to each child process? I am not doing that > currently because I read somewhere that it's automatically done when using > the multisession plan. Any idea what I'm doing wrong? > > > > Thanks > > Ravi > > > > > > if(!require('sqldf')) install.packages('sqldf') > > if(!require('future')) install.packages('future') > > if(!require('doFuture')) install.packages('doFuture') > > if(!require('future.apply')) install.packages('future.apply') > > > > library('sqldf') > > library('future') > > library("doFuture") > > library("future.apply") > > > > registerDoFuture() > > plan(multisession, globals = TRUE, workers=5) > > options(future.globals.maxSize=+Inf) > > > > DATA_ASIA <- data.frame(c('NAME1', 'NAME2')) > > DATA_EUROPE <- data.frame(c('NAME1', 'NAME2', 'NAME3')) > > DATA_USA <- data.frame(c('NAME1', 'NAME2', 'NAME3', 'NAME4')) > > DATA_AFRICA <- data.frame(c('NAME1')) > > > > LEVEL <- c('ASIA_LEVEL', 'EUROPE_LEVEL', 'USA_LEVEL', 'AFRICA_LEVEL') > > R_PROG <- c('SELECT COUNT(*) as COUNT FROM DATA_ASIA', > > 'SELECT COUNT(*) as COUNT FROM DATA_EUROPE', > > 'SELECT COUNT(*) as COUNT FROM DATA_USA', > > 'SELECT COUNT(*) as COUNT FROM DATA_AFRICA') > > > > RULES_ALL <- data.frame(LEVEL, R_PROG) > > > > RULES_ASIA <- subset(RULES_ALL, LEVEL == 'ASIA_LEVEL') > > RESULT_ASIA <- future(data.table::rbindlist(lapply(1:nrow(RULES_ASIA), > function(x) sqldf(RULES_ASIA$R_PROG[x])), use.names = TRUE, fill=TRUE)) > > > > RULES_EUROPE <- subset(RULES_ALL, LEVEL == 'EUROPE_LEVEL') > > RESULT_EUROPE <- 
future(data.table::rbindlist(lapply(1:nrow(RULES_EUROPE), > function(x) sqldf(RULES_EUROPE$R_PROG[x])), use.names = TRUE, fill=TRUE)) > > > > RULES_USA <- subset(RULES_ALL, LEVEL == 'USA_LEVEL') > > RESULT_USA <- future(data.table::rbindlist(lapply(1:nrow(RULES_USA), > function(x) sqldf(RULES_USA$R_PROG[x])), use.names = TRUE, fill=TRUE)) > > > > RULES_AFRICA <- subset(RULES_ALL, LEVEL == 'AFRICA_LEVEL') > > RESULTS_AFRICA <- future(data.table::rbindlist(lapply(1:nrow(RULES_AFRICA), > function(x) sqldf(RULES_AFRICA$R_PROG[x])), use.names = TRUE, fill=TRUE)) > > > > RESULT_ASIA <- value(RESULT_ASIA) > > RESULT_EUROPE <- value(RESULT_EUROPE) > > RESULT_USA <- value(RESULT_USA) > > RESULTS_AFRICA <- value(RESULTS_AFRICA) > > > > > > > > > > -- > This email has been checked for viruses by AVG. > https://www.avg.com > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code.
More specifically, read the vignettes. Actually, always start with the package vignettes if any are available. On May 21, 2020 1:51:48 PM PDT, Ista Zahn <istazahn at gmail.com> wrote:>Hi Ravi, > >Please read the ?future documentation, the answers to all your >questions are explained there. > >Best, >Ista > >On Thu, May 21, 2020 at 3:20 PM Ravi Jeyaraman <ravi76 at gmail.com> >wrote: >> >> Dear Friends, >> >> >> >> I'm trying to run a bunch of tasks in parallel using 'Future' package >and >> for some reason, it's not able to find the data frames that I want it >to >> find. I've created the below sample program to show what I'm doing. >Should >> I be exporting the Global data to each child process? I am not doing >that >> currently because I read somewhere that it's automatically done when >using >> the multisession plan. Any idea what I'm doing wrong? >> >> >> >> Thanks >> >> Ravi >> >> >> >> >> >> if(!require('sqldf')) install.packages('sqldf') >> >> if(!require('future')) install.packages('future') >> >> if(!require('doFuture')) install.packages('doFuture') >> >> if(!require('future.apply')) install.packages('future.apply') >> >> >> >> library('sqldf') >> >> library('future') >> >> library("doFuture") >> >> library("future.apply") >> >> >> >> registerDoFuture() >> >> plan(multisession, globals = TRUE, workers=5) >> >> options(future.globals.maxSize=+Inf) >> >> >> >> DATA_ASIA <- data.frame(c('NAME1', 'NAME2')) >> >> DATA_EUROPE <- data.frame(c('NAME1', 'NAME2', 'NAME3')) >> >> DATA_USA <- data.frame(c('NAME1', 'NAME2', 'NAME3', 'NAME4')) >> >> DATA_AFRICA <- data.frame(c('NAME1')) >> >> >> >> LEVEL <- c('ASIA_LEVEL', 'EUROPE_LEVEL', 'USA_LEVEL', 'AFRICA_LEVEL') >> >> R_PROG <- c('SELECT COUNT(*) as COUNT FROM DATA_ASIA', >> >> 'SELECT COUNT(*) as COUNT FROM DATA_EUROPE', >> >> 'SELECT COUNT(*) as COUNT FROM DATA_USA', >> >> 'SELECT COUNT(*) as COUNT FROM DATA_AFRICA') >> >> >> >> RULES_ALL <- data.frame(LEVEL, R_PROG) >> >> >> >> RULES_ASIA <- subset(RULES_ALL, 
LEVEL == 'ASIA_LEVEL') >> >> RESULT_ASIA <- >future(data.table::rbindlist(lapply(1:nrow(RULES_ASIA), >> function(x) sqldf(RULES_ASIA$R_PROG[x])), use.names = TRUE, >fill=TRUE)) >> >> >> >> RULES_EUROPE <- subset(RULES_ALL, LEVEL == 'EUROPE_LEVEL') >> >> RESULT_EUROPE <- >future(data.table::rbindlist(lapply(1:nrow(RULES_EUROPE), >> function(x) sqldf(RULES_EUROPE$R_PROG[x])), use.names = TRUE, >fill=TRUE)) >> >> >> >> RULES_USA <- subset(RULES_ALL, LEVEL == 'USA_LEVEL') >> >> RESULT_USA <- future(data.table::rbindlist(lapply(1:nrow(RULES_USA), >> function(x) sqldf(RULES_USA$R_PROG[x])), use.names = TRUE, >fill=TRUE)) >> >> >> >> RULES_AFRICA <- subset(RULES_ALL, LEVEL == 'AFRICA_LEVEL') >> >> RESULTS_AFRICA <- >future(data.table::rbindlist(lapply(1:nrow(RULES_AFRICA), >> function(x) sqldf(RULES_AFRICA$R_PROG[x])), use.names = TRUE, >fill=TRUE)) >> >> >> >> RESULT_ASIA <- value(RESULT_ASIA) >> >> RESULT_EUROPE <- value(RESULT_EUROPE) >> >> RESULT_USA <- value(RESULT_USA) >> >> RESULTS_AFRICA <- value(RESULTS_AFRICA) >> >> >> >> >> >> >> >> >> >> -- >> This email has been checked for viruses by AVG. >> https://www.avg.com >> >> [[alternative HTML version deleted]] >> >> ______________________________________________ >> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see >> https://stat.ethz.ch/mailman/listinfo/r-help >> PLEASE do read the posting guide >http://www.R-project.org/posting-guide.html >> and provide commented, minimal, self-contained, reproducible code. > >______________________________________________ >R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see >https://stat.ethz.ch/mailman/listinfo/r-help >PLEASE do read the posting guide >http://www.R-project.org/posting-guide.html >and provide commented, minimal, self-contained, reproducible code.-- Sent from my phone. Please excuse my brevity.