The answer to your question is yes. You might consider using the parallel
package, and I would suggest starting with a simpler test case to learn how it
works, then incrementally adding complexity in packages and data handling.
--
Sent from my phone. Please excuse my brevity.
On April 20, 2016 7:37:07 AM PDT, Miluji Sb <milujisb at gmail.com> wrote:
>I am trying to run the following code in R on a Linux cluster. I would like
>to use the full processing power (specifying cores/nodes/memory). The code
>essentially runs predictions based on a GAM regression and saves the
>results as a CSV file for multiple sets of data (here I only show two).
>
>Is it possible to run this code using HPC packages such as
>Rmpi/snow/doParallel? Thank you!
>
>#####################
>library(data.table)
>library(mgcv)
>library(reshape2)
>library(dplyr)
>library(tidyr)
>library(lubridate)
>library(DataCombine)
>#
>gam_max_count_wk <- gam(count_pop ~ factor(citycode) + factor(year) +
>factor(week) + s(lnincome) + s(tmax) +
>s(hmax),data=cont,na.action="na.omit", method="ML")
>
>#
># Historic
>temp_hist <-
read.csv("/work/sd00815/giss_historic/giss_temp_hist.csv")
>humid_hist <-
read.csv("/work/sd00815/giss_historic/giss_hum_hist.csv")
>#
>temp_hist <- as.data.table(temp_hist)
>humid_hist <- as.data.table(humid_hist)
>#
># Merge
>mykey<- c("FIPS", "year","month",
"week")
>setkeyv(temp_hist, mykey)
>setkeyv(humid_hist, mykey)
>#
>hist<- merge(temp_hist, humid_hist, by=mykey)
>#
>hist$X.x <- NULL
>hist$X.y <- NULL
>#
># Max
>hist_max <- hist
>hist_max$FIPS <- hist_max$year <- hist_max$month <- hist_max$tmin
<-
>hist_max$tmean <- hist_max$hmin <- hist_max$hmean <- NULL
>#
># Adding Factors
>hist_max$citycode <- rep(101,nrow(hist_max))
>hist_max$year <- rep(2010,nrow(hist_max))
>hist_max$lnincome <- rep(10.262,nrow(hist_max))
>#
># Predictions
>pred_hist_max <- predict.gam(gam_max_count_wk,hist_max)
>#
>pred_hist_max <- as.data.table(pred_hist_max)
>pred_hist_max <- cbind(hist, pred_hist_max)
>pred_hist_max$tmax <- pred_hist_max$tmean <- pred_hist_max$tmin <-
>pred_hist_max$hmean <- pred_hist_max$hmax <- pred_hist_max$hmin <-
NULL
>#
># Aggregate by FIPS
>max_hist <- pred_hist_max %>%
> group_by(FIPS) %>%
> summarise(pred_hist = mean(pred_hist_max))
>#
>### Future
>## 4.5
># 4.5_2021_2050
>temp_sim <-
>read.csv("/work/sd00815/giss_future/giss_4.5_2021_2050_temp.csv")
>humid_sim <-
>read.csv("/work/sd00815/giss_future/giss_4.5_2021_2050_temp.csv")
>#
># Max
>temp_sim <- as.data.table(temp_sim)
>setnames(temp_sim, "max", "tmax")
>setnames(temp_sim, "min", "tmin")
>setnames(temp_sim, "avg", "tmean")
>#
>humid_sim <- as.data.table(humid_sim)
>setnames(humid_sim, "max", "hmax")
>setnames(humid_sim, "min", "hmin")
>setnames(humid_sim, "avg", "hmean")
>#
>temp_sim$X <- NULL
>humid_sim$X <- NULL
>#
># Merge
>mykey<- c("FIPS", "year","month",
"week")
>setkeyv(temp_sim, mykey)
>setkeyv(humid_sim, mykey)
>#
>sim <- merge(temp_sim, humid_sim, by=mykey)
>#
>sim_max <- sim
>#
>sim_max$FIPS <- sim_max$year <- sim_max$month <- sim_max$tmin <-
>sim_max$tmean <- sim_max$hmin <- sim_max$hmean <- NULL
>#
># Adding Factors
>sim_max$citycode <- rep(101,nrow(sim_max))
>sim_max$year <- rep(2010,nrow(sim_max))
>sim_max$week <- rep(1,nrow(sim_max))
>sim_max$lnincome <- rep(10.262,nrow(sim_max))
>#
># Predictions
>pred_sim_max <- predict.gam(gam_max_count_wk,sim_max)
>#
>pred_sim_max <- as.data.table(pred_sim_max)
>pred_sim_max <- cbind(sim, pred_sim_max)
>pred_sim_max$tmax <- pred_sim_max$tmean <- pred_sim_max$tmin <-
>pred_sim_max$hmean <- pred_sim_max$hmax <- pred_sim_max$hmin <-
NULL
>#
># Aggregate by FIPS
>max_sim <- pred_sim_max %>%
> group_by(FIPS) %>%
> summarise(pred_sim = mean(pred_sim_max))
>#
># Merge with Historical Data
>max_hist$FIPS <- as.factor(max_hist$FIPS)
>max_sim$FIPS <- as.factor(max_sim$FIPS)
>#
>mykey1<- c("FIPS")
>setkeyv(max_hist, mykey1)
>setkeyv(max_sim, mykey1)
>max_change <- merge(max_hist, max_sim, by=mykey1)
>max_change$change <-
>((max_change$pred_sim-max_change$pred_hist)/max_change$pred_hist)*100
>#
>write.csv(max_change, file
>"/work/sd00815/projections_data/year_wk_fe/giss/max/giss_4.5_2021_2050.csv")
>
>
>
># 4.5_2081_2100
>temp_sim <-
>read.csv("/work/sd00815/giss_future/giss_4.5_2081_2100_temp.csv")
>humid_sim <-
>read.csv("/work/sd00815/giss_future/giss_4.5_2081_2100_temp.csv")
>#
># Max
>temp_sim <- as.data.table(temp_sim)
>setnames(temp_sim, "max", "tmax")
>setnames(temp_sim, "min", "tmin")
>setnames(temp_sim, "avg", "tmean")
>#
>humid_sim <- as.data.table(humid_sim)
>setnames(humid_sim, "max", "hmax")
>setnames(humid_sim, "min", "hmin")
>setnames(humid_sim, "avg", "hmean")
>#
>temp_sim$X <- NULL
>humid_sim$X <- NULL
>#
># Merge
>mykey<- c("FIPS", "year","month",
"week")
>setkeyv(temp_sim, mykey)
>setkeyv(humid_sim, mykey)
>#
>sim <- merge(temp_sim, humid_sim, by=mykey)
>#
>sim_max <- sim
>#
>sim_max$FIPS <- sim_max$year <- sim_max$month <- sim_max$tmin <-
>sim_max$tmean <- sim_max$hmin <- sim_max$hmean <- NULL
>#
># Adding Factors
>sim_max$citycode <- rep(101,nrow(sim_max))
>sim_max$year <- rep(2010,nrow(sim_max))
>sim_max$week <- rep(1,nrow(sim_max))
>sim_max$lnincome <- rep(10.262,nrow(sim_max))
>#
># Predictions
>pred_sim_max <- predict.gam(gam_max_count_wk,sim_max)
>#
>pred_sim_max <- as.data.table(pred_sim_max)
>pred_sim_max <- cbind(sim, pred_sim_max)
>pred_sim_max$tmax <- pred_sim_max$tmean <- pred_sim_max$tmin <-
>pred_sim_max$hmean <- pred_sim_max$hmax <- pred_sim_max$hmin <-
NULL
>#
># Aggregate by FIPS
>max_sim <- pred_sim_max %>%
> group_by(FIPS) %>%
> summarise(pred_sim = mean(pred_sim_max))
>#
># Merge with Historical Data
>max_hist$FIPS <- as.factor(max_hist$FIPS)
>max_sim$FIPS <- as.factor(max_sim$FIPS)
>#
>mykey1<- c("FIPS")
>setkeyv(max_hist, mykey1)
>setkeyv(max_sim, mykey1)
>max_change <- merge(max_hist, max_sim, by=mykey1)
>max_change$change <-
>((max_change$pred_sim-max_change$pred_hist)/max_change$pred_hist)*100
>#
>write.csv(max_change, file
>"/work/sd00815/projections_data/year_wk_fe/giss/max/giss_4.5_2081_2100.csv")
>
>####################
>
>
>Sincerely,
>
>Milu
>
> [[alternative HTML version deleted]]
>
>______________________________________________
>R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>https://stat.ethz.ch/mailman/listinfo/r-help
>PLEASE do read the posting guide
>http://www.R-project.org/posting-guide.html
>and provide commented, minimal, self-contained, reproducible code.
[[alternative HTML version deleted]]