Hello, I'm trying to fit a sine curve over successive temperature readings (i.e. minimum and maximum temperature) over several days and for many locations. The code below shows a hypothetical example of 5000 locations with 7 days of temperature data. Not very efficient when you have many more locations and days. The linear interpolation takes 0.7 seconds, and the sine interpolations take 2 to 4 seconds depending on the approach. Any ideas on how to speed this up? Thanks in advance. Ariel ### R Code ###### # 1- Prepare data fake data days<- 7 n <- 5000*days tmin <- matrix(rnorm(n, mean=0) , ncol=days, nrow=5000) tmax <- matrix(rnorm(n, mean=10), ncol=days, nrow=5000) m <- matrix(NA, ncol=days*2, nrow=5000) m[,seq(1,ncol(m),2)] <- tmin m[,seq(2,ncol(m)+1,2)]<- tmax # check first row plot(1:ncol(m), m[1,], type="l") # 2 -linear interpolation: 0.66 seconds xout <- seq(0,ncol(m),0.25/24*2)[-1] # time step = 0.25 hours or 15 minutes system.time( m1 <- t(apply(m,1, function(y) approx(x=1:ncol(m), y=y, xout=xout, method="linear")$y)) ) # Check first row plot(1:ncol(m), m[1,], type="l") points(xout, m1[1,], col="red", cex=1) # 3- sine interpolation sine.approx1 <- function(index, tmin, tmax) { b <- (2*pi)/24 # period = 24 hours c <- pi/2 # horizontal shift xout <- seq(0,24,0.25)[-1] yhat <- apply(cbind(tmin[index,],tmax[index,]), 1, function(z) diff(z)/2 * sin(b*xout-c) + mean(z)) #yhat <- yhat[-nrow(yhat),] yhat <- c(yhat) #plot(yhat, type="l") } sine.approx2 <- function(index, tmin, tmax) { b <- (2*pi)/24 # period = 24 hours c <- pi/2 # horizontal shift xout1 <- seq(0 ,12,0.25) xout2 <- seq(12,24,0.25)[-1] xout2 <- xout2[-length(xout2)] yhat1 <- apply(cbind(tmin[index,] ,tmax[index,] ), 1, function(z) diff(z)/2 * sin(b*xout1-c) + mean(z)) yhat2 <- apply(cbind(tmax[index,][-length(tmax[index,])],tmin[index,][-1]), 1, function(z) diff(z)/2 * sin(b*xout2+c) + mean(z)) yhat2 <- cbind(yhat2,NA) yhat3 <- rbind(yhat1,yhat2) #yhat3 <- yhat3[-nrow(yhat3),] yhat3 <- c(yhat3) yhat <- yhat3 #plot(c(yhat1)) #plot(c(yhat2)) #plot(yhat, type="l") } # Single sine: 2.23 seconds system.time( m2 <- t(sapply(1:nrow(m), function(i) sine.approx1(i, tmin=tmin, tmax=tmax))) ) # Double sine: 4.03 seconds system.time( m3 <- t(sapply(1:nrow(m), function(i) sine.approx2(i, tmin=tmin, tmax=tmax))) ) # take a look at approach 1 plot(seq(-1,ncol(m)-1,1)[-1], m[1,], type="l") points(xout, m2[1,], col="red", cex=1) # take a look at approach 2 plot(seq(-1,ncol(m)-1,1)[-1], m[1,], type="l") points(xout, m3[1,], col="blue", cex=1) --- Ariel Ortiz-Bobea Fellow Resources for the Future 1616 P Street, N.W. Washington, DC 20036 202-328-5173 [[alternative HTML version deleted]]
You can try this: http://max2.ese.u-psud.fr/epc/conservation/Girondot/Publications/Blog_r/Entrees/2013/6/4_GLM_with_periodic_(annual)_transformation_of_factor.html Sincerely, Marc Le 13/05/2014 05:42, Ortiz-Bobea, Ariel a ?crit :> Hello, > > I'm trying to fit a sine curve over successive temperature readings (i.e. minimum and maximum temperature) over several days and for many locations. The code below shows a hypothetical example of 5000 locations with 7 days of temperature data. Not very efficient when you have many more locations and days. > > The linear interpolation takes 0.7 seconds, and the sine interpolations take 2 to 4 seconds depending on the approach. > > Any ideas on how to speed this up? Thanks in advance. > > Ariel > > ### R Code ###### > > # 1- Prepare data fake data > days<- 7 > n <- 5000*days > tmin <- matrix(rnorm(n, mean=0) , ncol=days, nrow=5000) > tmax <- matrix(rnorm(n, mean=10), ncol=days, nrow=5000) > m <- matrix(NA, ncol=days*2, nrow=5000) > m[,seq(1,ncol(m),2)] <- tmin > m[,seq(2,ncol(m)+1,2)]<- tmax > # check first row > plot(1:ncol(m), m[1,], type="l") > > # 2 -linear interpolation: 0.66 seconds > xout <- seq(0,ncol(m),0.25/24*2)[-1] # time step = 0.25 hours or 15 minutes > system.time( m1 <- t(apply(m,1, function(y) approx(x=1:ncol(m), y=y, xout=xout, method="linear")$y)) ) > # Check first row > plot(1:ncol(m), m[1,], type="l") > points(xout, m1[1,], col="red", cex=1) > > > # 3- sine interpolation > sine.approx1 <- function(index, tmin, tmax) { > b <- (2*pi)/24 # period = 24 hours > c <- pi/2 # horizontal shift > xout <- seq(0,24,0.25)[-1] > yhat <- apply(cbind(tmin[index,],tmax[index,]), 1, function(z) diff(z)/2 * sin(b*xout-c) + mean(z)) > #yhat <- yhat[-nrow(yhat),] > yhat <- c(yhat) > #plot(yhat, type="l") > } > sine.approx2 <- function(index, tmin, tmax) { > b <- (2*pi)/24 # period = 24 hours > c <- pi/2 # horizontal shift > xout1 <- seq(0 ,12,0.25) > xout2 <- seq(12,24,0.25)[-1] > xout2 <- xout2[-length(xout2)] > yhat1 <- apply(cbind(tmin[index,] ,tmax[index,] ), 1, function(z) diff(z)/2 * sin(b*xout1-c) + mean(z)) > yhat2 <- apply(cbind(tmax[index,][-length(tmax[index,])],tmin[index,][-1]), 1, function(z) diff(z)/2 * sin(b*xout2+c) + mean(z)) > yhat2 <- cbind(yhat2,NA) > yhat3 <- rbind(yhat1,yhat2) > #yhat3 <- yhat3[-nrow(yhat3),] > yhat3 <- c(yhat3) > yhat <- yhat3 > #plot(c(yhat1)) > #plot(c(yhat2)) > #plot(yhat, type="l") > } > > # Single sine: 2.23 seconds > system.time( m2 <- t(sapply(1:nrow(m), function(i) sine.approx1(i, tmin=tmin, tmax=tmax))) ) > > # Double sine: 4.03 seconds > system.time( m3 <- t(sapply(1:nrow(m), function(i) sine.approx2(i, tmin=tmin, tmax=tmax))) ) > > # take a look at approach 1 > plot(seq(-1,ncol(m)-1,1)[-1], m[1,], type="l") > points(xout, m2[1,], col="red", cex=1) > > # take a look at approach 2 > plot(seq(-1,ncol(m)-1,1)[-1], m[1,], type="l") > points(xout, m3[1,], col="blue", cex=1) > > > --- > Ariel Ortiz-Bobea > Fellow > Resources for the Future > 1616 P Street, N.W. > Washington, DC 20036 > 202-328-5173 > > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help at r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. >
Hi Why not use functional data analysis. There is an R package called fda to help you. See http://www.psych.mcgill.ca/misc/fda/ex-weather-a1.html for an example exploring temperature from different locations. Yours sincerely / Med venlig hilsen Frede Aakmann T?gersen Specialist, M.Sc., Ph.D. Plant Performance & Modeling Technology & Service Solutions T +45 9730 5135 M +45 2547 6050 frtog at vestas.com http://www.vestas.com Company reg. name: Vestas Wind Systems A/S This e-mail is subject to our e-mail disclaimer statement. Please refer to www.vestas.com/legal/notice If you have received this e-mail in error please contact the sender.> -----Original Message----- > From: r-help-bounces at r-project.org [mailto:r-help-bounces at r-project.org] > On Behalf Of Ortiz-Bobea, Ariel > Sent: 13. maj 2014 05:42 > To: r-help at r-project.org > Subject: [R] efficient sine interpolation > > Hello, > > I'm trying to fit a sine curve over successive temperature readings (i.e. > minimum and maximum temperature) over several days and for many > locations. The code below shows a hypothetical example of 5000 locations > with 7 days of temperature data. Not very efficient when you have many > more locations and days. > > The linear interpolation takes 0.7 seconds, and the sine interpolations take 2 > to 4 seconds depending on the approach. > > Any ideas on how to speed this up? Thanks in advance. > > Ariel > > ### R Code ###### > > # 1- Prepare data fake data > days<- 7 > n <- 5000*days > tmin <- matrix(rnorm(n, mean=0) , ncol=days, nrow=5000) > tmax <- matrix(rnorm(n, mean=10), ncol=days, nrow=5000) > m <- matrix(NA, ncol=days*2, nrow=5000) > m[,seq(1,ncol(m),2)] <- tmin > m[,seq(2,ncol(m)+1,2)]<- tmax > # check first row > plot(1:ncol(m), m[1,], type="l") > > # 2 -linear interpolation: 0.66 seconds > xout <- seq(0,ncol(m),0.25/24*2)[-1] # time step = 0.25 hours or 15 minutes > system.time( m1 <- t(apply(m,1, function(y) approx(x=1:ncol(m), y=y, > xout=xout, method="linear")$y)) ) > # Check first row > plot(1:ncol(m), m[1,], type="l") > points(xout, m1[1,], col="red", cex=1) > > > # 3- sine interpolation > sine.approx1 <- function(index, tmin, tmax) { > b <- (2*pi)/24 # period = 24 hours > c <- pi/2 # horizontal shift > xout <- seq(0,24,0.25)[-1] > yhat <- apply(cbind(tmin[index,],tmax[index,]), 1, function(z) diff(z)/2 * > sin(b*xout-c) + mean(z)) > #yhat <- yhat[-nrow(yhat),] > yhat <- c(yhat) > #plot(yhat, type="l") > } > sine.approx2 <- function(index, tmin, tmax) { > b <- (2*pi)/24 # period = 24 hours > c <- pi/2 # horizontal shift > xout1 <- seq(0 ,12,0.25) > xout2 <- seq(12,24,0.25)[-1] > xout2 <- xout2[-length(xout2)] > yhat1 <- apply(cbind(tmin[index,] ,tmax[index,] ), 1, > function(z) diff(z)/2 * sin(b*xout1-c) + mean(z)) > yhat2 <- apply(cbind(tmax[index,][-length(tmax[index,])],tmin[index,][- > 1]), 1, function(z) diff(z)/2 * sin(b*xout2+c) + mean(z)) > yhat2 <- cbind(yhat2,NA) > yhat3 <- rbind(yhat1,yhat2) > #yhat3 <- yhat3[-nrow(yhat3),] > yhat3 <- c(yhat3) > yhat <- yhat3 > #plot(c(yhat1)) > #plot(c(yhat2)) > #plot(yhat, type="l") > } > > # Single sine: 2.23 seconds > system.time( m2 <- t(sapply(1:nrow(m), function(i) sine.approx1(i, > tmin=tmin, tmax=tmax))) ) > > # Double sine: 4.03 seconds > system.time( m3 <- t(sapply(1:nrow(m), function(i) sine.approx2(i, > tmin=tmin, tmax=tmax))) ) > > # take a look at approach 1 > plot(seq(-1,ncol(m)-1,1)[-1], m[1,], type="l") > points(xout, m2[1,], col="red", cex=1) > > # take a look at approach 2 > plot(seq(-1,ncol(m)-1,1)[-1], m[1,], type="l") > points(xout, m3[1,], col="blue", cex=1) > > > --- > Ariel Ortiz-Bobea > Fellow > Resources for the Future > 1616 P Street, N.W. > Washington, DC 20036 > 202-328-5173 > > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help at r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting- > guide.html > and provide commented, minimal, self-contained, reproducible code.
Not sure this approach yields meaningful data, but as a demonstration of vectorization I got a factor of 10 speedup. sine.approx3 <- function( tmin, tmax ) { B <- (2*pi)/24 # period = 24 hours C <- pi/2 # horizontal shift tmin <- t( tmin ) tmax <- t( tmax ) idx <- seq.int( 24 * 4 * nrow( tmax ) - 12 * 4 ) xout <- ( idx - 1 ) * 0.25 cycles <- sin( B * xout - C ) mag <- matrix( NA, ncol=ncol( tmax ), nrow=length( idx ) ) idxup <- 2 * seq.int( nrow( tmax ) ) - 1 idxdn <- idxup[ -1 ] - 1 idxups <- rep( seq.int( nrow( tmax ) ), each=48 ) idxdns <- rep( seq.int( nrow( tmax ) - 1 ), each=48 ) idxupm <- c( outer( 1:48, 48*( idxup - 1 ), FUN="+" ) ) idxdnm <- c( outer( 1:48, 48*( idxdn - 1 ), FUN="+" ) ) mag[ idxupm, ] <- ( tmax - tmin )[ idxups, ] / 2 mag[ idxdnm, ] <- ( tmax[ -nrow( tmax ), ] - tmin[ -1, ] )[ idxdns, ] / 2 mag <- mag * rep( cycles, times=ncol( mag ) ) mag[ idxupm, ] <- mag[ idxupm, ] + ( tmax + tmin )[ idxups, ] / 2 mag[ idxdnm, ] <- mag[ idxdnm, ] + ( tmax[ -nrow( tmax ), ] + tmin[ -1, ] )[ idxdns, ] / 2 t( mag ) } On Tue, 13 May 2014, Ortiz-Bobea, Ariel wrote:> Hello, > > I'm trying to fit a sine curve over successive temperature readings (i.e. minimum and maximum temperature) over several days and for many locations. The code below shows a hypothetical example of 5000 locations with 7 days of temperature data. Not very efficient when you have many more locations and days. > > The linear interpolation takes 0.7 seconds, and the sine interpolations take 2 to 4 seconds depending on the approach. > > Any ideas on how to speed this up? Thanks in advance. > > Ariel > > ### R Code ###### > > # 1- Prepare data fake data > days<- 7 > n <- 5000*days > tmin <- matrix(rnorm(n, mean=0) , ncol=days, nrow=5000) > tmax <- matrix(rnorm(n, mean=10), ncol=days, nrow=5000) > m <- matrix(NA, ncol=days*2, nrow=5000) > m[,seq(1,ncol(m),2)] <- tmin > m[,seq(2,ncol(m)+1,2)]<- tmax > # check first row > plot(1:ncol(m), m[1,], type="l") > > # 2 -linear interpolation: 0.66 seconds > xout <- seq(0,ncol(m),0.25/24*2)[-1] # time step = 0.25 hours or 15 minutes > system.time( m1 <- t(apply(m,1, function(y) approx(x=1:ncol(m), y=y, xout=xout, method="linear")$y)) ) > # Check first row > plot(1:ncol(m), m[1,], type="l") > points(xout, m1[1,], col="red", cex=1) > > > # 3- sine interpolation > sine.approx1 <- function(index, tmin, tmax) { > b <- (2*pi)/24 # period = 24 hours > c <- pi/2 # horizontal shift > xout <- seq(0,24,0.25)[-1] > yhat <- apply(cbind(tmin[index,],tmax[index,]), 1, function(z) diff(z)/2 * sin(b*xout-c) + mean(z)) > #yhat <- yhat[-nrow(yhat),] > yhat <- c(yhat) > #plot(yhat, type="l") > } > sine.approx2 <- function(index, tmin, tmax) { > b <- (2*pi)/24 # period = 24 hours > c <- pi/2 # horizontal shift > xout1 <- seq(0 ,12,0.25) > xout2 <- seq(12,24,0.25)[-1] > xout2 <- xout2[-length(xout2)] > yhat1 <- apply(cbind(tmin[index,] ,tmax[index,] ), 1, function(z) diff(z)/2 * sin(b*xout1-c) + mean(z)) > yhat2 <- apply(cbind(tmax[index,][-length(tmax[index,])],tmin[index,][-1]), 1, function(z) diff(z)/2 * sin(b*xout2+c) + mean(z)) > yhat2 <- cbind(yhat2,NA) > yhat3 <- rbind(yhat1,yhat2) > #yhat3 <- yhat3[-nrow(yhat3),] > yhat3 <- c(yhat3) > yhat <- yhat3 > #plot(c(yhat1)) > #plot(c(yhat2)) > #plot(yhat, type="l") > } > > # Single sine: 2.23 seconds > system.time( m2 <- t(sapply(1:nrow(m), function(i) sine.approx1(i, tmin=tmin, tmax=tmax))) ) > > # Double sine: 4.03 seconds > system.time( m3 <- t(sapply(1:nrow(m), function(i) sine.approx2(i, tmin=tmin, tmax=tmax))) ) > > # take a look at approach 1 > plot(seq(-1,ncol(m)-1,1)[-1], m[1,], type="l") > points(xout, m2[1,], col="red", cex=1) > > # take a look at approach 2 > plot(seq(-1,ncol(m)-1,1)[-1], m[1,], type="l") > points(xout, m3[1,], col="blue", cex=1) > > > --- > Ariel Ortiz-Bobea > Fellow > Resources for the Future > 1616 P Street, N.W. > Washington, DC 20036 > 202-328-5173 > > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help at r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. >--------------------------------------------------------------------------- Jeff Newmiller The ..... ..... Go Live... DCN:<jdnewmil at dcn.davis.ca.us> Basics: ##.#. ##.#. Live Go... Live: OO#.. Dead: OO#.. Playing Research Engineer (Solar/Batteries O.O#. #.O#. with /Software/Embedded Controllers) .OO#. .OO#. rocks...1k