Can't seem to get the code below working. It gets stuck on line 24 inside the function hm; comments show the line in question. The function hm is called by sapply and is at the bottom of the code. Other stuff above line 24 works correctly including the first couple of lines of the function hm. Should I be using a different apply function or am I doing something wrong with xmlTreeParse ? library(XML) url.montco <- "http://webapp.montcopa.org/sherreal/salelist.asp?saledate=07/27/2011" tbl <-data.frame(readHTMLTable(url.montco))[, c(3,5,6,8,9)] tbl <-tbl[2: length(tbl[,1]),] names(tbl) <- c("Address", "Township", "Parcel", "SaleDate", "Costs"); rownames(tbl) <- NULL v <- gregexpr("( aka )|( AKA )",tbl$Address) s <-sapply(v, function(x) max(unlist(x))) tbl$Address <- substring(tbl$Address, ifelse(s== -1, 0, s+4), 10000) tbl$Cost <- gsub(',', '', tbl$Costs) temp <- strsplit(tbl$Cost, "\\$") temp <- do.call(rbind, temp) # create a matrix mode(temp) <- 'numeric' tbl$Debt <- round(temp[, 2]/1000,2) tbl$Court <- round(temp[, 3]/1000,2) z <- data.frame(substr(tbl$SaleDate,regexpr("[A-Za-z]", tbl$SaleDate), regexpr("[0-9]", tbl$SaleDate,)-1)) ; names(z) <- "Action" y <- data.frame(substr(tbl$SaleDate,regexpr("[0-9]", tbl$SaleDate),2011)) ; names(y) <- "ActionDate" tbl <-cbind(tbl[, c(1,2,3,7,8)],z,y) new.add <- paste(tbl$Address,"&citystatezip=",tbl$Township,"%2C+PA", sep='') new.add <- sub("^( )+","", new.add) new.add <-data.frame(gsub("( )+",'+', new.add)); names(new.add) <- "ParseAddress" hm <- function(x) { url.zill <-paste("http://www.zillow.com/webservice/GetDeepSearchResults.htm?zws-id=X1-ZWz1bup03e49vv_5kvb6&address=",x, sep="") ############## problem line is next ################################# zdoc <-xmlTreeParse(url.zill, useInternalNode=TRUE, isURL=TRUE) ############# problem line above ################################## f$zpid <- sapply(getNodeSet(zdoc, "//result/zpid"), xmlValue) f$zest.low <-sapply(getNodeSet(zdoc, "//valuationRange/low"), xmlValue) f$zest <- 
sapply(getNodeSet(zdoc, "//zestimate/amount"), xmlValue) rm(zdoc) return(f) } j <-sapply(new.add, FUN=hm) print(zest) -- View this message in context: http://r.789695.n4.nabble.com/Stuck-can-t-get-sapply-and-xmlTreeParse-working-tp3644894p3644894.html Sent from the R help mailing list archive at Nabble.com.
The value of 'url.zill' is a vector of 407 character strings: Browse[1]> str(url.zill) chr [1:407] "http://www.zillow.com/webservice/GetDeepSearchResults.htm?zws-id=X1-ZWz1bup03e49vv_5kvb6&address=10+PACER+LN&citystatezip=East+"| __truncated__ ... Isn't it supposed to be just a single file name? On Mon, Jul 4, 2011 at 8:42 PM, eric <ericstrom at aol.com> wrote:> Can't seem to get the code below working. It gets stuck on line 24 inside the > function hm; comments show the line in question. The function hm is called > by sapply and is at the bottom of the code. Other stuff above line 24 works > correctly including the first couple of lines of the function hm. Should I > be using a different apply function or am I doing something wrong with > xmlTreeParse ? > > > library(XML) > url.montco <- > "http://webapp.montcopa.org/sherreal/salelist.asp?saledate=07/27/2011" > tbl <-data.frame(readHTMLTable(url.montco))[, c(3,5,6,8,9)] > tbl <-tbl[2: length(tbl[,1]),] > names(tbl) <- c("Address", "Township", "Parcel", "SaleDate", "Costs"); > rownames(tbl) <- NULL > v <- gregexpr("( aka )|( AKA )",tbl$Address) > s <-sapply(v, function(x) max(unlist(x))) > tbl$Address <- substring(tbl$Address, ifelse(s== -1, 0, s+4), 10000) > tbl$Cost <- gsub(',', '', tbl$Costs) > temp <- strsplit(tbl$Cost, "\\$") > temp <- do.call(rbind, temp) ?# create a matrix > mode(temp) <- 'numeric' > tbl$Debt <- round(temp[, 2]/1000,2) > tbl$Court <- round(temp[, 3]/1000,2) > z <- data.frame(substr(tbl$SaleDate,regexpr("[A-Za-z]", tbl$SaleDate), > regexpr("[0-9]", tbl$SaleDate,)-1)) ; names(z) <- "Action" > y <- data.frame(substr(tbl$SaleDate,regexpr("[0-9]", tbl$SaleDate),2011)) ; > names(y) <- "ActionDate" > tbl <-cbind(tbl[, c(1,2,3,7,8)],z,y) > new.add <- paste(tbl$Address,"&citystatezip=",tbl$Township,"%2C+PA", sep='') > new.add <- sub("^( )+","", new.add) > new.add <-data.frame(gsub("( )+",'+', new.add)); names(new.add) <- > "ParseAddress" > hm <- function(x) { > ?url.zill > 
<-paste("http://www.zillow.com/webservice/GetDeepSearchResults.htm?zws-id=X1-ZWz1bup03e49vv_5kvb6&address=",x, > sep="") > ?############## problem line is next ################################# > ?zdoc <-xmlTreeParse(url.zill, useInternalNode=TRUE, isURL=TRUE) > ?############# problem line above ?################################## > ?f$zpid <- sapply(getNodeSet(zdoc, "//result/zpid"), xmlValue) > ?f$zest.low <-sapply(getNodeSet(zdoc, "//valuationRange/low"), xmlValue) > ?f$zest <- sapply(getNodeSet(zdoc, "//zestimate/amount"), xmlValue) > ?rm(zdoc) > ?return(f) > } > j <-sapply(new.add, FUN=hm) > print(zest) > > -- > View this message in context: http://r.789695.n4.nabble.com/Stuck-can-t-get-sapply-and-xmlTreeParse-working-tp3644894p3644894.html > Sent from the R help mailing list archive at Nabble.com. > > ______________________________________________ > R-help at r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. >-- Jim Holtman Data Munger Guru What is the problem that you are trying to solve?
Probably this is what you want: convert the first column of 'new.add' to character and then use it in the sapply call. Now it seems to work, in that the data is read in, but the new error is that "f" is not defined. What is it supposed to be? > x <- as.character(new.add[[1]]) > z <- sapply(x, hm) Error in f$zpid <- sapply(getNodeSet(zdoc, "//result/zpid"), xmlValue) : object 'f' not found Enter a frame number, or 0 to exit 1: sapply(x, hm) 2: lapply(X, FUN, ...) 3: FUN(c("10+PACER+LN&citystatezip=East+Norriton%2C+PA", "141+ROSEMONT+AVE&citystatezip=Norristown%2C+PA", "6 On Mon, Jul 4, 2011 at 8:42 PM, eric <ericstrom at aol.com> wrote: > Can't seem to get the code below working. It gets stuck on line 24 inside the > function hm; comments show the line in question. The function hm is called > by sapply and is at the bottom of the code. Other stuff above line 24 works > correctly including the first couple of lines of the function hm. Should I > be using a different apply function or am I doing something wrong with > xmlTreeParse ? 
> > > library(XML) > url.montco <- > "http://webapp.montcopa.org/sherreal/salelist.asp?saledate=07/27/2011" > tbl <-data.frame(readHTMLTable(url.montco))[, c(3,5,6,8,9)] > tbl <-tbl[2: length(tbl[,1]),] > names(tbl) <- c("Address", "Township", "Parcel", "SaleDate", "Costs"); > rownames(tbl) <- NULL > v <- gregexpr("( aka )|( AKA )",tbl$Address) > s <-sapply(v, function(x) max(unlist(x))) > tbl$Address <- substring(tbl$Address, ifelse(s== -1, 0, s+4), 10000) > tbl$Cost <- gsub(',', '', tbl$Costs) > temp <- strsplit(tbl$Cost, "\\$") > temp <- do.call(rbind, temp) ?# create a matrix > mode(temp) <- 'numeric' > tbl$Debt <- round(temp[, 2]/1000,2) > tbl$Court <- round(temp[, 3]/1000,2) > z <- data.frame(substr(tbl$SaleDate,regexpr("[A-Za-z]", tbl$SaleDate), > regexpr("[0-9]", tbl$SaleDate,)-1)) ; names(z) <- "Action" > y <- data.frame(substr(tbl$SaleDate,regexpr("[0-9]", tbl$SaleDate),2011)) ; > names(y) <- "ActionDate" > tbl <-cbind(tbl[, c(1,2,3,7,8)],z,y) > new.add <- paste(tbl$Address,"&citystatezip=",tbl$Township,"%2C+PA", sep='') > new.add <- sub("^( )+","", new.add) > new.add <-data.frame(gsub("( )+",'+', new.add)); names(new.add) <- > "ParseAddress" > hm <- function(x) { > ?url.zill > <-paste("http://www.zillow.com/webservice/GetDeepSearchResults.htm?zws-id=X1-ZWz1bup03e49vv_5kvb6&address=",x, > sep="") > ?############## problem line is next ################################# > ?zdoc <-xmlTreeParse(url.zill, useInternalNode=TRUE, isURL=TRUE) > ?############# problem line above ?################################## > ?f$zpid <- sapply(getNodeSet(zdoc, "//result/zpid"), xmlValue) > ?f$zest.low <-sapply(getNodeSet(zdoc, "//valuationRange/low"), xmlValue) > ?f$zest <- sapply(getNodeSet(zdoc, "//zestimate/amount"), xmlValue) > ?rm(zdoc) > ?return(f) > } > j <-sapply(new.add, FUN=hm) > print(zest) > > -- > View this message in context: http://r.789695.n4.nabble.com/Stuck-can-t-get-sapply-and-xmlTreeParse-working-tp3644894p3644894.html > Sent from the R help mailing list 
archive at Nabble.com. > > ______________________________________________ > R-help at r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. >-- Jim Holtman Data Munger Guru What is the problem that you are trying to solve?