Hi R helpers,

I am trying to extract customer feedback from an e-commerce site and subsequently use it to create a word cloud. Below is the code I have:

#web-crawling
library(RCurl)
library(XML)
library(rvest)
#web-crawling
init=" http://www.flipkart.com/moto-g-2nd-generation/product-reviews/ITME7YBANGAWQZZX?pid=MOBDYGZ6SHNB7RFC&type=all "
crawlcandidate="start="
base="http://www.flipkart.com"
num=10
doclist=list()
anchorlist=vector()
j=0
while(j<num){
  print(j)
  if(j==0){
    doclist[j+1]=getURL(init)
  }else{
    doclist[j+1]=getURL(paste(base,anchorlist[j+1],sep=""))
  }
  doc=htmlParse(doclist[[j+1]])
  anchor=getNodeSet(doc,"//a")
  anchor=sapply(anchor,function(x)xmlGetAttr(x,"href"))
  anchor=anchor[grep(crawlcandidate,anchor)]
  anchorlist=c(anchorlist,anchor)
  anchorlist=unique(anchorlist)
  j=j+1
}
#html_text is for extracting only reviews and ratings
reviews=c()
ratings=c()
for(i in 1:10){
  doc=htmlParse(doclist[[i]])
  l=getNodeSet(doc,"//div/p/span[@class='review-text']")
  l1=html_text(l)
  rateNodes=getNodeSet(doc,"//div[@class='fk-stars']")
  rates=sapply(rateNodes,function(x)xmlGetAttr(x,'title'))
  ratings=c(ratings,rates)
  reviews=c(reviews,l1)
}
View(reviews)
View(ratings)
#creating wordcloud #tm,wordcloud
corpus=Corpus(VectorSource(reviews[1:100]))
corpus=tm_map(corpus,tolower)
corpus=tm_map(corpus,removePunctuation)
corpus=tm_map(corpus,removeNumbers)
corpus=tm_map(corpus,removeWords,stopwords("en"))
corpus=Corpus(VectorSource(corpus))
tdm=TermDocumentMatrix(corpus)
m=as.matrix(tdm)
v=sort(rowSums(m),decreasing=T)
d=data.frame(words=names(v),freq=v)
wordcloud(d$words,d$freq,max.words=300,colors=brewer.pal(10,"Dark2"),scale=c(3,0.5),random.order=F)

But I am getting the following error:

Error in which(value == defs) : argument "code" is missing, with no default
In addition: Warning message:
XML content does not seem to be XML: ''

How can I resolve this error? Any help will be appreciated.

Regards,
Abhi

[[alternative HTML version deleted]]