Hello R Community, I've been working on a project that uses pre-made C++ libraries (using the STL) in R and builds them into a package. However, I've been getting an unusual segfault whose origin I'm unable to trace. After many attempts at debugging with gdb and valgrind, and at commenting out parts (or all) of my code, I'm unable to make heads or tails of what I'm doing wrong. In fact, when I comment out my entire C++ libraries except the function and function declaration, I still get the same error. Also, I have the following questions that I wasn't able to find the answers to in the Writing R Extensions guide or the mailing lists. 1) How does R deal with memory allocated by the C++ libraries? Do I have to deallocate it using the linker code (see below)? 2) Is the STL automatically recognized by R as a default C++ library? 3) Is it my linker code's fault or the C++ libraries' fault? The C++ libraries run fine without any R stuff. I tried gdb with backtracing and valgrind, but I'm unable to determine what's going on. My code structure is shown below, along with the error output and how I got the errors in gdb, valgrind, and R's default error message. Any help would be greatly appreciated (and I hope the e-mail is not too long...I tried to be as detailed as possible so that the R community can better understand the problem that I might be having). I'm running the uncompiled C++ libraries in R 2.10.1 under Linux. And as a general question, where can I find "good" sample implementations of C++ code in R? I looked into the package gbm and it seems to wrap all of its C++ functions in extern "C", which is ill-advised according to the Writing R Extensions manual. 
######Files in the package: 1) under src: a set of pre-made C++ libraries (using STL, C++ and .hpp files) + C register code + R<->C++ libraries linker code + Makevars(my version which defines R_PACKAGE environment variable and nothing else) !!!!!Linker Code: #include <cstdlib> #include <cmath> #include <algorithm> #include <vector> #include <iostream> #include <string> #include "puppy.hpp" #include "symbregprimits.hpp" #include <R.h> #include <Rinternals.h> #include <Rdefines.h> using namespace Puppy; //Declare Functions extern "C" { unsigned int evaluateSymbReg(std::vector<Tree>& ioPopulation, Context& ioContext, const std::vector<double>& inX, const std::vector<double>& inF); void RSymbReg(SEXP RPopSize, SEXP RNbrGen, SEXP RNbrPartTournament, SEXP RMaxDepth, SEXP RMinInitDepth, SEXP RMaxInitDepth, SEXP RInitGrowProba, SEXP RCrossoverProba, SEXP RCrossDistribProba, SEXP RMutStdProba, SEXP RMutMaxRegenDepth, SEXP RMutSwapProba, SEXP RMutSwapDistribProba, SEXP RSeed); void RSymbReg(SEXP RPopSize, SEXP RNbrGen, SEXP RNbrPartTournament, SEXP RMaxDepth, SEXP RMinInitDepth, SEXP RMaxInitDepth, SEXP RInitGrowProba, SEXP RCrossoverProba, SEXP RCrossDistribProba, SEXP RMutStdProba, SEXP RMutMaxRegenDepth, SEXP RMutSwapProba, SEXP RMutSwapDistribProba, SEXP RSeed) { // Convert parameters into friendly variables unsigned int lPopSize = INTEGER(RPopSize)[0]; unsigned int lNbrGen = INTEGER(RNbrGen)[0]; unsigned int lNbrPartTournament = INTEGER(RNbrPartTournament)[0]; unsigned int lMaxDepth = INTEGER(RMaxDepth)[0]; unsigned int lMinInitDepth = INTEGER(RMinInitDepth)[0]; unsigned int lMaxInitDepth = INTEGER(RMaxInitDepth)[0]; float lInitGrowProba = (float )REAL(RInitGrowProba)[0]; float lCrossoverProba = (float )REAL(RCrossoverProba)[0]; float lCrossDistribProba = (float )REAL(RCrossDistribProba)[0]; float lMutStdProba = (float )REAL(RMutStdProba)[0]; unsigned int lMutMaxRegenDepth = INTEGER(RMutMaxRegenDepth)[0]; float lMutSwapProba = (float )REAL(RMutSwapProba)[0]; float 
lMutSwapDistribProba = (float )REAL(RMutSwapDistribProba)[0]; unsigned long lSeed = (unsigned long )INTEGER(RSeed)[0]; // Display message with parameters used Rprintf("BEAGLE Puppy symbolic regression\n"); Rprintf("Copyright 2001-2004 by Christian Gagne and Marc Parizeau\n"); Rprintf("Parameters used are:\n"); Rprintf(" Population size: %u\n",lPopSize); Rprintf(" Number of generations: %u\n",lNbrGen); Rprintf(" Number participants tournament: %u\n",lNbrPartTournament); Rprintf(" Maximum tree depth: %u\n",lMaxDepth); Rprintf(" Minimum tree initialization depth: %u\n",lMinInitDepth); Rprintf(" Maximum tree initialization depth: %u\n",lMaxInitDepth); Rprintf(" Grow-type initialization proba.: %f\n",lInitGrowProba); Rprintf(" Crossover probability: %f\n",lCrossoverProba); Rprintf(" Crossover distribution probability: %f\n",lCrossDistribProba); Rprintf(" Standard (Koza's) mutation proba.: %f\n",lMutStdProba); Rprintf(" Standard mutation max. regeneration depth: %u\n",lMutMaxRegenDepth); Rprintf(" Swap point mutation probability: %f\n",lMutSwapProba); Rprintf(" Swap point mutation distribution proba.: %f\n",lMutSwapDistribProba); Rprintf(" Random number generator seed value: %f\n",lSeed); // Create evolution context Rprintf("Creating evolution context\n"); Context lContext; lContext.mRandom.seed(lSeed); lContext.insert(new Add); lContext.insert(new Subtract); lContext.insert(new Multiply); lContext.insert(new Divide); lContext.insert(new TokenT<double>("X", 0.0)); lContext.insert(new Ephemeral); // Sample equation on 20 random points Rprintf("Sampling equaiton to regress\n"); std::vector<double> lX(20); std::vector<double> lF(20); for(unsigned int i=0; i<lX.size(); ++i) { lX[i] = lContext.mRandom.rollUniform(-1.0, 1.0); lF[i] = lX[i]*(lX[i]*(lX[i]*(lX[i]+1.0)+1.0)+1.0); } // Initialize population std::vector<Tree> lPopulation(lPopSize); Rprintf("Initializing population\n"); initializePopulation(lPopulation, lContext, lInitGrowProba, lMinInitDepth, lMaxInitDepth); 
evaluateSymbReg(lPopulation, lContext, lX, lF); calculateStats(lPopulation, 0); // Evole population Rprintf("Starting evolution\n"); for(unsigned int i=1; i<=lNbrGen; ++i) { applySelectionTournament(lPopulation, lContext, lNbrPartTournament); applyCrossover(lPopulation, lContext, lCrossoverProba, lCrossDistribProba, lMaxDepth); applyMutationStandard(lPopulation, lContext, lMutStdProba, lMutMaxRegenDepth, lMaxDepth); applyMutationSwap(lPopulation, lContext, lMutSwapProba, lMutSwapDistribProba); evaluateSymbReg(lPopulation, lContext, lX, lF); calculateStats(lPopulation, i); } // Output best individual std::vector<Tree>::const_iterator lBestIndividual std::max_element(lPopulation.begin(), lPopulation.end()); Rprintf("Best individual at generation %u is: ", lNbrGen); //Rprintf("%s\n", *lBestIndividual); Rprintf("Exiting program\n"); } /*! * \brief Evaluate fitness of a population * \param ioPopulation Population to evaluate fitness. * \param t Evolutionary context. * \param inX Independant sample values for evaluation. * \param inF Dependant sample values for evaluation. * \return Number of fitness evaluated. * \ingroup SymbReg */ unsigned int evaluateSymbReg(std::vector<Tree>& ioPopulation, Context& ioContext, const std::vector<double>& inX, const std::vector<double>& inF) { assert(inX.size() == inF.size()); unsigned int lNbrEval = 0; for(unsigned int i=0; i<ioPopulation.size(); ++i) { if(ioPopulation[i].mValid) continue; double lQuadErr = 0.0; for(unsigned int j=0; j<inX.size(); ++j) { ioContext.mPrimitiveMap["X"]->setValue(&inX[j]); double lResult = 0.0; ioPopulation[i].interpret(&lResult, ioContext); double lErr = lResult - inF[j]; lQuadErr += (lErr * lErr); } double lRMS = std::sqrt(lQuadErr / inX.size()); ioPopulation[i].mFitness = 1. / (1. 
+ lRMS); ioPopulation[i].mValid = true; ++lNbrEval; } return lNbrEval; } } //End Extern !!!!!!!!C-Register code (to register the C++ routines into the R libraries): #include <R.h> #include <Rinternals.h> #include <R_ext/Rdynload.h> void RSymbReg(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); static R_FortranMethodDef FortEntries[] = { {NULL, NULL, 0} }; static R_CMethodDef cEntries[] = { {NULL, NULL, 0, NULL} }; static R_CallMethodDef callEntries[] = { {"RSymbReg", (DL_FUNC) &RSymbReg, 14}, {NULL, NULL, 0} }; void R_init_testing(DllInfo *info) { R_registerRoutines(info, cEntries, callEntries, FortEntries, NULL); } void R_unload_testing(DllInfo *info) { } !!!!!!!!Makevars: PKG_CXXFLAGS="-DR_PACKAGE=1" 2) under R: one .R file which calls the function in the linker code using the .call function: SymbReg <- function(popSize = 100, nbrGen = 50, nbrPartTournament = 2, maxDepth = 17, minInitDepth = 2, maxInitDepth = 5, initGrowProba = 0.5, crossoverProba = 0.9, crossDistribProba = 0.9, mutStdProba = 0.05, mutMaxRegenDepth = 5, mutSwapProba = 0.05, mutSwapDistribProba = 0.5, seed = 0) { .Call("RSymbReg", as.integer(popSize), as.integer(nbrGen), as.integer(nbrPartTournament), as.integer(maxDepth), as.integer(minInitDepth), as.integer(maxInitDepth), as.double(initGrowProba), as.double(crossoverProba), as.double(crossDistribProba), as.double(mutStdProba), as.integer(mutMaxRegenDepth), as.double(mutSwapProba), as.double(mutSwapDistribProba), as.integer(seed), PACKAGE = "RBeaglePuppy") } 3) NAMESPACE: contains this line export(R_function_in_dot_R file) usedynlib(Package_Name) #######Installation output: typing R CMD INSTALL package_file_tar.gz, ignore the warning about my .Rd not being complete. I didn't have the chance to work on it. * installing *source* package ‘RBeaglePuppy’ ... 
** libs g++ -I/usr/share/R/include "-DR_PACKAGE=1" -fpic -g -O2 -c Primitive.cpp -o Primitive.o g++ -I/usr/share/R/include "-DR_PACKAGE=1" -fpic -g -O2 -c Puppy.cpp -o Puppy.o gcc -std=gnu99 -I/usr/share/R/include -fpic -g -O2 -c RBeaglePuppy_init.c -o RBeaglePuppy_init.o g++ -I/usr/share/R/include "-DR_PACKAGE=1" -fpic -g -O2 -c RSymbRegMain.cpp -o RSymbRegMain.o g++ -I/usr/share/R/include "-DR_PACKAGE=1" -fpic -g -O2 -c SymbRegPrimits.cpp -o SymbRegPrimits.o g++ -I/usr/share/R/include "-DR_PACKAGE=1" -fpic -g -O2 -c Tree.cpp -o Tree.o g++ -shared -o RBeaglePuppy.so Primitive.o Puppy.o RBeaglePuppy_init.o RSymbRegMain.o SymbRegPrimits.o Tree.o -L/usr/lib/R/lib -lR ** R ** preparing package for lazy loading ** help Warning: ./man/RBeaglePuppy-package.Rd:34: All text must be in a section Warning: ./man/RBeaglePuppy-package.Rd:35: All text must be in a section *** installing help indices ** building package indices ... * DONE (RBeaglePuppy) ######When using the package in R: library(RBeaglePuppy) SymbReg() #The function in .R file of the package #####Error message (from R): *** caught segfault *** address 0x18, cause 'memory not mapped' Possible actions: 1: abort (with core dump, if enabled) 2: normal R exit 3: exit R without saving workspace 4: exit R saving workspace #####The gdb output: typed in R -d gdb --vanilla run # Instantiates R library(RBeaglePuppy) SymbReg() #The function in .R file of the package Program received signal SIGSEGV, Segmentation fault. 0x00290073 in ?? () from /usr/lib/R/lib/libR.so (gdb) bt #0 0x00290073 in ?? () from /usr/lib/R/lib/libR.so #1 0x00291974 in ?? () from /usr/lib/R/lib/libR.so #2 0x002929b3 in ?? () from /usr/lib/R/lib/libR.so #3 0x00229b32 in Rf_ReplIteration () from /usr/lib/R/lib/libR.so #4 0x00229db0 in ?? 
() from /usr/lib/R/lib/libR.so #5 0x00229e65 in run_Rmainloop () from /usr/lib/R/lib/libR.so #6 0x00229e8c in Rf_mainloop () from /usr/lib/R/lib/libR.so #7 0x080487d8 in main () #8 0x00461b56 in __libc_start_main (main=0x80487a0 <main>, argc=2, ubp_av=0xbffff654, init=0x8048800 <__libc_csu_init>, fini=0x80487f0 <__libc_csu_fini>, rtld_fini=0x11dd20 <_dl_fini>, stack_end=0xbffff64c) at libc-start.c:220 #9 0x080486d1 in _start () ###### Valgrind output: typed in R -d valgrind --vanilla < Sample_Code.R, the Sample_Code calls the R function in the .R package ==14277== Invalid read of size 1 ==14277== at 0x412880B: SET_SYMVALUE (in /usr/lib/R/lib/libR.so) ==14277== by 0x4125A72: Rf_ReplIteration (in /usr/lib/R/lib/libR.so) ==14277== by 0x4125DAF: ??? (in /usr/lib/R/lib/libR.so) ==14277== by 0x4125E64: run_Rmainloop (in /usr/lib/R/lib/libR.so) ==14277== by 0x4125E8B: Rf_mainloop (in /usr/lib/R/lib/libR.so) ==14277== by 0x80487D7: main (in /usr/lib/R/bin/exec/R) ==14277== Address 0x13 is not stack'd, malloc'd or (recently) free'd ==14277= *** caught segfault *** address 0x13, cause 'memory not mapped' aborting ... ==14277===14277== HEAP SUMMARY: ==14277== in use at exit: 12,630,307 bytes in 5,764 blocks ==14277== total heap usage: 132,929 allocs, 127,165 frees, 30,101,025 bytes allocated ==14277===14277== LEAK SUMMARY: ==14277== definitely lost: 80 bytes in 2 blocks ==14277== indirectly lost: 240 bytes in 20 blocks ==14277== possibly lost: 10,342,540 bytes in 5,329 blocks ==14277== still reachable: 2,287,447 bytes in 413 blocks ==14277== suppressed: 0 bytes in 0 blocks ==14277== Rerun with --leak-check=full to see details of leaked memory ==14277===14277== For counts of detected and suppressed errors, rerun with: -v ==14277== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 70 from 13) Segmentation fault Thanks for the help! Hyunseung Kang [[alternative HTML version deleted]]