Having implemented a GC for HLVM, I am now turning my attention to 
implementing a GC that supports parallelism. To do this, I would like to use 
atomic instructions as well as mutexes. What is the status of LLVM's atomic 
intrinsics (e.g. CAS)? Is anyone using them in real projects?
I realised that an obvious test would be to compile some simple example 
programs with llvm-g++ instead of g++ but this does not work as I had hoped 
(at least not on x86). A mutex-based program works fine (but is 
substantially slower than with gcc), but my wait-free alternative fails with:
$ llvm-g++ -O3 -lpthread waitfree.c -o waitfree
/tmp/cc6t7jaO.o: In function `inc_count(void*)':
(.text+0x2ee): undefined reference to `__sync_add_and_fetch_4'
collect2: ld returned 1 exit status
The source is:
#include <pthread.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
/* Total number of threads main() joins: one watch_count thread at index 0
   plus inc_count threads at indices 1 .. NUM_THREADS-1. */
#define NUM_THREADS  9
/* Number of atomic increments performed by each inc_count thread. */
#define TCOUNT (1 << 23)
/* Counter value at which inc_count signals count_threshold_cv. */
#define COUNT_LIMIT (1 << 25)
/* Shared counter. inc_count updates it with __sync_add_and_fetch, not under
   count_mutex; watch_count reads it while holding count_mutex. */
volatile int count = 0;
/* Mutex paired with count_threshold_cv for the signal/wait hand-off. */
pthread_mutex_t count_mutex;
/* Condition variable signalled by the inc_count thread whose increment
   yields COUNT_LIMIT. */
pthread_cond_t count_threshold_cv;
/*
 * Worker thread: atomically increments the shared counter TCOUNT times.
 *
 * t      - the thread's numeric id, passed by value inside the void *
 *          argument and read back as a long.
 *
 * The thread whose increment produces exactly COUNT_LIMIT takes count_mutex
 * and signals count_threshold_cv. A progress line is printed whenever the
 * value obtained by this thread's own increment is a power of two.
 *
 * Does not return normally; the thread ends via pthread_exit(NULL).
 */
void *inc_count(void *t)
{
  long my_id = (long)t;
  int i;

  for (i = 0; i < TCOUNT; i++) {
    /* Lock-free increment; count2 is the value produced by this thread's
       own add, so no two threads ever observe the same count2. */
    int count2 = __sync_add_and_fetch(&count, 1);

    /*
    Signal the waiting thread when the threshold is reached. The mutex is
    held around the signal so it cannot slip in between watch_count's
    check of count and its call to pthread_cond_wait.
    */
    if (count2 == COUNT_LIMIT) {
      pthread_mutex_lock(&count_mutex);
      pthread_cond_signal(&count_threshold_cv);
      printf("inc_count(): thread %ld, count = %d  Threshold reached.\n",
             my_id, count2);
      pthread_mutex_unlock(&count_mutex);
    }

    /* Power-of-two test. The parentheses are required: == binds tighter
       than &, so the unparenthesised form only fired for count2 == 1.
       Print count2 (our own result) rather than re-reading the shared
       counter, which other threads may already have advanced. */
    if ((count2 & (count2 - 1)) == 0)
      printf("inc_count(): thread %ld, count = %d\n",
             my_id, count2);
  }
  printf("inc_count(): thread %ld exit\n", my_id);
  pthread_exit(NULL);
}
/*
 * Watcher thread: blocks until the shared counter has reached COUNT_LIMIT.
 *
 * t      - the thread's numeric id, passed by value inside the void *
 *          argument and read back as a long.
 *
 * Prints the "Condition signal received" line only if it actually had to
 * wait. Does not return normally; the thread ends via pthread_exit(NULL).
 */
void *watch_count(void *t)
{
  long my_id = (long)t;
  int waited = 0;

  printf("Starting watch_count(): thread %ld\n", my_id);

  /*
  Lock mutex and wait for signal. pthread_cond_wait atomically unlocks the
  mutex while it waits and re-locks it before returning. The predicate is
  re-checked in a loop because pthread_cond_wait may wake up spuriously;
  if COUNT_LIMIT was already reached before this thread got here, the loop
  is skipped so that pthread_cond_wait cannot block forever.
  */
  pthread_mutex_lock(&count_mutex);
  while (count < COUNT_LIMIT) {
    pthread_cond_wait(&count_threshold_cv, &count_mutex);
    waited = 1;
  }
  if (waited) {
    printf("watch_count(): thread %ld count now = %d Condition signal received.\n",
           my_id, count);
  }
  pthread_mutex_unlock(&count_mutex);
  pthread_exit(NULL);
}
/*
 * Program entry point: starts one watch_count thread and NUM_THREADS-1
 * inc_count threads, joins all of them, then releases the pthread objects.
 *
 * argc, argv - unused.
 *
 * Exits with EXIT_FAILURE if any thread cannot be created; otherwise the
 * process ends through pthread_exit(NULL) after every thread was joined.
 */
int main (int argc, char *argv[])
{
  long t;   /* long, so the id round-trips through void * without truncation */
  int i, rc;
  pthread_t threads[NUM_THREADS];   /* slots 0 .. NUM_THREADS-1 are all used */
  pthread_attr_t attr;

  (void)argc;
  (void)argv;

  /* Initialize mutex and condition variable objects */
  pthread_mutex_init(&count_mutex, NULL);
  pthread_cond_init (&count_threshold_cv, NULL);

  /* For portability, explicitly create threads in a joinable state */
  pthread_attr_init(&attr);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

  /* Slot 0 is the watcher; it is given id 1, the same id as the first
     inc_count thread, exactly as before. */
  rc = pthread_create(&threads[0], &attr, watch_count, (void *)1);
  if (rc != 0) {
    fprintf(stderr, "Main(): pthread_create failed with error %d\n", rc);
    exit(EXIT_FAILURE);
  }
  for (t = 1; t < NUM_THREADS; ++t) {
    rc = pthread_create(&threads[t], &attr, inc_count, (void *)t);
    if (rc != 0) {
      /* Joining a pthread_t that was never filled in is undefined, so
         stop here instead of falling through to the join loop. */
      fprintf(stderr, "Main(): pthread_create failed with error %d\n", rc);
      exit(EXIT_FAILURE);
    }
  }

  /* Wait for all threads to complete */
  for (i = 0; i < NUM_THREADS; i++) {
    pthread_join(threads[i], NULL);
  }
  printf ("Main(): Waited on %d threads. Done.\n", NUM_THREADS);

  /* Clean up and exit */
  pthread_attr_destroy(&attr);
  pthread_mutex_destroy(&count_mutex);
  pthread_cond_destroy(&count_threshold_cv);
  pthread_exit(NULL);
}
-- 
Dr Jon Harrop, Flying Frog Consultancy Ltd.
http://www.ffconsultancy.com/?e
Jon Harrop wrote: > Having implemented a GC for HLVM, I am now turning my attention to > implementing a GC that supports parallelism. To do this, I would like to use > atomic instructions as well as mutexes. What is the status of LLVM's atomic > instrinsics (e.g. CAS)? Is anyone using them in real projects? > > I realised that an obvious test would be to compile some simple example > programs with llvm-g++ instead of g++ but this does not work as I had hoped > (at least not on x86). A mutex-based programs works fine (but is > substantially slower than gcc) but my wait-free alternative fails with: > > $ llvm-g++ -O3 -lpthread waitfree.c -o waitfree > /tmp/cc6t7jaO.o: In function `inc_count(void*)': > (.text+0x2ee): undefined reference to `__sync_add_and_fetch_4' > collect2: ld returned 1 exit status
This probably means that your target triple starts with i386, where __sync instructions aren't supported. Reconfiguring llvm-g++ explicitly with i686 (if that's what you have) will possibly fix your problem. This should prevent the builtins from leaking through the front end (I think... it's also possible there just isn't anything mapped to __sync_add_and_fetch_4 in Intrinsics.td). Luke
> > The source is: > > #include <pthread.h> > #include <unistd.h> > #include <stdio.h> > #include <stdlib.h> > > #define NUM_THREADS 9 > #define TCOUNT (1 << 23) > #define COUNT_LIMIT (1 << 25) > > volatile int count = 0; > pthread_mutex_t count_mutex; > pthread_cond_t count_threshold_cv; > > void *inc_count(void *t) > { > int j,i; > double result=0.0; > long my_id = (long)t; > > for (i=0; i<TCOUNT; i++) { > //void *data = malloc(1); > int count2 = __sync_add_and_fetch(&count, 1); > > /* > Check the value of count and signal waiting thread when condition is > reached. Note that this occurs while mutex is locked. 
> */ > if (count2 == COUNT_LIMIT) { > pthread_mutex_lock(&count_mutex); > pthread_cond_signal(&count_threshold_cv); > printf("inc_count(): thread %ld, count = %d Threshold reached.\n", > my_id, count2); > pthread_mutex_unlock(&count_mutex); > } > if (count2 & (count2 - 1) == 0) > printf("inc_count(): thread %ld, count = %d\n", > my_id, count); > //free(data); > //if (!__sync_bool_compare_and_swap(&count, oldval, newval)) > > /* Do some "work" so threads can alternate on mutex lock */ > //fflush(stdout); > //usleep(1); > } > printf("inc_count(): thread %ld exit\n", my_id); > pthread_exit(NULL); > } > > void *watch_count(void *t) > { > long my_id = (long)t; > > printf("Starting watch_count(): thread %ld\n", my_id); > > /* > Lock mutex and wait for signal. Note that the pthread_cond_wait > routine will automatically and atomically unlock mutex while it waits. > Also, note that if COUNT_LIMIT is reached before this routine is run by > the waiting thread, the loop will be skipped to prevent pthread_cond_wait > from never returning. 
> */ > pthread_mutex_lock(&count_mutex); > if (count<COUNT_LIMIT) { > pthread_cond_wait(&count_threshold_cv, &count_mutex); > printf("watch_count(): thread %ld count now = %d Condition signal > received.\n", my_id, count); > } > pthread_mutex_unlock(&count_mutex); > pthread_exit(NULL); > } > > int main (int argc, char *argv[]) > { > int i, rc; > pthread_t threads[1+NUM_THREADS]; > pthread_attr_t attr; > > /* Initialize mutex and condition variable objects */ > pthread_mutex_init(&count_mutex, NULL); > pthread_cond_init (&count_threshold_cv, NULL); > > /* For portability, explicitly create threads in a joinable state */ > pthread_attr_init(&attr); > pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); > pthread_create(&threads[0], &attr, watch_count, (void *)1); > for (int t=1; t<NUM_THREADS; ++t) > pthread_create(&threads[t], &attr, inc_count, (void *)t); > > /* Wait for all threads to complete */ > for (i=0; i<NUM_THREADS; i++) { > pthread_join(threads[i], NULL); > } > printf ("Main(): Waited on %d threads. Done.\n", NUM_THREADS); > > /* Clean up and exit */ > pthread_attr_destroy(&attr); > pthread_mutex_destroy(&count_mutex); > pthread_cond_destroy(&count_threshold_cv); > pthread_exit(NULL); > > } >
Your example works fine on Darwin x86-32, configured as i686-apple- darwin9. On Mar 18, 2009, at 11:54 AMPDT, Jon Harrop wrote:> Having implemented a GC for HLVM, I am now turning my attention to > implementing a GC that supports parallelism. To do this, I would > like to use > atomic instructions as well as mutexes. What is the status of LLVM's > atomic > instrinsics (e.g. CAS)? Is anyone using them in real projects? > > I realised that an obvious test would be to compile some simple > example > programs with llvm-g++ instead of g++ but this does not work as I > had hoped > (at least not on x86). A mutex-based programs works fine (but is > substantially slower than gcc) but my wait-free alternative fails > with: > > $ llvm-g++ -O3 -lpthread waitfree.c -o waitfree > /tmp/cc6t7jaO.o: In function `inc_count(void*)': > (.text+0x2ee): undefined reference to `__sync_add_and_fetch_4' > collect2: ld returned 1 exit status > > The source is: > > #include <pthread.h> > #include <unistd.h> > #include <stdio.h> > #include <stdlib.h> > > #define NUM_THREADS 9 > #define TCOUNT (1 << 23) > #define COUNT_LIMIT (1 << 25) > > volatile int count = 0; > pthread_mutex_t count_mutex; > pthread_cond_t count_threshold_cv; > > void *inc_count(void *t) > { > int j,i; > double result=0.0; > long my_id = (long)t; > > for (i=0; i<TCOUNT; i++) { > //void *data = malloc(1); > int count2 = __sync_add_and_fetch(&count, 1); > > /* > Check the value of count and signal waiting thread when condition > is > reached. Note that this occurs while mutex is locked. > */ > if (count2 == COUNT_LIMIT) { > pthread_mutex_lock(&count_mutex); > pthread_cond_signal(&count_threshold_cv); > printf("inc_count(): thread %ld, count = %d Threshold reached. 
> \n", > my_id, count2); > pthread_mutex_unlock(&count_mutex); > } > if (count2 & (count2 - 1) == 0) > printf("inc_count(): thread %ld, count = %d\n", > my_id, count); > //free(data); > //if (!__sync_bool_compare_and_swap(&count, oldval, newval)) > > /* Do some "work" so threads can alternate on mutex lock */ > //fflush(stdout); > //usleep(1); > } > printf("inc_count(): thread %ld exit\n", my_id); > pthread_exit(NULL); > } > > void *watch_count(void *t) > { > long my_id = (long)t; > > printf("Starting watch_count(): thread %ld\n", my_id); > > /* > Lock mutex and wait for signal. Note that the pthread_cond_wait > routine will automatically and atomically unlock mutex while it > waits. > Also, note that if COUNT_LIMIT is reached before this routine is > run by > the waiting thread, the loop will be skipped to prevent > pthread_cond_wait > from never returning. > */ > pthread_mutex_lock(&count_mutex); > if (count<COUNT_LIMIT) { > pthread_cond_wait(&count_threshold_cv, &count_mutex); > printf("watch_count(): thread %ld count now = %d Condition signal > received.\n", my_id, count); > } > pthread_mutex_unlock(&count_mutex); > pthread_exit(NULL); > } > > int main (int argc, char *argv[]) > { > int i, rc; > pthread_t threads[1+NUM_THREADS]; > pthread_attr_t attr; > > /* Initialize mutex and condition variable objects */ > pthread_mutex_init(&count_mutex, NULL); > pthread_cond_init (&count_threshold_cv, NULL); > > /* For portability, explicitly create threads in a joinable state */ > pthread_attr_init(&attr); > pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); > pthread_create(&threads[0], &attr, watch_count, (void *)1); > for (int t=1; t<NUM_THREADS; ++t) > pthread_create(&threads[t], &attr, inc_count, (void *)t); > > /* Wait for all threads to complete */ > for (i=0; i<NUM_THREADS; i++) { > pthread_join(threads[i], NULL); > } > printf ("Main(): Waited on %d threads. 
Done.\n", NUM_THREADS); > > /* Clean up and exit */ > pthread_attr_destroy(&attr); > pthread_mutex_destroy(&count_mutex); > pthread_cond_destroy(&count_threshold_cv); > pthread_exit(NULL); > > } > > -- > Dr Jon Harrop, Flying Frog Consultancy Ltd. > http://www.ffconsultancy.com/?e > _______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
On Wednesday 18 March 2009 18:59:04 Luke Dalessandro wrote: > Jon Harrop wrote: > > $ llvm-g++ -O3 -lpthread waitfree.c -o waitfree > > /tmp/cc6t7jaO.o: In function `inc_count(void*)': > > (.text+0x2ee): undefined reference to `__sync_add_and_fetch_4' > > collect2: ld returned 1 exit status > > This probably means that you're target triple starts with i386, where > __sync instructions aren't supported, possibly reconfiguring llvm-g++ > explicitly with i686 (if that's what you have) will fix your problem. > This should prevent the builtins from leaking through the front end (I > think... it's also possible there just isn't anything mapped to > __sync_add_and_fetch_4 in Intrinsics.td).
That fixed it, thanks!
-- Dr Jon Harrop, Flying Frog Consultancy Ltd. http://www.ffconsultancy.com/?e