syscall::connect:entry { self->to = (char *) copyin(arg1, arg2); self->ftp = ( self->to[0] == 0 && self->to[1] == 2 && self->to[3] == 21 ) ? 1 : 0; self->ugh = ( self->to[0] != 0 || self->to[1] != 2 ); } syscall::connect:return / self->ftp / { printf("%-20Y %s %d %s %.2x%.2x port=%d %d.%d.%d.%d\n", walltimestamp, zonename, uid, execname, self->to[0], self->to[1], self->to[3], self->to[4], self->to[5], self->to[6], self->to[7]); } In the printf, self->to contents aren''t always what they were when the variable was assigned. Why not? (Incidentally, i''ve fixed this by changing the code to the following, but i''m curious as to what was wrong above. syscall::connect:entry { to = (char *) copyin(arg1, arg2); self->ftp = ( to[0] == 0 && to[1] == 2 && to[3] == 21 ) ? 1 : 0; self->ugh = ( to[0] != 0 || to[1] != 2 ); } syscall::connect:entry / self->ftp / { to = (char *) copyin(arg1, arg2); printf("%-20Y %s %d %s %.2x%.2x port=%d %d.%d.%d.%d %s\n", walltimestamp, zonename, uid, execname, to[0], to[1], to[3], to[4], to[5], to[6], to[7], curpsinfo->pr_psargs); } Also, is there a better way to write the above? (Not so interested in the fbt connect_entry stuff.) -- This message posted from opensolaris.org
Christophe Kalt writes:> syscall::connect:entry > { > self->to = (char *) copyin(arg1, arg2); > self->ftp = ( self->to[0] == 0 && self->to[1] == 2 && self->to[3] == 21 ) ? 1 : 0;Is this on SPARC or x86? That expression is sensitive to byte order: it''s assuming that sin_family is going to be in big-endian order, which is true only on SPARC. (The port number in struct sockaddr_in is always big-endian, because it''s in network byte order.) Not testing self->to[2] for zero seems odd.> In the printf, self->to contents aren''t always what they were when the variable was assigned. Why not?Good question ... for which I don''t have an answer. But for what it''s worth, I always copy the information I want to save across probe clauses into scalars in ''self->''. I don''t really trust pointers here. -- James Carlson, Solaris Networking <james.d.carlson at sun.com> Sun Microsystems / 35 Network Drive 71.232W Vox +1 781 442 2084 MS UBUR02-212 / Burlington MA 01803-2757 42.496N Fax +1 781 442 1677
James Carlson wrote:> Christophe Kalt writes: > > syscall::connect:entry > > { > > self->to = (char *) copyin(arg1, arg2); > > self->ftp = ( self->to[0] == 0 && self->to[1] == 2 && self->to[3] == 21 ) ? 1 : 0; > > Is this on SPARC or x86? That expression is sensitive to byte order: > it''s assuming that sin_family is going to be in big-endian order, > which is true only on SPARC.Yes, i expected to have a problem with this, but my testing showed that this works on both platforms. To be honest, i''m not sure how this can be!> (The port number in struct sockaddr_in is always big-endian, because > it''s in network byte order.) > > Not testing self->to[2] for zero seems odd.True! forgot about it while fighting to get the script to work :-) -- This message posted from opensolaris.org
Hi Christophe, The copyin() action will copy its data to scratch space which is allocated only for the duration of a probe firing. Once you get to the return probe, the data that self->to points to may have been overwritten. You can solve this by making self->to a larger structure that actually contains the relevant data rather a pointer: ---8<--- typedef struct { char p[8]; } to_t; syscall::connect:entry { self->to = *(to_t *)copyin(arg1, sizeof (to_t)); } syscall::connect:return { trace(self->to.p[3]); } ---8<--- Adam On Wed, Jun 04, 2008 at 07:01:54AM -0700, Christophe Kalt wrote:> syscall::connect:entry > { > self->to = (char *) copyin(arg1, arg2); > self->ftp = ( self->to[0] == 0 && self->to[1] == 2 && self->to[3] == 21 ) ? 1 : 0; > self->ugh = ( self->to[0] != 0 || self->to[1] != 2 ); > } > > syscall::connect:return > / self->ftp / > { > printf("%-20Y %s %d %s %.2x%.2x port=%d %d.%d.%d.%d\n", > walltimestamp, zonename, uid, execname, > self->to[0], self->to[1], > self->to[3], self->to[4], self->to[5], self->to[6], self->to[7]); > } > > In the printf, self->to contents aren''t always what they were when the variable was assigned. Why not? > > (Incidentally, i''ve fixed this by changing the code to the following, but i''m curious as to what was wrong above. > > syscall::connect:entry > { > to = (char *) copyin(arg1, arg2); > self->ftp = ( to[0] == 0 && to[1] == 2 && to[3] == 21 ) ? 1 : 0; > self->ugh = ( to[0] != 0 || to[1] != 2 ); > } > > syscall::connect:entry > / self->ftp / > { > to = (char *) copyin(arg1, arg2); > printf("%-20Y %s %d %s %.2x%.2x port=%d %d.%d.%d.%d %s\n", > walltimestamp, zonename, uid, execname, > to[0], to[1], > to[3], to[4], to[5], to[6], to[7], > curpsinfo->pr_psargs); > } > > Also, is there a better way to write the above? (Not so interested in the fbt connect_entry stuff.) > > > -- > This message posted from opensolaris.org > _______________________________________________ > dtrace-discuss mailing list > dtrace-discuss at opensolaris.org-- Adam Leventhal, Fishworks http://blogs.sun.com/ahl
To be safe, you should take Adam''s advice one level further and not execute the copyin until the connect return. If the structure is not faulted-in memory, the DIF code will hiccup because it can''t take an interrupt for the page-in. And don''t forget to clear all thread-local variables when you are done with them. So something like: ---8<--- typedef struct { char p[8]; } to_t; syscall::connect:entry { self->to = arg1; } syscall::connect:return { trace(((to_t *)copyin(self->to, sizeof (to_t)))->p[3]); self->to = 0; } ---8<--- Chip> -----Original Message----- > From: dtrace-discuss-bounces at opensolaris.org [mailto:dtrace-discuss- > bounces at opensolaris.org] On Behalf Of Adam Leventhal > Sent: Wednesday, June 04, 2008 2:34 PM > To: Christophe Kalt > Cc: dtrace-discuss at opensolaris.org > Subject: Re: [dtrace-discuss] What is wrong with this code? > > Hi Christophe, > > The copyin() action will copy its data to scratch space which is > allocated > only for the duration of a probe firing. Once you get to the return > probe, the > data that self->to points to may have been overwritten. You can solve > this by > making self->to a larger structure that actually contains the relevant > data > rather a pointer: > > ---8<--- > > typedef struct { > char p[8]; > } to_t; > > syscall::connect:entry > { > self->to = *(to_t *)copyin(arg1, sizeof (to_t)); > } > > syscall::connect:return > { > trace(self->to.p[3]); > } > > ---8<--- > > Adam > > > On Wed, Jun 04, 2008 at 07:01:54AM -0700, Christophe Kalt wrote: > > syscall::connect:entry > > { > > self->to = (char *) copyin(arg1, arg2); > > self->ftp = ( self->to[0] == 0 && self->to[1] == 2 && self->to[3] > == 21 ) ? 1 : 0; > > self->ugh = ( self->to[0] != 0 || self->to[1] != 2 ); > > } > > > > syscall::connect:return > > / self->ftp / > > { > > printf("%-20Y %s %d %s %.2x%.2x port=%d %d.%d.%d.%d\n", > > walltimestamp, zonename, uid, execname, > > self->to[0], self->to[1], > > self->to[3], self->to[4], self->to[5], self->to[6], self- > >to[7]); > > } > > > > In the printf, self->to contents aren''t always what they were when > the variable was assigned. Why not? > > > > (Incidentally, i''ve fixed this by changing the code to thefollowing,> but i''m curious as to what was wrong above. > > > > syscall::connect:entry > > { > > to = (char *) copyin(arg1, arg2); > > self->ftp = ( to[0] == 0 && to[1] == 2 && to[3] == 21 ) ? 1 : 0; > > self->ugh = ( to[0] != 0 || to[1] != 2 ); > > } > > > > syscall::connect:entry > > / self->ftp / > > { > > to = (char *) copyin(arg1, arg2); > > printf("%-20Y %s %d %s %.2x%.2x port=%d %d.%d.%d.%d %s\n", > > walltimestamp, zonename, uid, execname, > > to[0], to[1], > > to[3], to[4], to[5], to[6], to[7], > > curpsinfo->pr_psargs); > > } > > > > Also, is there a better way to write the above? (Not so interested > in the fbt connect_entry stuff.) > > > > > > -- > > This message posted from opensolaris.org > > _______________________________________________ > > dtrace-discuss mailing list > > dtrace-discuss at opensolaris.org > > -- > Adam Leventhal, Fishworks http://blogs.sun.com/ahl > _______________________________________________ > dtrace-discuss mailing list > dtrace-discuss at opensolaris.org
I have a little test program: See attached. lock_hog uses condition variables. lock_nocv does not use condition variables. But I can uncomment a couple of lines and use condition variables. I have a little script derived from whatfor.d . see attached. My problem is that I expected different sorts of output from whatfor depending upon whether or not I am using condition variables or not but that is not what I am seeing. No condition variables: CPU ID FUNCTION:NAME 0 87550 :tick-10sec lock_hog 18366 kernel-level lock 1 lock_hog 18366 unknown 766 lock_hog 18366 preempted 2447 lock_hog 18366 condition variable 18368 with condition variables: CPU ID FUNCTION:NAME 0 87550 :tick-10sec lock_hog 18416 kernel-level lock 4 lock_hog 18416 user-level lock 4 lock_hog 18416 unknown 868 lock_hog 18416 preempted 2906 lock_hog 18416 condition variable 25541 With plockstat there is a difference with condition variables: arwen:plockstat -C -n 10 -e 10 -p `pgrep lock_hog` 0 Mutex block Count nsec Lock Caller ------------------------------------------------------------------------------- 2695 69781117 lock_hog`prt_lock lock_hog`hello_thread+0x12 851 133297896 lock_hog`prt_lock libc.so.1`pthread_cond_wait+0x1e 684 104551082 lock_hog`prt_lock lock_hog`world_thread+0xf without CVs arwen:plockstat -C -n 10 -e 10 -p `pgrep lock_hog` 0 Mutex block Count nsec Lock Caller ------------------------------------------------------------------------------- 2108 91471723 lock_hog`prt_lock lock_hog`world_thread+0xf 1968 97417958 lock_hog`prt_lock lock_hog`hello_thread+0x12 arwen: So are my expectations incorrect, or is their a bug in my dtrace. Under what sorts of conditions would we see switches for userland locks. Rick -- Rickey C. Weisner Software Development and Performance Specialist Sun Microsystems, INC cell phone: 615-308-1147 email: rick.weisner at sun.com -------------- next part -------------- #include <pthread.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> pthread_mutex_t prt_lock = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t prt_cv = PTHREAD_COND_INITIALIZER; int prt = 0; void * hello_thread( void *arg ) { for (;;) { pthread_mutex_lock( &prt_lock ); fprintf(stderr, "hello \n" ); prt = 1; pthread_cond_signal( &prt_cv ); pthread_mutex_unlock( &prt_lock ); //usleep(1); } return( 0 ); } void * world_thread( void *arg ) { for (;;) { pthread_mutex_lock( &prt_lock ); while ( prt == 0 ) pthread_cond_wait( &prt_cv, &prt_lock ); fprintf(stderr, "world\n" ); prt=0; pthread_mutex_unlock( &prt_lock ); } pthread_exit( 0 ); } main( int argc, char *argv[] ) { int i; int n; pthread_attr_t attr; pthread_t tid; int j; int k; sscanf(argv[1],"%d", &k); pthread_attr_init( &attr ); pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); for ( j=0; j<k; j++) { if ( n = pthread_create( &tid, &attr, world_thread, NULL ) ) { fprintf( stderr, "pthread_create: %s\n", strerror( n ) ); exit( 1 ); } if ( n = pthread_create( &tid, &attr, hello_thread, NULL ) ) { fprintf( stderr, "pthread_create: %s\n", strerror( n ) ); exit( 1 ); } } pthread_attr_destroy( &attr ); if ( n = pthread_join( tid, NULL ) ) { fprintf( stderr, "pthread_join: %s\n", strerror( n ) ); exit( 1 ); } printf( "\n" ); return( 0 ); } -------------- next part -------------- #!/usr/sbin/dtrace -s sched:::off-cpu / arg1 != 0 && pid == $1 && curlwpsinfo->pr_state == SSLEEP/ { /* * We''re sleeping. Track our sobj type. */ @c[execname,pid,curlwpsinfo->pr_stype == SOBJ_MUTEX ? "kernel-level lock" : curlwpsinfo->pr_stype == SOBJ_RWLOCK ? "rwlock" : curlwpsinfo->pr_stype == SOBJ_CV ? "condition variable" : curlwpsinfo->pr_stype == SOBJ_SEMA ? "semaphore" : curlwpsinfo->pr_stype == SOBJ_USER ? "user-level lock" : curlwpsinfo->pr_stype == SOBJ_USER_PI ? "user-level prio-inheriting lock" : curlwpsinfo->pr_stype == SOBJ_SHUTTLE ? "shuttle" : "unknown"] count(); } sched:::off-cpu / arg1 != 0 && pid == $1 && curlwpsinfo->pr_state == SRUN/ { @c[execname,pid,"preempted"]=count(); } tick-10sec { exit(0); } -------------- next part -------------- #include <pthread.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> pthread_mutex_t prt_lock = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t prt_cv = PTHREAD_COND_INITIALIZER; int prt = 0; void * hello_thread( void *arg ) { for (;;) { pthread_mutex_lock( &prt_lock ); fprintf(stderr, "hello \n" ); prt = 1; usleep(1000); //pthread_cond_signal( &prt_cv ); pthread_mutex_unlock( &prt_lock ); } return( 0 ); } void * world_thread( void *arg ) { for (;;) { pthread_mutex_lock( &prt_lock ); //while ( prt == 0 ) //pthread_cond_wait( &prt_cv, &prt_lock ); fprintf(stderr, "world\n" ); prt=0; pthread_mutex_unlock( &prt_lock ); } pthread_exit( 0 ); } main( int argc, char *argv[] ) { int i; int n; pthread_attr_t attr; pthread_t tid; int j; int k; sscanf(argv[1],"%d", &k); pthread_attr_init( &attr ); pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); for ( j=0; j<k; j++) { if ( n = pthread_create( &tid, &attr, world_thread, NULL ) ) { fprintf( stderr, "pthread_create: %s\n", strerror( n ) ); exit( 1 ); } if ( n = pthread_create( &tid, &attr, hello_thread, NULL ) ) { fprintf( stderr, "pthread_create: %s\n", strerror( n ) ); exit( 1 ); } } pthread_attr_destroy( &attr ); if ( n = pthread_join( tid, NULL ) ) { fprintf( stderr, "pthread_join: %s\n", strerror( n ) ); exit( 1 ); } printf( "\n" ); return( 0 ); }