Dear all, I'm currently working on the automated program analysis tool AProVE (http://aprove.informatik.rwth-aachen.de/) and would like to use LLVM for analysis of C programs. I have the following example C program dealing with simple lists of integers: ------------ start C example ------------- #include<stdlib.h> struct list_el { int val; struct list_el * next; }; typedef struct list_el item; void test(int length) { item * curr, * head; int i; head = NULL; for(i=1;i<=length;i++) { curr = (item *)malloc(sizeof(item)); curr->val = i; curr->next = head; head = curr; } curr = head; while(curr) { curr = curr->next; } } ------------ end C example ------------- The output I get from the LLVM web interface is: ------------ start LLVM output ------------- ; ModuleID = '/tmp/webcompile/_27083_0.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-linux-gnu" %struct.item = type { i32, %struct.item* } define void @test(i32 %length) nounwind { entry: %0 = icmp slt i32 %length, 1 ; <i1> [#uses=1] br i1 %0, label %return, label %bb bb: ; preds = %bb, %entry %indvar = phi i32 [ %i.08, %bb ], [ 0, %entry ] ; <i32> [#uses=2] %head.07 = phi %struct.item* [ %2, %bb ], [ null, %entry ] ; <%struct.item*> [#uses=1] %i.08 = add i32 %indvar, 1 ; <i32> [#uses=2] %1 = tail call noalias i8* @malloc(i64 16) nounwind ; <i8*> [#uses=3] %2 = bitcast i8* %1 to %struct.item* ; <%struct.item*> [#uses=3] %3 = bitcast i8* %1 to i32* ; <i32*> [#uses=1] store i32 %i.08, i32* %3, align 8 %4 = getelementptr inbounds i8* %1, i64 8 ; <i8*> [#uses=1] %5 = bitcast i8* %4 to %struct.item** ; <%struct.item**> [#uses=1] store %struct.item* %head.07, %struct.item** %5, align 8 %tmp = add i32 %indvar, 2 ; <i32> [#uses=1] %6 = icmp sgt i32 %tmp, %length ; <i1> [#uses=1] br i1 %6, label %bb4.preheader, label %bb bb4.preheader: ; preds = %bb %7 = icmp eq %struct.item* %2, null ; 
<i1> [#uses=1] br i1 %7, label %return, label %bb3 bb3: ; preds = %bb3, %bb4.preheader %curr.06 = phi %struct.item* [ %9, %bb3 ], [ %2, %bb4.preheader ] ; <%struct.item*> [#uses=1] %8 = getelementptr inbounds %struct.item* %curr.06, i64 0, i32 1 ; <%struct.item**> [#uses=1] %9 = load %struct.item** %8, align 8 ; <%struct.item*> [#uses=2] %10 = icmp eq %struct.item* %9, null ; <i1> [#uses=1] br i1 %10, label %return, label %bb3 return: ; preds = %bb3, %bb4.preheader, %entry ret void } declare noalias i8* @malloc(i64) nounwind ------------ end LLVM output ------------- Now my problem is as follows. The pointer %4 (pointing to the second field in the list element) is calculated by going 8 byte further in memory. However, i32 only has a size of 4 byte. What is the reason for the struct having the data layout of two times 8 bytes instead of having 4 bytes for the integer and 8 bytes for the pointer to the next element? Is there a general rule for the data layout of structs in LLVM? Thank you, Thomas -- Thomas Ströder mailto:stroeder at informatik.rwth-aachen.de LuFG Informatik 2 http://verify.rwth-aachen.de/stroeder RWTH Aachen phone: +49 241 80-21241
Thomas Ströder <stroeder at informatik.rwth-aachen.de> writes: > target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" > target triple = "x86_64-linux-gnu" [snip] > Now my problem is as follows. The pointer %4 (pointing to the second > field in the list element) is calculated by going 8 byte further in > memory. However, i32 only has a size of 4 byte. What is the reason > for the struct having the data layout of two times 8 bytes instead > of having 4 bytes for the integer and 8 bytes for the pointer to the > next element? See the datalayout string: p:64:64:64 means that pointers are 64 bits wide and aligned to 64-bit (8-byte) boundaries, so the pointer member cannot start at offset 4. Your struct ends up being laid out like this: offset bytes 0-3 : int data member offset bytes 4-7 : padding offset bytes 8-15: pointer data member > Is there a general rule for the data layout of structs in LLVM? LLVM follows the platform's C ABI, in this case x86_64-linux. That's hard-coded. Don't be lured by Module::setDataLayout.
Hi Thomas, > Now my problem is as follows. The pointer %4 (pointing to the second > field in the list element) is calculated by going 8 byte further in > memory. However, i32 only has a size of 4 byte. What is the reason > for the struct having the data layout of two times 8 bytes instead > of having 4 bytes for the integer and 8 bytes for the pointer to the > next element? Is there a general rule for the data layout of structs > in LLVM? There is no general rule except that the layout is target-dependent... TargetData::getStructLayout will give you the StructLayout for a given StructType. StructLayout::getElementOffset then gives you the offset in bytes for an element. In your case, getElementOffset(1) should return 8. The "deeper" reason is that pointers on x86_64 Linux are 8-byte aligned, so 4 bytes of padding are inserted after the 4-byte i32 to place the pointer at offset 8. Hope this helps, Stephan
Hi Thomas, I suggest you read [1] for more information about data structure alignment and the reason for your 32-bit gap. Regards, Florian [1] http://en.wikipedia.org/wiki/Data_structure_alignment Am Freitag, 4. Februar 2011, um 15:39:24 schrieb Thomas Ströder: > Dear all, > > I'm currently working on the automated program analysis tool AProVE > (http://aprove.informatik.rwth-aachen.de/) and would like to use LLVM > for analysis of C programs. > > I have the following example C program dealing with simple lists of > integers: > > ------------ start C example ------------- > > #include<stdlib.h> > > struct list_el { > int val; > struct list_el * next; > }; > > typedef struct list_el item; > > void test(int length) { > item * curr, * head; > int i; > > head = NULL; > > for(i=1;i<=length;i++) { > curr = (item *)malloc(sizeof(item)); > curr->val = i; > curr->next = head; > head = curr; > } > > curr = head; > > while(curr) { > curr = curr->next; > } > } > > ------------ end C example ------------- > > > The output I get from the LLVM web interface is: > > ------------ start LLVM output ------------- > > ; ModuleID = '/tmp/webcompile/_27083_0.bc' > target datalayout = > "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64 > :64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target > triple = "x86_64-linux-gnu" > > %struct.item = type { i32, %struct.item* } > > define void @test(i32 %length) nounwind { > entry: > %0 = icmp slt i32 %length, 1 ; <i1> [#uses=1] > br i1 %0, label %return, label %bb > > bb: ; preds = %bb, %entry > %indvar = phi i32 [ %i.08, %bb ], [ 0, %entry ] ; <i32> [#uses=2] > %head.07 = phi %struct.item* [ %2, %bb ], [ null, %entry ] ; > <%struct.item*> [#uses=1] %i.08 = add i32 %indvar, 1 > ; <i32> [#uses=2] %1 = tail call noalias i8* @malloc(i64 16) nounwind ; > <i8*> [#uses=3] %2 = bitcast i8* %1 to %struct.item* ; > <%struct.item*> [#uses=3] %3 = bitcast i8* %1 to i32* > ; <i32*> [#uses=1] store i32 %i.08, i32* 
%3, align 8 > %4 = getelementptr inbounds i8* %1, i64 8 ; <i8*> [#uses=1] > %5 = bitcast i8* %4 to %struct.item** ; <%struct.item**> > [#uses=1] store %struct.item* %head.07, %struct.item** %5, align 8 > %tmp = add i32 %indvar, 2 ; <i32> [#uses=1] > %6 = icmp sgt i32 %tmp, %length ; <i1> [#uses=1] > br i1 %6, label %bb4.preheader, label %bb > > bb4.preheader: ; preds = %bb > %7 = icmp eq %struct.item* %2, null ; <i1> [#uses=1] > br i1 %7, label %return, label %bb3 > > bb3: ; preds = %bb3, > %bb4.preheader %curr.06 = phi %struct.item* [ %9, %bb3 ], [ %2, > %bb4.preheader ] ; <%struct.item*> [#uses=1] %8 = getelementptr inbounds > %struct.item* %curr.06, i64 0, i32 1 ; <%struct.item**> [#uses=1] %9 > load %struct.item** %8, align 8 ; <%struct.item*> [#uses=2] %10 > = icmp eq %struct.item* %9, null ; <i1> [#uses=1] br i1 %10, > label %return, label %bb3 > > return: ; preds = %bb3, > %bb4.preheader, %entry ret void > } > > declare noalias i8* @malloc(i64) nounwind > > ------------ end LLVM output ------------- > > > Now my problem is as follows. The pointer %4 (pointing to the second > field in the list element) is calculated by going 8 byte further in > memory. However, i32 only has a size of 4 byte. What is the reason > for the struct having the data layout of two times 8 bytes instead > of having 4 bytes for the integer and 8 bytes for the pointer to the > next element? Is there a general rule for the data layout of structs > in LLVM? > > Thank you, > Thomas