Dear all,
I'm currently working on the automated program analysis tool AProVE
(http://aprove.informatik.rwth-aachen.de/) and would like to use LLVM
for analysis of C programs.
I have the following example C program dealing with simple lists of
integers:
------------ start C example -------------
#include<stdlib.h>
/* One node of a singly linked list of integers. */
struct list_el {
    int val;                 /* integer payload stored in this node */
    struct list_el * next;   /* following node; NULL terminates the list (see test) */
};
/* Shorthand so that nodes can be declared as `item` instead of `struct list_el`. */
typedef struct list_el item;
/*
 * Build a singly linked list of `length` nodes and then walk it once.
 *
 * Nodes are created with the values 1..length and each new node is pushed
 * onto the front, so the finished list starts with `length` and ends with 1.
 * When length < 1 the construction loop never runs and the list stays
 * empty (head == NULL).
 *
 * NOTE(review): the malloc result is dereferenced without a NULL check, and
 * the nodes are never freed before the function returns.
 */
void test(int length) {
    item * curr, * head;
    int i;
    head = NULL;   /* start with an empty list */
    for(i=1;i<=length;i++) {
       curr = (item *)malloc(sizeof(item));
       curr->val = i;
       curr->next = head;   /* link the new node in front of the current head */
       head = curr;
    }
    curr = head;
    /* Traverse until the NULL terminator; only the next pointers are read. */
    while(curr) {
       curr = curr->next;
    }
}
------------ end C example -------------
The output I get from the LLVM web interface is:
------------ start LLVM output -------------
; ModuleID = '/tmp/webcompile/_27083_0.bc'
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-linux-gnu"
%struct.item = type { i32, %struct.item* }
define void @test(i32 %length) nounwind {
entry:
   %0 = icmp slt i32 %length, 1                    ; <i1> [#uses=1]
   br i1 %0, label %return, label %bb
bb:                                               ; preds = %bb, %entry
   %indvar = phi i32 [ %i.08, %bb ], [ 0, %entry ] ; <i32> [#uses=2]
   %head.07 = phi %struct.item* [ %2, %bb ], [ null, %entry ] ; <%struct.item*> [#uses=1]
   %i.08 = add i32 %indvar, 1                      ; <i32> [#uses=2]
   %1 = tail call noalias i8* @malloc(i64 16) nounwind ; <i8*> [#uses=3]
   %2 = bitcast i8* %1 to %struct.item*            ; <%struct.item*> [#uses=3]
   %3 = bitcast i8* %1 to i32*                     ; <i32*> [#uses=1]
   store i32 %i.08, i32* %3, align 8
   %4 = getelementptr inbounds i8* %1, i64 8       ; <i8*> [#uses=1]
   %5 = bitcast i8* %4 to %struct.item**           ; <%struct.item**> [#uses=1]
   store %struct.item* %head.07, %struct.item** %5, align 8
   %tmp = add i32 %indvar, 2                       ; <i32> [#uses=1]
   %6 = icmp sgt i32 %tmp, %length                 ; <i1> [#uses=1]
   br i1 %6, label %bb4.preheader, label %bb
bb4.preheader:                                    ; preds = %bb
   %7 = icmp eq %struct.item* %2, null             ; <i1> [#uses=1]
   br i1 %7, label %return, label %bb3
bb3:                                              ; preds = %bb3, %bb4.preheader
   %curr.06 = phi %struct.item* [ %9, %bb3 ], [ %2, %bb4.preheader ] ; <%struct.item*> [#uses=1]
   %8 = getelementptr inbounds %struct.item* %curr.06, i64 0, i32 1 ; <%struct.item**> [#uses=1]
   %9 = load %struct.item** %8, align 8            ; <%struct.item*> [#uses=2]
   %10 = icmp eq %struct.item* %9, null            ; <i1> [#uses=1]
   br i1 %10, label %return, label %bb3
return:                                           ; preds = %bb3, %bb4.preheader, %entry
   ret void
}
declare noalias i8* @malloc(i64) nounwind
------------ end LLVM output -------------
Now my problem is as follows. The pointer %4 (pointing to the second
field in the list element) is calculated by going 8 bytes further in
memory. However, i32 only has a size of 4 bytes. What is the reason
for the struct having a data layout of two times 8 bytes instead
of having 4 bytes for the integer and 8 bytes for the pointer to the
next element? Is there a general rule for the data layout of structs
in LLVM?
Thank you,
Thomas
-- 
Thomas Ströder        mailto:stroeder at informatik.rwth-aachen.de
LuFG Informatik 2     http://verify.rwth-aachen.de/stroeder
RWTH Aachen           phone: +49 241 80-21241
Thomas Ströder <stroeder at informatik.rwth-aachen.de> writes:

> target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
> target triple = "x86_64-linux-gnu"

[snip]

> Now my problem is as follows. The pointer %4 (pointing to the second
> field in the list element) is calculated by going 8 byte further in
> memory. However, i32 only has a size of 4 byte. What is the reason
> for the struct having the data layout of two times 8 bytes instead
> of having 4 bytes for the integer and 8 bytes for the pointer to the
> next element?

See the datalayout string: p:64:64:64 means that pointers are aligned
to 64-bit boundaries. Your struct ends up being laid out like this:

offset bytes 0-3 : int data member
offset bytes 4-7 : padding
offset bytes 8-15: pointer data member

> Is there a general rule for the data layout of structs in LLVM?

LLVM follows the platform's C ABI, in this case x86_64-linux. That's
hard-coded. Don't be lured by Module::setDataLayout.
Hi Thomas,

> Now my problem is as follows. The pointer %4 (pointing to the second
> field in the list element) is calculated by going 8 byte further in
> memory. However, i32 only has a size of 4 byte. What is the reason
> for the struct having the data layout of two times 8 bytes instead
> of having 4 bytes for the integer and 8 bytes for the pointer to the
> next element? Is there a general rule for the data layout of structs
> in LLVM?

There is no general rule except that the layout is target dependent...
TargetData::getStructLayout will give you the StructLayout for a given
StructType. StructLayout::getElementOffset then gives you the offset in
bytes for an element. In your case, getElementOffset(1) should return 8.

The "deeper" reason is that pointers in x86_64 Linux are 8-byte aligned.

Hope this helps,
Stephan
Hi Thomas,

I suggest you read [1] for more information about data structure alignment
and the reason for your 32 bit gap.

Regards,
Florian

[1] http://en.wikipedia.org/wiki/Data_structure_alignment

Am Freitag, 4. Februar 2011, um 15:39:24 schrieb Thomas Ströder:
> Dear all,
>
> I'm currently working on the automated program analysis tool AProVE
> (http://aprove.informatik.rwth-aachen.de/) and would like to use LLVM
> for analysis of C programs.
>
> I have the following example C program dealing with simple lists of
> integers:
>
> ------------ start C example -------------
>
> #include<stdlib.h>
>
> struct list_el {
> int val;
> struct list_el * next;
> };
>
> typedef struct list_el item;
>
> void test(int length) {
> item * curr, * head;
> int i;
>
> head = NULL;
>
> for(i=1;i<=length;i++) {
> curr = (item *)malloc(sizeof(item));
> curr->val = i;
> curr->next = head;
> head = curr;
> }
>
> curr = head;
>
> while(curr) {
> curr = curr->next;
> }
> }
>
> ------------ end C example -------------
>
>
> The output I get from the LLVM web interface is:
>
> ------------ start LLVM output -------------
>
> ; ModuleID = '/tmp/webcompile/_27083_0.bc'
> target datalayout =
> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64
> :64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target
> triple = "x86_64-linux-gnu"
>
> %struct.item = type { i32, %struct.item* }
>
> define void @test(i32 %length) nounwind {
> entry:
> %0 = icmp slt i32 %length, 1 ; <i1> [#uses=1]
> br i1 %0, label %return, label %bb
>
> bb: ; preds = %bb, %entry
> %indvar = phi i32 [ %i.08, %bb ], [ 0, %entry ] ; <i32> [#uses=2]
> %head.07 = phi %struct.item* [ %2, %bb ], [ null, %entry ] ;
> <%struct.item*> [#uses=1] %i.08 = add i32 %indvar, 1
> ; <i32> [#uses=2] %1 = tail call noalias i8* @malloc(i64 16) nounwind ;
> <i8*> [#uses=3] %2 = bitcast i8* %1 to %struct.item* ;
> <%struct.item*> [#uses=3] %3 = bitcast i8* %1 to i32*
> ; <i32*> [#uses=1] store i32 %i.08, i32* %3, align 8
> %4 = getelementptr inbounds i8* %1, i64 8 ; <i8*> [#uses=1]
> %5 = bitcast i8* %4 to %struct.item** ; <%struct.item**>
> [#uses=1] store %struct.item* %head.07, %struct.item** %5, align 8
> %tmp = add i32 %indvar, 2 ; <i32> [#uses=1]
> %6 = icmp sgt i32 %tmp, %length ; <i1> [#uses=1]
> br i1 %6, label %bb4.preheader, label %bb
>
> bb4.preheader: ; preds = %bb
> %7 = icmp eq %struct.item* %2, null ; <i1> [#uses=1]
> br i1 %7, label %return, label %bb3
>
> bb3: ; preds = %bb3,
> %bb4.preheader %curr.06 = phi %struct.item* [ %9, %bb3 ], [ %2,
> %bb4.preheader ] ; <%struct.item*> [#uses=1] %8 = getelementptr inbounds
> %struct.item* %curr.06, i64 0, i32 1 ; <%struct.item**> [#uses=1] %9 =
> load %struct.item** %8, align 8 ; <%struct.item*> [#uses=2] %10
> = icmp eq %struct.item* %9, null ; <i1> [#uses=1] br i1 %10,
> label %return, label %bb3
>
> return: ; preds = %bb3,
> %bb4.preheader, %entry ret void
> }
>
> declare noalias i8* @malloc(i64) nounwind
>
> ------------ end LLVM output -------------
>
>
> Now my problem is as follows. The pointer %4 (pointing to the second
> field in the list element) is calculated by going 8 byte further in
> memory. However, i32 only has a size of 4 byte. What is the reason
> for the struct having the data layout of two times 8 bytes instead
> of having 4 bytes for the integer and 8 bytes for the pointer to the
> next element? Is there a general rule for the data layout of structs
> in LLVM?
>
> Thank you,
> Thomas