thr3ads.net - llvm dev - [llvm-dev] varargs, the x86, and clang [Sep 2015]

If this information is useful, please help other people find it:
Share via:

Preston Briggs via llvm-dev

2015-Sep-28 23:07 UTC

[llvm-dev] varargs, the x86, and clang

When I use clang on an x86-64 to spit out the LLVM, like this

clang -O -S -emit-llvm varargstest.c


where varargstest.c looks like this

int add_em_up(int count, ...) {
  va_list ap;
  int i, sum;
  va_start(ap, count);
  sum = 0;
  for (i = 0; i < count; i++)
    sum += va_arg(ap, int);
  va_end(ap);
  return sum;
}


I see LLVM that looks like it's been customized for the x86-64,
versus the varargs stuff I was led to expect from the LLVM IR documentation.

define i32 @add_em_up(i32 %count, ...) #0 {
entry:
  %ap = alloca [1 x %struct.__va_list_tag], align 16
  %arraydecay1 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*
  call void @llvm.va_start(i8* %arraydecay1)
  %cmp7 = icmp sgt i32 %count, 0
  br i1 %cmp7, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  %gp_offset_p = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap,
i64 0, i64 0, i32 0
  %0 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64
0, i32 3
  %overflow_arg_area_p = getelementptr inbounds [1 x
%struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2
  %gp_offset.pre = load i32* %gp_offset_p, align 16
  br label %for.body

for.body:                                         ; preds = %vaarg.end, %
for.body.lr.ph
  %gp_offset = phi i32 [ %gp_offset.pre, %for.body.lr.ph ], [ %gp_offset10,
%vaarg.end ]
  %sum.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %vaarg.end ]
  %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %vaarg.end ]
  %fits_in_gp = icmp ult i32 %gp_offset, 41
  br i1 %fits_in_gp, label %vaarg.in_reg, label %vaarg.in_mem

vaarg.in_reg:                                     ; preds = %for.body
  %reg_save_area = load i8** %0, align 16
  %1 = sext i32 %gp_offset to i64
  %2 = getelementptr i8* %reg_save_area, i64 %1
  %3 = add i32 %gp_offset, 8
  store i32 %3, i32* %gp_offset_p, align 16
  br label %vaarg.end

vaarg.in_mem:                                     ; preds = %for.body
  %overflow_arg_area = load i8** %overflow_arg_area_p, align 8
  %overflow_arg_area.next = getelementptr i8* %overflow_arg_area, i64 8
  store i8* %overflow_arg_area.next, i8** %overflow_arg_area_p, align 8
  br label %vaarg.end

vaarg.end:                                        ; preds = %vaarg.in_mem,
%vaarg.in_reg
  %gp_offset10 = phi i32 [ %3, %vaarg.in_reg ], [ %gp_offset, %vaarg.in_mem
]
  %vaarg.addr.in = phi i8* [ %2, %vaarg.in_reg ], [ %overflow_arg_area,
%vaarg.in_mem ]
  %vaarg.addr = bitcast i8* %vaarg.addr.in to i32*
  %4 = load i32* %vaarg.addr, align 4
  %add = add nsw i32 %4, %sum.09
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, %count
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %vaarg.end,
%entry
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %vaarg.end ]
  call void @llvm.va_end(i8* %arraydecay1)
  ret i32 %sum.0.lcssa
}


Notice at the bottom of the block labeled "for.body" that there's a test
that determines whether to look for an argument on the stack or in a
register. I see something similar with or without the -O flag.

This isn't what I was led to expect by the LLVM IR documentation.
Is there a way to avoid this "premature" optimization?
I tried things like -arch mips and -march=mips but get complaints
about unrecognized flags (-arch) or unknown target architecture CPU
'mips'.
Why's that? The man page suggests both should work.

Thanks,
Preston
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20150928/fd6d3b15/attachment.html>

mats petersson via llvm-dev

2015-Sep-29 08:06 UTC

head link

[llvm-dev] varargs, the x86, and clang

What is it you are actually trying to do?

Varargs are architecture dependent, so will need to take into account the
calling convention of the architecture it is on.

Your errors about 'mips' are probably due to the build of clang on your
machine?

--
Mats

On 29 September 2015 at 00:07, Preston Briggs via llvm-dev <
llvm-dev at lists.llvm.org> wrote:
> When I use clang on an x86-64 to spit out the LLVM, like this
>
> clang -O -S -emit-llvm varargstest.c
>
>
> where varargstest.c looks like this
>
> int add_em_up(int count, ...) {
>   va_list ap;
>   int i, sum;
>   va_start(ap, count);
>   sum = 0;
>   for (i = 0; i < count; i++)
>     sum += va_arg(ap, int);
>   va_end(ap);
>   return sum;
> }
>
>
> I see LLVM that looks like it's been customized for the x86-64,
> versus the varargs stuff I was led to expect from the LLVM IR
> documentation.
>
> define i32 @add_em_up(i32 %count, ...) #0 {
> entry:
>   %ap = alloca [1 x %struct.__va_list_tag], align 16
>   %arraydecay1 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*
>   call void @llvm.va_start(i8* %arraydecay1)
>   %cmp7 = icmp sgt i32 %count, 0
>   br i1 %cmp7, label %for.body.lr.ph, label %for.end
>
> for.body.lr.ph:                                   ; preds = %entry
>   %gp_offset_p = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap,
> i64 0, i64 0, i32 0
>   %0 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64
> 0, i32 3
>   %overflow_arg_area_p = getelementptr inbounds [1 x
> %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2
>   %gp_offset.pre = load i32* %gp_offset_p, align 16
>   br label %for.body
>
> for.body:                                         ; preds = %vaarg.end, %
> for.body.lr.ph
>   %gp_offset = phi i32 [ %gp_offset.pre, %for.body.lr.ph ], [
> %gp_offset10, %vaarg.end ]
>   %sum.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %vaarg.end ]
>   %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %vaarg.end ]
>   %fits_in_gp = icmp ult i32 %gp_offset, 41
>   br i1 %fits_in_gp, label %vaarg.in_reg, label %vaarg.in_mem
>
> vaarg.in_reg:                                     ; preds = %for.body
>   %reg_save_area = load i8** %0, align 16
>   %1 = sext i32 %gp_offset to i64
>   %2 = getelementptr i8* %reg_save_area, i64 %1
>   %3 = add i32 %gp_offset, 8
>   store i32 %3, i32* %gp_offset_p, align 16
>   br label %vaarg.end
>
> vaarg.in_mem:                                     ; preds = %for.body
>   %overflow_arg_area = load i8** %overflow_arg_area_p, align 8
>   %overflow_arg_area.next = getelementptr i8* %overflow_arg_area, i64 8
>   store i8* %overflow_arg_area.next, i8** %overflow_arg_area_p, align 8
>   br label %vaarg.end
>
> vaarg.end:                                        ; preds = %vaarg.in_mem,
> %vaarg.in_reg
>   %gp_offset10 = phi i32 [ %3, %vaarg.in_reg ], [ %gp_offset,
> %vaarg.in_mem ]
>   %vaarg.addr.in = phi i8* [ %2, %vaarg.in_reg ], [ %overflow_arg_area,
> %vaarg.in_mem ]
>   %vaarg.addr = bitcast i8* %vaarg.addr.in to i32*
>   %4 = load i32* %vaarg.addr, align 4
>   %add = add nsw i32 %4, %sum.09
>   %inc = add nsw i32 %i.08, 1
>   %exitcond = icmp eq i32 %inc, %count
>   br i1 %exitcond, label %for.end, label %for.body
>
> for.end:                                          ; preds = %vaarg.end,
> %entry
>   %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %vaarg.end ]
>   call void @llvm.va_end(i8* %arraydecay1)
>   ret i32 %sum.0.lcssa
> }
>
>
> Notice at the bottom of the block labeled "for.body" that there's a test
> that determines
> whether to look for an argument on the stack or in a register. I see
> something similar w/ or w/o the -O flag.
>
> This isn't what I was led to expect by the LLVM IR documentation.
> Is there a way to avoid this "premature" optimization?
> I tried things like -arch mips and -march=mips but get complaints
> about unrecognized flags (-arch) or unknown target architecture CPU
> 'mips'.
> Why's that? The man page suggests both should work.
>
> Thanks,
> Preston
>
>
>
>
> _______________________________________________
> LLVM Developers mailing list
> llvm-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>
>-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20150929/467b0b3a/attachment.html>

Joerg Sonnenberger via llvm-dev

2015-Sep-29 11:53 UTC

head link

[llvm-dev] varargs, the x86, and clang

On Mon, Sep 28, 2015 at 04:07:42PM -0700, Preston Briggs via llvm-dev wrote:
> I see LLVM that looks like it's been customized for the x86-64,
> versus the varargs stuff I was led to expect from the LLVM IR
> documentation.

This is not an optimisation, but based on the complexity of the ABI and
the limited functionality of the IR varargs support. While it tends to
work for the simple cases, passing of aggregates and unions is very
difficult to get right in many ABIs. Consider it the 80% solution -- it
makes it easy to interact with simple variadic functions or get the
common cases working for a new backend, but it often fails to handle all
the tricky cases.

Joerg

Eric Christopher via llvm-dev

2015-Sep-29 14:41 UTC

head link

[llvm-dev] varargs, the x86, and clang

On Mon, Sep 28, 2015, 4:07 PM Preston Briggs via llvm-dev <
llvm-dev at lists.llvm.org> wrote:
> When I use clang on an x86-64 to spit out the LLVM, like this
>
> clang -O -S -emit-llvm varargstest.c
>
>
> where varargstest.c looks like this
>
> int add_em_up(int count, ...) {
>   va_list ap;
>   int i, sum;
>   va_start(ap, count);
>   sum = 0;
>   for (i = 0; i < count; i++)
>     sum += va_arg(ap, int);
>   va_end(ap);
>   return sum;
> }
>
>
> I see LLVM that looks like it's been customized for the x86-64,
> versus the varargs stuff I was led to expect from the LLVM IR
> documentation.
>
> define i32 @add_em_up(i32 %count, ...) #0 {
> entry:
>   %ap = alloca [1 x %struct.__va_list_tag], align 16
>   %arraydecay1 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*
>   call void @llvm.va_start(i8* %arraydecay1)
>   %cmp7 = icmp sgt i32 %count, 0
>   br i1 %cmp7, label %for.body.lr.ph, label %for.end
>
> for.body.lr.ph:                                   ; preds = %entry
>   %gp_offset_p = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap,
> i64 0, i64 0, i32 0
>   %0 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64
> 0, i32 3
>   %overflow_arg_area_p = getelementptr inbounds [1 x
> %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2
>   %gp_offset.pre = load i32* %gp_offset_p, align 16
>   br label %for.body
>
> for.body:                                         ; preds = %vaarg.end, %
> for.body.lr.ph
>   %gp_offset = phi i32 [ %gp_offset.pre, %for.body.lr.ph ], [
> %gp_offset10, %vaarg.end ]
>   %sum.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %vaarg.end ]
>   %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %vaarg.end ]
>   %fits_in_gp = icmp ult i32 %gp_offset, 41
>   br i1 %fits_in_gp, label %vaarg.in_reg, label %vaarg.in_mem
>
> vaarg.in_reg:                                     ; preds = %for.body
>   %reg_save_area = load i8** %0, align 16
>   %1 = sext i32 %gp_offset to i64
>   %2 = getelementptr i8* %reg_save_area, i64 %1
>   %3 = add i32 %gp_offset, 8
>   store i32 %3, i32* %gp_offset_p, align 16
>   br label %vaarg.end
>
> vaarg.in_mem:                                     ; preds = %for.body
>   %overflow_arg_area = load i8** %overflow_arg_area_p, align 8
>   %overflow_arg_area.next = getelementptr i8* %overflow_arg_area, i64 8
>   store i8* %overflow_arg_area.next, i8** %overflow_arg_area_p, align 8
>   br label %vaarg.end
>
> vaarg.end:                                        ; preds = %vaarg.in_mem,
> %vaarg.in_reg
>   %gp_offset10 = phi i32 [ %3, %vaarg.in_reg ], [ %gp_offset,
> %vaarg.in_mem ]
>   %vaarg.addr.in = phi i8* [ %2, %vaarg.in_reg ], [ %overflow_arg_area,
> %vaarg.in_mem ]
>   %vaarg.addr = bitcast i8* %vaarg.addr.in to i32*
>   %4 = load i32* %vaarg.addr, align 4
>   %add = add nsw i32 %4, %sum.09
>   %inc = add nsw i32 %i.08, 1
>   %exitcond = icmp eq i32 %inc, %count
>   br i1 %exitcond, label %for.end, label %for.body
>
> for.end:                                          ; preds = %vaarg.end,
> %entry
>   %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %vaarg.end ]
>   call void @llvm.va_end(i8* %arraydecay1)
>   ret i32 %sum.0.lcssa
> }
>
>
> Notice at the bottom of the block labeled "for.body" that there's a test
> that determines
> whether to look for an argument on the stack or in a register. I see
> something similar w/ or w/o the -O flag.
>
> This isn't what I was led to expect by the LLVM IR documentation.
> Is there a way to avoid this "premature" optimization?
> I tried things like -arch mips and -march=mips but get complaints
> about unrecognized flags (-arch) or unknown target architecture CPU
> 'mips'.
> Why's that? The man page suggests both should work.
>
-arch really only works for Darwin. -march works in a way that you aren't
thinking about: basically, clang is a cross compiler by default, but you do
need to specify a triple on the command line using -target.

-eric

> Thanks,
> Preston
>
>
>
> _______________________________________________
> LLVM Developers mailing list
> llvm-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20150929/cbffe04e/attachment.html>

Preston Briggs via llvm-dev

2015-Sep-29 16:31 UTC

head link

[llvm-dev] varargs, the x86, and clang

When I read the LLVM IR manual, the description of the Variable Argument
Handling Intrinsics and the va_arg instruction lead me to expect IR that
uses va_arg instead of loads and register references.
I was trying to get hold of the IR before it becomes too machine dependent,
i.e., before code generation, but it seems as though va_arg is
replaced/expanded very early, before most optimization.
Is there a way I can suppress that expansion? I'd really like to get hold
of the IR with the va_args intact.

My other question regarding -arch and -march I think involves a misleading
(imo) or out-of-date man page.
Using -target is an effective way to get code for other machines.

Preston


On Tue, Sep 29, 2015 at 1:06 AM, mats petersson <mats at
planetcatfish.com>
wrote:
> What is it you are actually trying to do?
>
> Varargs are architecture dependent, so will need to take into account the
> calling convention of the architecture it is on.
>
> Your errors about 'mips' is probably based on the build of clang on your
> machine?
>
> --
> Mats
>
> On 29 September 2015 at 00:07, Preston Briggs via llvm-dev <
> llvm-dev at lists.llvm.org> wrote:
>
>> When I use clang on an x86-64 to spit out the LLVM, like this
>>
>> clang -O -S -emit-llvm varargstest.c
>>
>>
>> where varargstest.c looks like this
>>
>> int add_em_up(int count, ...) {
>>   va_list ap;
>>   int i, sum;
>>   va_start(ap, count);
>>   sum = 0;
>>   for (i = 0; i < count; i++)
>>     sum += va_arg(ap, int);
>>   va_end(ap);
>>   return sum;
>> }
>>
>>
>> I see LLVM that looks like it's been customized for the x86-64,
>> versus the varargs stuff I was led to expect from the LLVM IR
>> documentation.
>>
>> define i32 @add_em_up(i32 %count, ...) #0 {
>> entry:
>>   %ap = alloca [1 x %struct.__va_list_tag], align 16
>>   %arraydecay1 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*
>>   call void @llvm.va_start(i8* %arraydecay1)
>>   %cmp7 = icmp sgt i32 %count, 0
>>   br i1 %cmp7, label %for.body.lr.ph, label %for.end
>>
>> for.body.lr.ph:                                   ; preds = %entry
>>   %gp_offset_p = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap,
>> i64 0, i64 0, i32 0
>>   %0 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0,
>> i64 0, i32 3
>>   %overflow_arg_area_p = getelementptr inbounds [1 x
>> %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2
>>   %gp_offset.pre = load i32* %gp_offset_p, align 16
>>   br label %for.body
>>
>> for.body:                                         ; preds = %vaarg.end,
>> %for.body.lr.ph
>>   %gp_offset = phi i32 [ %gp_offset.pre, %for.body.lr.ph ], [
>> %gp_offset10, %vaarg.end ]
>>   %sum.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %vaarg.end ]
>>   %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %vaarg.end ]
>>   %fits_in_gp = icmp ult i32 %gp_offset, 41
>>   br i1 %fits_in_gp, label %vaarg.in_reg, label %vaarg.in_mem
>>
>> vaarg.in_reg:                                     ; preds = %for.body
>>   %reg_save_area = load i8** %0, align 16
>>   %1 = sext i32 %gp_offset to i64
>>   %2 = getelementptr i8* %reg_save_area, i64 %1
>>   %3 = add i32 %gp_offset, 8
>>   store i32 %3, i32* %gp_offset_p, align 16
>>   br label %vaarg.end
>>
>> vaarg.in_mem:                                     ; preds = %for.body
>>   %overflow_arg_area = load i8** %overflow_arg_area_p, align 8
>>   %overflow_arg_area.next = getelementptr i8* %overflow_arg_area, i64 8
>>   store i8* %overflow_arg_area.next, i8** %overflow_arg_area_p, align 8
>>   br label %vaarg.end
>>
>> vaarg.end:                                        ; preds = %vaarg.in_mem,
>> %vaarg.in_reg
>>   %gp_offset10 = phi i32 [ %3, %vaarg.in_reg ], [ %gp_offset,
>> %vaarg.in_mem ]
>>   %vaarg.addr.in = phi i8* [ %2, %vaarg.in_reg ], [ %overflow_arg_area,
>> %vaarg.in_mem ]
>>   %vaarg.addr = bitcast i8* %vaarg.addr.in to i32*
>>   %4 = load i32* %vaarg.addr, align 4
>>   %add = add nsw i32 %4, %sum.09
>>   %inc = add nsw i32 %i.08, 1
>>   %exitcond = icmp eq i32 %inc, %count
>>   br i1 %exitcond, label %for.end, label %for.body
>>
>> for.end:                                          ; preds = %vaarg.end,
>> %entry
>>   %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %vaarg.end ]
>>   call void @llvm.va_end(i8* %arraydecay1)
>>   ret i32 %sum.0.lcssa
>> }
>>
>>
>> Notice at the bottom of the block labeled "for.body" that there's a test
>> that determines
>> whether to look for an argument on the stack or in a register. I see
>> something similar w/ or w/o the -O flag.
>>
>> This isn't what I was led to expect by the LLVM IR documentation.
>> Is there a way to avoid this "premature" optimization?
>> I tried things like -arch mips and -march=mips but get complaints
>> about unrecognized flags (-arch) or unknown target architecture CPU
>> 'mips'.
>> Why's that? The man page suggests both should work.
>>
>> Thanks,
>> Preston
>>
>>
>>
>>
>> _______________________________________________
>> LLVM Developers mailing list
>> llvm-dev at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>>
>>
>-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20150929/423b404c/attachment.html>

Apparently Analogous Threads

Search for more possibly parallel threads

llvm dev - Sep 2015 - varargs, the x86, and clang

[llvm-dev] varargs, the x86, and clang

[llvm-dev] varargs, the x86, and clang

[llvm-dev] varargs, the x86, and clang

[llvm-dev] varargs, the x86, and clang

[llvm-dev] varargs, the x86, and clang

Apparently Analogous Threads