Hi everyone, I'm interested in variadic functions and how LLVM handles them. I discovered that the Clang frontend does a great job of lowering va_arg (precisely, __builtin_va_arg) into target-dependent code. I have also seen the va_arg instruction that exists at the IR level. I found some information saying that the IR-level va_arg currently does not support all platforms, but that since 2009 64-bit Windows seems to be partially supported. So I tried to play with it and ran into the following issue: On 64-bit Windows, when passing arguments to a variadic function, the first four parameters are passed in registers and the others on the stack. As a result, the arguments on the stack are 8-byte aligned (I guess it's related to the ABI). For example, by debugging the IR code at the end of this message, here's the state right before the call. We clearly see the 8-byte alignment. rcx : <i64> -6778056391233182162 rdx : <i8*> 0x13E1A4 r8 : <i64*> 0x50f070 r9 : <i64*> 0x50d830 0x2EE070 : <i64*> 0x50d830 0x2EE078 : <i32> 16 0x2EE080 : <i32> 10 0x2EE088 : <i32> 10 0x2EE090 : <i64*> 0x50ee40 When using the IR-level va_arg to retrieve these parameters, it does not respect this alignment and tries to access the parameters as if they were contiguous in memory. %0 = va_arg i8* %ap2, i64* ; OK %1 = va_arg i8* %ap2, i64* ; OK %2 = va_arg i8* %ap2, i64* ; OK (0x2EE070) %3 = va_arg i8* %ap2, i32 ; OK (0x2EE078) %4 = va_arg i8* %ap2, i32 ; Wrong ! 0x2EE07C %5 = va_arg i8* %ap2, i32 ; Wrong ! 0x2EE080 %6 = va_arg i8* %ap2, i64* ; Wrong ! 0x2EE084 The result can be reproduced by running the IR code at the end. E:\test>clang test.ll -o test.exe E:\test>test.exe values : n2 = 16, dna = 0, dnb = 10 n2, dna and dnb are, respectively, the three i32 variables. Does anyone know how to fix this? An alignment attribute on the variadic function does nothing, and VAArgInst does not support setAlignment() like AllocaInst does. 
During my research, I found that when a VAArgInst is lowered in SelectionDAG::expandVAARG(), the alignment information is retrieved from the va_arg SDNode, and the resulting lowering is wrong (in this case). The alignment is set in SelectionDAGBuilder::visitVAArg(), which creates a VAArg DAG node using DL.getABITypeAlignment(I.getType()) as the alignment information. DL.getABITypeAlignment(I.getType()) returns 4 if the type is i32 and 8 if the type is i64. For testing, I forced it to 8 and the IR example below worked fine. Is there some kind of attribute to force function parameters to be aligned contiguously? Or could it be that the va_arg alignment is wrongly computed using DL.getABITypeAlignment? Thanks in advance for your help. Regards, Gaël Here is the IR code for testing: target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc18.0.0" %struct.va_list = type { i8* } $"str" = comdat any @"str" = linkonce_odr unnamed_addr constant [38 x i8] c"values : n2 %d, dna = %d, dnb = %d\0A\00", comdat, align 1 declare i32 @printf(i8*, ...) #1 declare void @llvm.va_start(i8*) declare void @llvm.va_end(i8*) ; Function Attrs: nounwind uwtable define i32 @main() #0 { %r = alloca i64 %a = alloca i64 %b = alloca i64 %t = alloca i64 %rPty = alloca i64* %aPty = alloca i64* %bPty = alloca i64* %tPty = alloca i64* store i64* %r, i64** %rPty store i64* %a, i64** %aPty store i64* %b, i64** %bPty store i64* %t, i64** %tPty %rLoad = load i64*, i64** %rPty %aLoad = load i64*, i64** %aPty %bLoad = load i64*, i64** %bPty %tLoad = load i64*, i64** %tPty %ret = alloca i64 %retPty = alloca i64* store i64* %ret, i64** %retPty %load = load i64*, i64** %retPty %bit = bitcast i64* %load to i8* call void (i64, i8*, ...) @variadiquefunc(i64 -6778056391233182162, i8* %bit, i64* %rLoad, i64* %aLoad, i64* %bLoad, i32 16, i32 10, i32 10, i64* %tLoad) ret i32 0 } define internal void @variadiquefunc(i64 %p, i8* %pp, ...) 
{ entry: %ap = alloca %struct.va_list %ap2 = bitcast %struct.va_list* %ap to i8* call void @llvm.va_start(i8* %ap2) %0 = va_arg i8* %ap2, i64* %1 = va_arg i8* %ap2, i64* %2 = va_arg i8* %ap2, i64* %3 = va_arg i8* %ap2, i32 %4 = va_arg i8* %ap2, i32 %5 = va_arg i8* %ap2, i32 %6 = va_arg i8* %ap2, i64* %7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([38 x i8], [38 x i8]* @"str", i32 0, i32 0), i32 %3, i32 %4, i32 %5) call void @llvm.va_end(i8* %ap2) ret void } attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less- precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp- math"="false" "no-nans-fp-math"="false" "stack-protector-buffer- size"="8" "target-cpu"="x86-64" "target- features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft- float"="false" } attributes #1 = { "disable-tail-calls"="false" "less-precise- fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp- math"="false" "no-nans-fp-math"="false" "stack-protector-buffer- size"="8" "target-cpu"="x86-64" "target- features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft- float"="false" }