Hi all,
I think I might have found a potential bug when using an SSE intrinsic with
unaligned memory. Here's the code to reproduce it:
#include "llvm/Module.h"
#include "llvm/Intrinsics.h"
#include "llvm/Instructions.h"
#include "llvm/ModuleProvider.h"
#include "llvm/ExecutionEngine/JIT.h"
#include "llvm/Support/LLVMBuilder.h"
using namespace llvm;
// Adds an unnamed, internal-linkage function to `module` that takes two
// pointers to <4 x float> (first: destination, second: source) and returns
// void. Its single basic block loads the source vector with alignment 1,
// passes it to the x86 SSE rcp_ps intrinsic, stores the result to the
// destination with alignment 1, and returns.
static Function *createFunction(Module *module)
{
    // Both parameters share the same pointer-to-<4 x float> type.
    PointerType *vecPtrTy = PointerType::get(VectorType::get(Type::FloatTy, 4), 0);

    std::vector<const Type*> paramTypes;
    paramTypes.push_back(vecPtrTy);
    paramTypes.push_back(vecPtrTy);

    Function *fn = new Function(FunctionType::get(Type::VoidTy, paramTypes, false),
                                GlobalValue::InternalLinkage, "", module);

    // Argument order matters: destination first, then source.
    Function::arg_iterator argIt = fn->arg_begin();
    Value *dst = argIt++;
    Value *src = argIt++;

    BasicBlock *body = new BasicBlock("", fn, 0);
    LLVMBuilder emit(body);

    Function *rcpIntrinsic = Intrinsic::getDeclaration(module,
                                                       Intrinsic::x86_sse_rcp_ps);

    // The explicit alignment of 1 on the load and store is the point of this
    // reproduction: the pointers are not promised to be 16-byte aligned.
    Value *loaded = emit.Insert(new LoadInst(src, "", false, 1));
    Value *result = emit.CreateCall(rcpIntrinsic, loaded);
    emit.Insert(new StoreInst(result, dst, false, 1));
    emit.CreateRetVoid();

    return fn;
}
int main(int argc, char **argv)
{
Module *module = new Module("test");
Function *function = createFunction(module);
ExistingModuleProvider *moduleProvider = new
ExistingModuleProvider(module);
ExecutionEngine *executionEngine ExecutionEngine::create(moduleProvider,
false);
float /*__declspec(align(16))*/ in[4] = {2, 2, 2, 2};
float /*__declspec(align(16))*/ out[4] = {1, 1, 1, 1};
void (*func)(float*,float*)
(void(*)(float*,float*))executionEngine->getPointerToFunction(function);
func(out, in);
delete executionEngine;
return 0;
}
It generates the following assembly code:
mov eax,dword ptr [esp+8]
rcpps xmm0,xmmword ptr [eax]
mov eax,dword ptr [esp+4]
movups xmmword ptr [eax],xmm0
ret
Note that even though the LoadInst is specified to have an alignment of 1
(in effect, no alignment guarantee), the rcpps instruction references the
memory directly, and it expects that memory operand to be aligned. If "in"
happens not to be 16-byte aligned, an exception will be thrown. What really
should have been generated is something like this:
mov eax,dword ptr [esp+8]
movups xmm0,xmmword ptr [eax]
rcpps xmm1,xmm0
mov eax,dword ptr [esp+4]
movups xmmword ptr [eax],xmm1
ret
Since I'm fairly new to LLVM, I'm not entirely sure whether this is really a
bug, something I'm not doing correctly, or an issue that's already being
addressed. The following thread appears to talk about something similar:
http://thread.gmane.org/gmane.comp.compilers.llvm.devel/9476/focus=9478
Thank you,
Nicolas Capens
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20080523/4504ac1a/attachment.html>