Hello,
I have an optimisation issue in an llvm IR pass - the issue being that
unnecessary instructions are generated in the final assembly (with -O3).
I want to create the following assembly snippet:
mov dl,BYTE PTR [rsi+rdi*1]
add dl,0x1
adc dl,0x0
mov BYTE PTR [rsi+rdi*1],dl
however what is created is (variant #1):
mov dl,BYTE PTR [rsi+rdx*1]
add dl,0x1
cmp dl,0x1 // <- not needed
adc dl,0x0
mov BYTE PTR [rsi+rdi*1],dl
in variant #2 the following is created:
mov dl,BYTE PTR [rsi+rdi*1]
mov ecx,edx // <- not needed
add cl,0x1 // <- should be done to dl instead
adc dl,0x1
mov BYTE PTR [rsi+rdi*1],dl
Far below are both variants with the full code around it, however the
difference in both variants is this:
//variant1
auto cf = IRB.CreateICmpEQ(Incr, ConstantInt::get(Int8Ty, 0));
Incr = IRB.CreateAdd(Incr, cf);
//variant2
auto cf = IRB.CreateICmpULT(Incr, ConstantInt::get(Int8Ty, 1));
Incr = IRB.CreateAdd(Incr, cf);
//interestingly this totally different approach creates the same
instructions as variant2
CallInst *AddOv = IRB.CreateBinaryIntrinsic(Intrinsic::uadd_with_over
flow, Counter, ConstantInt::get(Int8Ty, 1));
AddOv->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C,
None));
Value *SumWithOverflowBit = AddOv;
Incr = IRB.CreateAdd(IRB.CreateExtractValue(SumWithOverflowBit, 0),
IRB.CreateZExt(IRB.CreateExtractValue(SumWithOverflowBit, 1), Int8Ty));
So - How do I have to write the code that the target code has a chance
of being generated?
For me its the same result on LLVM 6.0 and 7.
Alternatively
add BYTE PTR [rsi+rdi*1],0x1
adc BYTE PTR [rsi+rdi*1],0x0
would work as well.
Thank you very much!
Regards,
Marc
-> Below now is the full llvm IR pass code
// vvv same code before both variants
LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc);
PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C,
None));
Value *PrevLocCasted = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty());
LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C,
None));
Value *MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted,
CurLoc));
LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C,
None));
Value *Incr = IRB.CreateAdd(Counter, ConstantInt::get(Int8Ty, 1));
// ^^^ same code before both variants
// CODE FOR VARIANT #1
auto cf = IRB.CreateICmpEQ(Incr, ConstantInt::get(Int8Ty, 0));
Incr = IRB.CreateAdd(Incr, cf);
// CODE FOR VARIANT #2
auto cf = IRB.CreateICmpULT(Incr, ConstantInt::get(Int8Ty, 1));
Incr = IRB.CreateAdd(Incr, cf);
// vvv same code after both variants
IRB.CreateStore(Incr,
MapPtrIdx)->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C,
None));
StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >>
1), AFLPrevLoc);
Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C,
None));
// ^^^ same code after both variants
--
Marc Heuse
www.mh-sec.de
PGP: AF3D 1D4C D810 F0BB 977D 3807 C7EE D0A0 6BE9 F573
Craig Topper via llvm-dev
2019-Jul-03 15:33 UTC
[llvm-dev] optimisation issue in an llvm IR pass
Don't the CreateICmp calls return a Value* with an i1 type? But then they are added to an i8 type? Not sure that works. Have you tried using the llvm.uadd.with.overflow.i8 intrinsic? ~Craig On Wed, Jul 3, 2019 at 4:01 AM Marc via llvm-dev <llvm-dev at lists.llvm.org> wrote:> Hello, > > I have an optimisation issue in an llvm IR pass - the issue being that > unnecessary instructions are generated in the final assembly (with -O3). > > I want to create the following assembly snippet: > > mov dl,BYTE PTR [rsi+rdi*1] > add dl,0x1 > adc dl,0x0 > mov BYTE PTR [rsi+rdi*1],dl > > however what is created is (variant #1): > > mov dl,BYTE PTR [rsi+rdx*1] > add dl,0x1 > cmp dl,0x1 // <- not needed > adc dl,0x0 > mov BYTE PTR [rsi+rdi*1],dl > > in variant #2 the following is created: > > mov dl,BYTE PTR [rsi+rdi*1] > mov ecx,edx // <- not needed > add cl,0x1 // <- should be done to dl instead > adc dl,0x1 > mov BYTE PTR [rsi+rdi*1],dl > > Far below are both variants with the full code around it, however the > difference in both variants is this: > > //variant1 > auto cf = IRB.CreateICmpEQ(Incr, ConstantInt::get(Int8Ty, 0)); > Incr = IRB.CreateAdd(Incr, cf); > > //variant2 > auto cf = IRB.CreateICmpULT(Incr, ConstantInt::get(Int8Ty, 1)); > Incr = IRB.CreateAdd(Incr, cf); > > //interestingly this totally different approach creates the same > instructions as variant2 > CallInst *AddOv = IRB.CreateBinaryIntrinsic(Intrinsic::uadd_with_over > flow, Counter, ConstantInt::get(Int8Ty, 1)); > AddOv->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); > Value *SumWithOverflowBit = AddOv; > Incr = IRB.CreateAdd(IRB.CreateExtractValue(SumWithOverflowBit, 0), > IRB.CreateZExt(IRB.CreateExtractValue(SumWithOverflowBit, 1), Int8Ty)); > > So - How do I have to write the code that the target code has a chance > of being generated? > For me its the same result on LLVM 6.0 and 7. > > Alternatively > add BYTE PTR [rsi+rdi*1],0x1 > adc BYTE PTR [rsi+rdi*1],0x0 > would work as well. > > Thank you very much! > > Regards, > Marc > > -> Below now is the full llvm IR pass code > > // vvv same code before both variants > LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc); > PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); > Value *PrevLocCasted = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty()); > > LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); > MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); > Value *MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, > CurLoc)); > > LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); > Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); > > Value *Incr = IRB.CreateAdd(Counter, ConstantInt::get(Int8Ty, 1)); > // ^^^ same code before both variants > > // CODE FOR VARIANT #1 > auto cf = IRB.CreateICmpEQ(Incr, ConstantInt::get(Int8Ty, 0)); > Incr = IRB.CreateAdd(Incr, cf); > > // CODE FOR VARIANT #2 > auto cf = IRB.CreateICmpULT(Incr, ConstantInt::get(Int8Ty, 1)); > Incr = IRB.CreateAdd(Incr, cf); > > // vvv same code after both variants > IRB.CreateStore(Incr, > MapPtrIdx)->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); > StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> > 1), AFLPrevLoc); > Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); > // ^^^ same code after both variants > > > -- > Marc Heuse > www.mh-sec.de > > PGP: AF3D 1D4C D810 F0BB 977D 3807 C7EE D0A0 6BE9 F573 > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20190703/1673d3bd/attachment.html>
Hi Craig, On 03.07.19 17:33, Craig Topper wrote:> Don't the CreateICmp calls return a Value* with an i1 type? But then > they are added to an i8 type? Not sure that works.I had that initially: auto cf = IRB.CreateICmpULT(Incr, ConstantInt::get(Int8Ty, 1)); auto carry = IRB.CreateZExt(cf, Int8Ty); Incr = IRB.CreateAdd(Incr, carry); it makes no difference to the generated assembly> Have you tried using the llvm.uadd.with.overflow.i8 intrinsic?we have tried this: CallInst *AddOv IRB.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, Counter, ConstantInt::get(Int8Ty, 1)); AddOv->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); Value *SumWithOverflowBit = AddOv; Incr = IRB.CreateAdd(IRB.CreateExtractValue(SumWithOverflowBit, 0), IRB.CreateZExt(IRB.CreateExtractValue(SumWithOverflowBit, 1), Int8Ty)); but that generated exactly the same as auto cf = IRB.CreateICmpULT(Incr, ConstantInt::get(Int8Ty, 1)); Incr = IRB.CreateAdd(Incr, cf); All the above create (with llvm 6.0, 7 and 8): mov dl,BYTE PTR [rsi+rdi*1] mov ecx,edx // <- not needed add cl,0x1 // <- should be done to dl instead adc dl,0x1 mov BYTE PTR [rsi+rdi*1],dl Thanks! Regards, Marc> > ~Craig > > > On Wed, Jul 3, 2019 at 4:01 AM Marc via llvm-dev > <llvm-dev at lists.llvm.org <mailto:llvm-dev at lists.llvm.org>> wrote: > > Hello, > > I have an optimisation issue in an llvm IR pass - the issue being that > unnecessary instructions are generated in the final assembly (with -O3). > > I want to create the following assembly snippet: > > mov dl,BYTE PTR [rsi+rdi*1] > add dl,0x1 > adc dl,0x0 > mov BYTE PTR [rsi+rdi*1],dl > > however what is created is (variant #1): > > mov dl,BYTE PTR [rsi+rdx*1] > add dl,0x1 > cmp dl,0x1 // <- not needed > adc dl,0x0 > mov BYTE PTR [rsi+rdi*1],dl > > in variant #2 the following is created: > > mov dl,BYTE PTR [rsi+rdi*1] > mov ecx,edx // <- not needed > add cl,0x1 // <- should be done to dl instead > adc dl,0x1 > mov BYTE PTR [rsi+rdi*1],dl > > Far below are both variants with the full code around it, however the > difference in both variants is this: > > //variant1 > auto cf = IRB.CreateICmpEQ(Incr, ConstantInt::get(Int8Ty, 0)); > Incr = IRB.CreateAdd(Incr, cf); > > //variant2 > auto cf = IRB.CreateICmpULT(Incr, ConstantInt::get(Int8Ty, 1)); > Incr = IRB.CreateAdd(Incr, cf); > > //interestingly this totally different approach creates the same > instructions as variant2 > CallInst *AddOv = IRB.CreateBinaryIntrinsic(Intrinsic::uadd_with_over > flow, Counter, ConstantInt::get(Int8Ty, 1)); > AddOv->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); > Value *SumWithOverflowBit = AddOv; > Incr = IRB.CreateAdd(IRB.CreateExtractValue(SumWithOverflowBit, 0), > IRB.CreateZExt(IRB.CreateExtractValue(SumWithOverflowBit, 1), Int8Ty)); > > So - How do I have to write the code that the target code has a chance > of being generated? > For me its the same result on LLVM 6.0 and 7. > > Alternatively > add BYTE PTR [rsi+rdi*1],0x1 > adc BYTE PTR [rsi+rdi*1],0x0 > would work as well. > > Thank you very much! > > Regards, > Marc > > -> Below now is the full llvm IR pass code > > // vvv same code before both variants > LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc); > PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); > Value *PrevLocCasted = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty()); > > LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); > MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); > Value *MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, > CurLoc)); > > LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); > Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); > > Value *Incr = IRB.CreateAdd(Counter, ConstantInt::get(Int8Ty, 1)); > // ^^^ same code before both variants > > // CODE FOR VARIANT #1 > auto cf = IRB.CreateICmpEQ(Incr, ConstantInt::get(Int8Ty, 0)); > Incr = IRB.CreateAdd(Incr, cf); > > // CODE FOR VARIANT #2 > auto cf = IRB.CreateICmpULT(Incr, ConstantInt::get(Int8Ty, 1)); > Incr = IRB.CreateAdd(Incr, cf); > > // vvv same code after both variants > IRB.CreateStore(Incr, > MapPtrIdx)->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, > None)); > StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> > 1), AFLPrevLoc); > Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); > // ^^^ same code after both variants > > > -- > Marc Heuse > www.mh-sec.de <http://www.mh-sec.de> > > PGP: AF3D 1D4C D810 F0BB 977D 3807 C7EE D0A0 6BE9 F573 > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org <mailto:llvm-dev at lists.llvm.org> > https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev >-- Marc Heuse Mobil: +49 177 9611560 Fax: +49 30 37309726 www.mh-sec.de Marc Heuse - IT-Security Consulting Winsstr. 68 10405 Berlin Ust.-Ident.-Nr.: DE244222388 PGP: AF3D 1D4C D810 F0BB 977D 3807 C7EE D0A0 6BE9 F573