thr3ads.net - llvm dev - [llvm-dev] [RFC] New pass: LoopExitValues [Sep 2015]

If this information is useful, please help other people find it:
Share via:

Steve King via llvm-dev

2015-Sep-03 03:27 UTC

[llvm-dev] [RFC] New pass: LoopExitValues

On Wed, Sep 2, 2015 at 5:36 AM, James Molloy <james at jamesmolloy.co.uk>
wrote:> Hi,
>
> Coremark really isn't a good enough test - have you run the LLVM test
suite
> with this patch, and what were the performance differences?
For the test suite single source benches, the 235 tests improved
performance, 2 regressed and 705 were unchanged.  That seems very
optimistic. Comparing consecutive runs with identical setting shows
there is a lot of noise in the performance data.  Tips for stable
results would be appreciated.

> I'm still a bit confused about what pattern exactly this pass is
supposed to
> trigger on. I understand the mechanics, but I still can't quite see
what
> patterns it would be useful on. You've mentioned matrix multiply - how
does
> this pass alter the IR?
Here's before and after IR for the matrix_mul example.  Notice the two
bitcasts %1 and %2 generated in the for.cond.cleanup block.  The L.E.V
pass converts these to scevgep values that already exist.

*** Code after LSR ***

; Function Attrs: nounwind optsize
define void @matrix_mul(i32 %Size, i32* nocapture %Dst, i32* nocapture
readonly %Src, i32 %Val) #0 {
entry:
  %cmp.25 = icmp eq i32 %Size, 0
  br i1 %cmp.25, label %for.cond.cleanup, label %for.body.4.lr.ph.preheader

for.body.4.lr.ph.preheader:                       ; preds = %entry
  %0 = shl i32 %Size, 2
  br label %for.body.4.lr.ph

for.body.4.lr.ph:                                 ; preds
%for.body.4.lr.ph.preheader, %for.cond.cleanup.3
  %lsr.iv5 = phi i32* [ %Src, %for.body.4.lr.ph.preheader ], [ %2,
%for.cond.cleanup.3 ]
  %lsr.iv1 = phi i32* [ %Dst, %for.body.4.lr.ph.preheader ], [ %1,
%for.cond.cleanup.3 ]
  %Outer.026 = phi i32 [ %inc10, %for.cond.cleanup.3 ], [ 0,
%for.body.4.lr.ph.preheader ]
  %lsr.iv56 = bitcast i32* %lsr.iv5 to i1*
  %lsr.iv12 = bitcast i32* %lsr.iv1 to i1*
  br label %for.body.4

for.cond.cleanup.loopexit:                        ; preds = %for.cond.cleanup.3
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds
%for.cond.cleanup.loopexit, %entry
  ret void

for.cond.cleanup.3:                               ; preds = %for.body.4
  %inc10 = add nuw nsw i32 %Outer.026, 1
  %scevgep = getelementptr i1, i1* %lsr.iv12, i32 %0
  %1 = bitcast i1* %scevgep to i32*
  %scevgep7 = getelementptr i1, i1* %lsr.iv56, i32 %0
  %2 = bitcast i1* %scevgep7 to i32*
  %exitcond27 = icmp eq i32 %inc10, %Size
  br i1 %exitcond27, label %for.cond.cleanup.loopexit, label %for.body.4.lr.ph

for.body.4:                                       ; preds %for.body.4,
%for.body.4.lr.ph
  %lsr.iv8 = phi i32* [ %scevgep9, %for.body.4 ], [ %lsr.iv5,
%for.body.4.lr.ph ]
  %lsr.iv3 = phi i32* [ %scevgep4, %for.body.4 ], [ %lsr.iv1,
%for.body.4.lr.ph ]
  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body.4 ], [ %Size, %for.body.4.lr.ph ]
  %3 = load i32, i32* %lsr.iv8, align 4, !tbaa !1
  %mul5 = mul i32 %3, %Val
  store i32 %mul5, i32* %lsr.iv3, align 4, !tbaa !1
  %lsr.iv.next = add i32 %lsr.iv, -1
  %scevgep4 = getelementptr i32, i32* %lsr.iv3, i32 1
  %scevgep9 = getelementptr i32, i32* %lsr.iv8, i32 1
  %exitcond = icmp eq i32 %lsr.iv.next, 0
  br i1 %exitcond, label %for.cond.cleanup.3, label %for.body.4
}


*** Code after Loop Exit Values Optimization **

; Function Attrs: nounwind optsize
define void @matrix_mul(i32 %Size, i32* nocapture %Dst, i32* nocapture
readonly %Src, i32 %Val) #0 {
entry:
  %cmp.25 = icmp eq i32 %Size, 0
  br i1 %cmp.25, label %for.cond.cleanup, label %for.body.4.lr.ph.preheader

for.body.4.lr.ph.preheader:                       ; preds = %entry
  br label %for.body.4.lr.ph

for.body.4.lr.ph:                                 ; preds
%for.body.4.lr.ph.preheader, %for.cond.cleanup.3
  %lsr.iv5 = phi i32* [ %Src, %for.body.4.lr.ph.preheader ], [
%scevgep9, %for.cond.cleanup.3 ]
  %lsr.iv1 = phi i32* [ %Dst, %for.body.4.lr.ph.preheader ], [
%scevgep4, %for.cond.cleanup.3 ]
  %Outer.026 = phi i32 [ %inc10, %for.cond.cleanup.3 ], [ 0,
%for.body.4.lr.ph.preheader ]
  br label %for.body.4

for.cond.cleanup.loopexit:                        ; preds = %for.cond.cleanup.3
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds
%for.cond.cleanup.loopexit, %entry
  ret void

for.cond.cleanup.3:                               ; preds = %for.body.4
  %inc10 = add nuw nsw i32 %Outer.026, 1
  %exitcond27 = icmp eq i32 %inc10, %Size
  br i1 %exitcond27, label %for.cond.cleanup.loopexit, label %for.body.4.lr.ph

for.body.4:                                       ; preds %for.body.4,
%for.body.4.lr.ph
  %lsr.iv8 = phi i32* [ %scevgep9, %for.body.4 ], [ %lsr.iv5,
%for.body.4.lr.ph ]
  %lsr.iv3 = phi i32* [ %scevgep4, %for.body.4 ], [ %lsr.iv1,
%for.body.4.lr.ph ]
  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body.4 ], [ %Size, %for.body.4.lr.ph ]
  %0 = load i32, i32* %lsr.iv8, align 4, !tbaa !1
  %mul5 = mul i32 %0, %Val
  store i32 %mul5, i32* %lsr.iv3, align 4, !tbaa !1
  %lsr.iv.next = add i32 %lsr.iv, -1
  %scevgep4 = getelementptr i32, i32* %lsr.iv3, i32 1
  %scevgep9 = getelementptr i32, i32* %lsr.iv8, i32 1
  %exitcond = icmp eq i32 %lsr.iv.next, 0
  br i1 %exitcond, label %for.cond.cleanup.3, label %for.body.4

> What value is it avoiding being recomputed?I'm not precisely sure, but it's residue from LSR.  The pass checks
all computable SCEV values when a loop exits and in this case found
GEPs with the same value.
> How does this pass affect register pressure?
> Also, your example just removes a mov and an add - the push/pops are just
> register allocation (unless your pass in fact *reduces* register pressure?)
Right, the computation eliminated is the mov and add.  Register
savings is a byproduct.

Regards,
-steve

Steve King via llvm-dev

2015-Sep-10 21:35 UTC

head link

[llvm-dev] [RFC] New pass: LoopExitValues

Hello LLVM,
It seems this thread has gone cold.  Is there some low risk way for
the community to take the new pass for a test drive?
Regards,
-steve

On Wed, Sep 2, 2015 at 8:27 PM, Steve King <steve at metrokings.com>
wrote:> On Wed, Sep 2, 2015 at 5:36 AM, James Molloy <james at
jamesmolloy.co.uk> wrote:
>> Hi,
>>
>> Coremark really isn't a good enough test - have you run the LLVM
test suite
>> with this patch, and what were the performance differences?
>
> For the test suite single source benches, the 235 tests improved
> performance, 2 regressed and 705 were unchanged.  That seems very
> optimistic. Comparing consecutive runs with identical setting shows
> there is a lot of noise in the performance data.  Tips for stable
> results would be appreciated.
>
>
>> I'm still a bit confused about what pattern exactly this pass is
supposed to
>> trigger on. I understand the mechanics, but I still can't quite see
what
>> patterns it would be useful on. You've mentioned matrix multiply -
how does
>> this pass alter the IR?
>
> Here's before and after IR for the matrix_mul example.  Notice the two
> bitcasts %1 and %2 generated in the for.cond.cleanup block.  The L.E.V
> pass converts these to scevgep values that already exist.
>
> *** Code after LSR ***
>
> ; Function Attrs: nounwind optsize
> define void @matrix_mul(i32 %Size, i32* nocapture %Dst, i32* nocapture
> readonly %Src, i32 %Val) #0 {
> entry:
>   %cmp.25 = icmp eq i32 %Size, 0
>   br i1 %cmp.25, label %for.cond.cleanup, label %for.body.4.lr.ph.preheader
>
> for.body.4.lr.ph.preheader:                       ; preds = %entry
>   %0 = shl i32 %Size, 2
>   br label %for.body.4.lr.ph
>
> for.body.4.lr.ph:                                 ; preds >
%for.body.4.lr.ph.preheader, %for.cond.cleanup.3
>   %lsr.iv5 = phi i32* [ %Src, %for.body.4.lr.ph.preheader ], [ %2,
> %for.cond.cleanup.3 ]
>   %lsr.iv1 = phi i32* [ %Dst, %for.body.4.lr.ph.preheader ], [ %1,
> %for.cond.cleanup.3 ]
>   %Outer.026 = phi i32 [ %inc10, %for.cond.cleanup.3 ], [ 0,
> %for.body.4.lr.ph.preheader ]
>   %lsr.iv56 = bitcast i32* %lsr.iv5 to i1*
>   %lsr.iv12 = bitcast i32* %lsr.iv1 to i1*
>   br label %for.body.4
>
> for.cond.cleanup.loopexit:                        ; preds =
%for.cond.cleanup.3
>   br label %for.cond.cleanup
>
> for.cond.cleanup:                                 ; preds >
%for.cond.cleanup.loopexit, %entry
>   ret void
>
> for.cond.cleanup.3:                               ; preds = %for.body.4
>   %inc10 = add nuw nsw i32 %Outer.026, 1
>   %scevgep = getelementptr i1, i1* %lsr.iv12, i32 %0
>   %1 = bitcast i1* %scevgep to i32*
>   %scevgep7 = getelementptr i1, i1* %lsr.iv56, i32 %0
>   %2 = bitcast i1* %scevgep7 to i32*
>   %exitcond27 = icmp eq i32 %inc10, %Size
>   br i1 %exitcond27, label %for.cond.cleanup.loopexit, label
%for.body.4.lr.ph
>
> for.body.4:                                       ; preds > %for.body.4,
%for.body.4.lr.ph
>   %lsr.iv8 = phi i32* [ %scevgep9, %for.body.4 ], [ %lsr.iv5,
> %for.body.4.lr.ph ]
>   %lsr.iv3 = phi i32* [ %scevgep4, %for.body.4 ], [ %lsr.iv1,
> %for.body.4.lr.ph ]
>   %lsr.iv = phi i32 [ %lsr.iv.next, %for.body.4 ], [ %Size,
%for.body.4.lr.ph ]
>   %3 = load i32, i32* %lsr.iv8, align 4, !tbaa !1
>   %mul5 = mul i32 %3, %Val
>   store i32 %mul5, i32* %lsr.iv3, align 4, !tbaa !1
>   %lsr.iv.next = add i32 %lsr.iv, -1
>   %scevgep4 = getelementptr i32, i32* %lsr.iv3, i32 1
>   %scevgep9 = getelementptr i32, i32* %lsr.iv8, i32 1
>   %exitcond = icmp eq i32 %lsr.iv.next, 0
>   br i1 %exitcond, label %for.cond.cleanup.3, label %for.body.4
> }
>
>
> *** Code after Loop Exit Values Optimization **
>
> ; Function Attrs: nounwind optsize
> define void @matrix_mul(i32 %Size, i32* nocapture %Dst, i32* nocapture
> readonly %Src, i32 %Val) #0 {
> entry:
>   %cmp.25 = icmp eq i32 %Size, 0
>   br i1 %cmp.25, label %for.cond.cleanup, label %for.body.4.lr.ph.preheader
>
> for.body.4.lr.ph.preheader:                       ; preds = %entry
>   br label %for.body.4.lr.ph
>
> for.body.4.lr.ph:                                 ; preds >
%for.body.4.lr.ph.preheader, %for.cond.cleanup.3
>   %lsr.iv5 = phi i32* [ %Src, %for.body.4.lr.ph.preheader ], [
> %scevgep9, %for.cond.cleanup.3 ]
>   %lsr.iv1 = phi i32* [ %Dst, %for.body.4.lr.ph.preheader ], [
> %scevgep4, %for.cond.cleanup.3 ]
>   %Outer.026 = phi i32 [ %inc10, %for.cond.cleanup.3 ], [ 0,
> %for.body.4.lr.ph.preheader ]
>   br label %for.body.4
>
> for.cond.cleanup.loopexit:                        ; preds =
%for.cond.cleanup.3
>   br label %for.cond.cleanup
>
> for.cond.cleanup:                                 ; preds >
%for.cond.cleanup.loopexit, %entry
>   ret void
>
> for.cond.cleanup.3:                               ; preds = %for.body.4
>   %inc10 = add nuw nsw i32 %Outer.026, 1
>   %exitcond27 = icmp eq i32 %inc10, %Size
>   br i1 %exitcond27, label %for.cond.cleanup.loopexit, label
%for.body.4.lr.ph
>
> for.body.4:                                       ; preds > %for.body.4,
%for.body.4.lr.ph
>   %lsr.iv8 = phi i32* [ %scevgep9, %for.body.4 ], [ %lsr.iv5,
> %for.body.4.lr.ph ]
>   %lsr.iv3 = phi i32* [ %scevgep4, %for.body.4 ], [ %lsr.iv1,
> %for.body.4.lr.ph ]
>   %lsr.iv = phi i32 [ %lsr.iv.next, %for.body.4 ], [ %Size,
%for.body.4.lr.ph ]
>   %0 = load i32, i32* %lsr.iv8, align 4, !tbaa !1
>   %mul5 = mul i32 %0, %Val
>   store i32 %mul5, i32* %lsr.iv3, align 4, !tbaa !1
>   %lsr.iv.next = add i32 %lsr.iv, -1
>   %scevgep4 = getelementptr i32, i32* %lsr.iv3, i32 1
>   %scevgep9 = getelementptr i32, i32* %lsr.iv8, i32 1
>   %exitcond = icmp eq i32 %lsr.iv.next, 0
>   br i1 %exitcond, label %for.cond.cleanup.3, label %for.body.4
>
>
>> What value is it avoiding being recomputed?
> I'm not precisely sure, but it's residue from LSR.  The pass checks
> all computable SCEV values when a loop exits and in this case found
> GEPs with the same value.
>
>> How does this pass affect register pressure?
>> Also, your example just removes a mov and an add - the push/pops are
just
>> register allocation (unless your pass in fact *reduces* register
pressure?)
>
> Right, the computation eliminated is the mov and add.  Register
> savings is a byproduct.
>
> Regards,
> -steve

Jake VanAdrighem via llvm-dev

2015-Sep-10 22:43 UTC

head link

[llvm-dev] [RFC] New pass: LoopExitValues

Which cases does this pass handle which aren't otherwise optimized out by
passes like GlobalValueNumbering or DeadCodeElimination?

Thanks,
Jake VanAdrighem

On Thu, Sep 10, 2015 at 2:35 PM, Steve King <steve at metrokings.com>
wrote:
> Hello LLVM,
> It seems this thread has gone cold.  Is there some low risk way for
> the community to take the new pass for a test drive?
> Regards,
> -steve
>
> On Wed, Sep 2, 2015 at 8:27 PM, Steve King <steve at metrokings.com>
wrote:
> > On Wed, Sep 2, 2015 at 5:36 AM, James Molloy <james at
jamesmolloy.co.uk>
> wrote:
> >> Hi,
> >>
> >> Coremark really isn't a good enough test - have you run the
LLVM test
> suite
> >> with this patch, and what were the performance differences?
> >
> > For the test suite single source benches, the 235 tests improved
> > performance, 2 regressed and 705 were unchanged.  That seems very
> > optimistic. Comparing consecutive runs with identical setting shows
> > there is a lot of noise in the performance data.  Tips for stable
> > results would be appreciated.
> >
> >
> >> I'm still a bit confused about what pattern exactly this pass
is
> supposed to
> >> trigger on. I understand the mechanics, but I still can't
quite see what
> >> patterns it would be useful on. You've mentioned matrix
multiply - how
> does
> >> this pass alter the IR?
> >
> > Here's before and after IR for the matrix_mul example.  Notice the
two
> > bitcasts %1 and %2 generated in the for.cond.cleanup block.  The L.E.V
> > pass converts these to scevgep values that already exist.
> >
> > *** Code after LSR ***
> >
> > ; Function Attrs: nounwind optsize
> > define void @matrix_mul(i32 %Size, i32* nocapture %Dst, i32* nocapture
> > readonly %Src, i32 %Val) #0 {
> > entry:
> >   %cmp.25 = icmp eq i32 %Size, 0
> >   br i1 %cmp.25, label %for.cond.cleanup, label
> %for.body.4.lr.ph.preheader
> >
> > for.body.4.lr.ph.preheader:                       ; preds = %entry
> >   %0 = shl i32 %Size, 2
> >   br label %for.body.4.lr.ph
> >
> > for.body.4.lr.ph:                                 ; preds > >
%for.body.4.lr.ph.preheader, %for.cond.cleanup.3
> >   %lsr.iv5 = phi i32* [ %Src, %for.body.4.lr.ph.preheader ], [ %2,
> > %for.cond.cleanup.3 ]
> >   %lsr.iv1 = phi i32* [ %Dst, %for.body.4.lr.ph.preheader ], [ %1,
> > %for.cond.cleanup.3 ]
> >   %Outer.026 = phi i32 [ %inc10, %for.cond.cleanup.3 ], [ 0,
> > %for.body.4.lr.ph.preheader ]
> >   %lsr.iv56 = bitcast i32* %lsr.iv5 to i1*
> >   %lsr.iv12 = bitcast i32* %lsr.iv1 to i1*
> >   br label %for.body.4
> >
> > for.cond.cleanup.loopexit:                        ; preds >
%for.cond.cleanup.3
> >   br label %for.cond.cleanup
> >
> > for.cond.cleanup:                                 ; preds > >
%for.cond.cleanup.loopexit, %entry
> >   ret void
> >
> > for.cond.cleanup.3:                               ; preds =
%for.body.4
> >   %inc10 = add nuw nsw i32 %Outer.026, 1
> >   %scevgep = getelementptr i1, i1* %lsr.iv12, i32 %0
> >   %1 = bitcast i1* %scevgep to i32*
> >   %scevgep7 = getelementptr i1, i1* %lsr.iv56, i32 %0
> >   %2 = bitcast i1* %scevgep7 to i32*
> >   %exitcond27 = icmp eq i32 %inc10, %Size
> >   br i1 %exitcond27, label %for.cond.cleanup.loopexit, label %
> for.body.4.lr.ph
> >
> > for.body.4:                                       ; preds > >
%for.body.4, %for.body.4.lr.ph
> >   %lsr.iv8 = phi i32* [ %scevgep9, %for.body.4 ], [ %lsr.iv5,
> > %for.body.4.lr.ph ]
> >   %lsr.iv3 = phi i32* [ %scevgep4, %for.body.4 ], [ %lsr.iv1,
> > %for.body.4.lr.ph ]
> >   %lsr.iv = phi i32 [ %lsr.iv.next, %for.body.4 ], [ %Size, %
> for.body.4.lr.ph ]
> >   %3 = load i32, i32* %lsr.iv8, align 4, !tbaa !1
> >   %mul5 = mul i32 %3, %Val
> >   store i32 %mul5, i32* %lsr.iv3, align 4, !tbaa !1
> >   %lsr.iv.next = add i32 %lsr.iv, -1
> >   %scevgep4 = getelementptr i32, i32* %lsr.iv3, i32 1
> >   %scevgep9 = getelementptr i32, i32* %lsr.iv8, i32 1
> >   %exitcond = icmp eq i32 %lsr.iv.next, 0
> >   br i1 %exitcond, label %for.cond.cleanup.3, label %for.body.4
> > }
> >
> >
> > *** Code after Loop Exit Values Optimization **
> >
> > ; Function Attrs: nounwind optsize
> > define void @matrix_mul(i32 %Size, i32* nocapture %Dst, i32* nocapture
> > readonly %Src, i32 %Val) #0 {
> > entry:
> >   %cmp.25 = icmp eq i32 %Size, 0
> >   br i1 %cmp.25, label %for.cond.cleanup, label
> %for.body.4.lr.ph.preheader
> >
> > for.body.4.lr.ph.preheader:                       ; preds = %entry
> >   br label %for.body.4.lr.ph
> >
> > for.body.4.lr.ph:                                 ; preds > >
%for.body.4.lr.ph.preheader, %for.cond.cleanup.3
> >   %lsr.iv5 = phi i32* [ %Src, %for.body.4.lr.ph.preheader ], [
> > %scevgep9, %for.cond.cleanup.3 ]
> >   %lsr.iv1 = phi i32* [ %Dst, %for.body.4.lr.ph.preheader ], [
> > %scevgep4, %for.cond.cleanup.3 ]
> >   %Outer.026 = phi i32 [ %inc10, %for.cond.cleanup.3 ], [ 0,
> > %for.body.4.lr.ph.preheader ]
> >   br label %for.body.4
> >
> > for.cond.cleanup.loopexit:                        ; preds >
%for.cond.cleanup.3
> >   br label %for.cond.cleanup
> >
> > for.cond.cleanup:                                 ; preds > >
%for.cond.cleanup.loopexit, %entry
> >   ret void
> >
> > for.cond.cleanup.3:                               ; preds =
%for.body.4
> >   %inc10 = add nuw nsw i32 %Outer.026, 1
> >   %exitcond27 = icmp eq i32 %inc10, %Size
> >   br i1 %exitcond27, label %for.cond.cleanup.loopexit, label %
> for.body.4.lr.ph
> >
> > for.body.4:                                       ; preds > >
%for.body.4, %for.body.4.lr.ph
> >   %lsr.iv8 = phi i32* [ %scevgep9, %for.body.4 ], [ %lsr.iv5,
> > %for.body.4.lr.ph ]
> >   %lsr.iv3 = phi i32* [ %scevgep4, %for.body.4 ], [ %lsr.iv1,
> > %for.body.4.lr.ph ]
> >   %lsr.iv = phi i32 [ %lsr.iv.next, %for.body.4 ], [ %Size, %
> for.body.4.lr.ph ]
> >   %0 = load i32, i32* %lsr.iv8, align 4, !tbaa !1
> >   %mul5 = mul i32 %0, %Val
> >   store i32 %mul5, i32* %lsr.iv3, align 4, !tbaa !1
> >   %lsr.iv.next = add i32 %lsr.iv, -1
> >   %scevgep4 = getelementptr i32, i32* %lsr.iv3, i32 1
> >   %scevgep9 = getelementptr i32, i32* %lsr.iv8, i32 1
> >   %exitcond = icmp eq i32 %lsr.iv.next, 0
> >   br i1 %exitcond, label %for.cond.cleanup.3, label %for.body.4
> >
> >
> >> What value is it avoiding being recomputed?
> > I'm not precisely sure, but it's residue from LSR.  The pass
checks
> > all computable SCEV values when a loop exits and in this case found
> > GEPs with the same value.
> >
> >> How does this pass affect register pressure?
> >> Also, your example just removes a mov and an add - the push/pops
are
> just
> >> register allocation (unless your pass in fact *reduces* register
> pressure?)
> >
> > Right, the computation eliminated is the mov and add.  Register
> > savings is a byproduct.
> >
> > Regards,
> > -steve
>-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20150910/7e781ecb/attachment.html>

Possibly Parallel Threads

Search for more seemingly similar threads

llvm dev - Sep 2015 - [RFC] New pass: LoopExitValues

[llvm-dev] [RFC] New pass: LoopExitValues

[llvm-dev] [RFC] New pass: LoopExitValues

[llvm-dev] [RFC] New pass: LoopExitValues

Possibly Parallel Threads