Ilia Mirkin
2014-Feb-09  20:51 UTC
[Nouveau] [PATCH 1/2] drm/nouveau: replace ffsll with __ffs64
The ffsll function is a lot slower than the __ffs64 built-in which
compiles to a single instruction on 64-bit. It's also nice to avoid
custom versions of standard functions. Note that __ffs == ffs - 1.
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
I wrote a user-space program to test these out and make sure that the
functions behaved as expected. The logic in abi16 had to be flipped around a
bit since the result of __ffs64 can't be used to tell an input of 0 apart
from an input of 1. There's a minor difference in that init->channel is going
to get returned as 0 for ENOSPC instead of -1, but I can't imagine that'd
matter.
 drivers/gpu/drm/nouveau/core/core/parent.c |  2 +-
 drivers/gpu/drm/nouveau/core/os.h          | 11 -----------
 drivers/gpu/drm/nouveau/nouveau_abi16.c    |  4 ++--
 3 files changed, 3 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/core/core/parent.c b/drivers/gpu/drm/nouveau/core/core/parent.c
index 313380c..dee5d12 100644
--- a/drivers/gpu/drm/nouveau/core/core/parent.c
+++ b/drivers/gpu/drm/nouveau/core/core/parent.c
@@ -49,7 +49,7 @@ nouveau_parent_sclass(struct nouveau_object *parent, u16 handle,
 
 	mask = nv_parent(parent)->engine;
 	while (mask) {
-		int i = ffsll(mask) - 1;
+		int i = __ffs64(mask);
 
 		if (nv_iclass(parent, NV_CLIENT_CLASS))
 			engine = nv_engine(nv_client(parent)->device);
diff --git a/drivers/gpu/drm/nouveau/core/os.h b/drivers/gpu/drm/nouveau/core/os.h
index 191e739..3cd6120 100644
--- a/drivers/gpu/drm/nouveau/core/os.h
+++ b/drivers/gpu/drm/nouveau/core/os.h
@@ -23,17 +23,6 @@
 
 #include <asm/unaligned.h>
 
-static inline int
-ffsll(u64 mask)
-{
-	int i;
-	for (i = 0; i < 64; i++) {
-		if (mask & (1ULL << i))
-			return i + 1;
-	}
-	return 0;
-}
-
 #ifndef ioread32_native
 #ifdef __BIG_ENDIAN
 #define ioread16_native ioread16be
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 900fae0..b701117 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -270,8 +270,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 		return nouveau_abi16_put(abi16, -EINVAL);
 
 	/* allocate "abi16 channel" data and make up a handle for it */
-	init->channel = ffsll(~abi16->handles);
-	if (!init->channel--)
+	init->channel = __ffs64(~abi16->handles);
+	if (~abi16->handles == 0)
 		return nouveau_abi16_put(abi16, -ENOSPC);
 
 	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
-- 
1.8.3.2
Ilia Mirkin
2014-Feb-09  20:51 UTC
[Nouveau] [PATCH 2/2] drm/nouveau/abi16: fix handles past the 32nd one
abi16->handles is a u64, so make sure to use 1ULL << val when
modifying.
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
Noticed this when doing the previous patch. I'm not sure whether this
affects 64-bit builds or not; I didn't care to look at the assembly or
check the standard.
 drivers/gpu/drm/nouveau/nouveau_abi16.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index b701117..66abf4d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -139,7 +139,7 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
 
 	/* destroy channel object, all children will be killed too */
 	if (chan->chan) {
-		abi16->handles &= ~(1 << (chan->chan->handle & 0xffff));
+		abi16->handles &= ~(1ULL << (chan->chan->handle & 0xffff));
 		nouveau_channel_del(&chan->chan);
 	}
 
@@ -280,7 +280,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 
 	INIT_LIST_HEAD(&chan->notifiers);
 	list_add(&chan->head, &abi16->channels);
-	abi16->handles |= (1 << init->channel);
+	abi16->handles |= (1ULL << init->channel);
 
 	/* create channel object and initialise dma and fence management */
 	ret = nouveau_channel_new(drm, cli, NVDRM_DEVICE, NVDRM_CHAN |
-- 
1.8.3.2
Ben Skeggs
2014-Feb-10  04:26 UTC
[Nouveau] [PATCH 1/2] drm/nouveau: replace ffsll with __ffs64
On Mon, Feb 10, 2014 at 6:51 AM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
> The ffsll function is a lot slower than the __ffs64 built-in which
> compiles to a single instruction on 64-bit. It's also nice to avoid
> custom versions of standard functions. Note that __ffs == ffs - 1.
>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>

Merged both patches in the series. Thanks.

> ---
>
> I wrote a user-space program to test these out and make sure that the
> functions behaved as expected. The logic in abi16 had to be flipped around a
> bit since __ffs doesn't distinguish between 0 and 1. There's a minor
> difference in that init->channel is going to get returned as 0 for ENOSPC vs
> -1, but I can't imagine that'd matter.
>
>  drivers/gpu/drm/nouveau/core/core/parent.c |  2 +-
>  drivers/gpu/drm/nouveau/core/os.h          | 11 -----------
>  drivers/gpu/drm/nouveau/nouveau_abi16.c    |  4 ++--
>  3 files changed, 3 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/core/core/parent.c b/drivers/gpu/drm/nouveau/core/core/parent.c
> index 313380c..dee5d12 100644
> --- a/drivers/gpu/drm/nouveau/core/core/parent.c
> +++ b/drivers/gpu/drm/nouveau/core/core/parent.c
> @@ -49,7 +49,7 @@ nouveau_parent_sclass(struct nouveau_object *parent, u16 handle,
>
>  	mask = nv_parent(parent)->engine;
>  	while (mask) {
> -		int i = ffsll(mask) - 1;
> +		int i = __ffs64(mask);
>
>  		if (nv_iclass(parent, NV_CLIENT_CLASS))
>  			engine = nv_engine(nv_client(parent)->device);
> diff --git a/drivers/gpu/drm/nouveau/core/os.h b/drivers/gpu/drm/nouveau/core/os.h
> index 191e739..3cd6120 100644
> --- a/drivers/gpu/drm/nouveau/core/os.h
> +++ b/drivers/gpu/drm/nouveau/core/os.h
> @@ -23,17 +23,6 @@
>
>  #include <asm/unaligned.h>
>
> -static inline int
> -ffsll(u64 mask)
> -{
> -	int i;
> -	for (i = 0; i < 64; i++) {
> -		if (mask & (1ULL << i))
> -			return i + 1;
> -	}
> -	return 0;
> -}
> -
>  #ifndef ioread32_native
>  #ifdef __BIG_ENDIAN
>  #define ioread16_native ioread16be
> diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
> index 900fae0..b701117 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
> @@ -270,8 +270,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
>  		return nouveau_abi16_put(abi16, -EINVAL);
>
>  	/* allocate "abi16 channel" data and make up a handle for it */
> -	init->channel = ffsll(~abi16->handles);
> -	if (!init->channel--)
> +	init->channel = __ffs64(~abi16->handles);
> +	if (~abi16->handles == 0)
>  		return nouveau_abi16_put(abi16, -ENOSPC);
>
>  	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
> --
> 1.8.3.2
>
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
Reasonably Related Threads
- [PATCH] drm/nouveau: idle all channels before suspending
- [PATCH 0/2] drm/nouveau: Use more standard logging styles
- [PATCH 2/2] drm/nouveau/abi16: fix handles past the 32nd one
- [PATCH 1/2] drm/nouveau: hold mutex while syncing to kernel channel
- [PATCH] drm/nouveau: fix handling empty channel list in ioctl's