Displaying 20 results from an estimated 1242 matches for "r14".
Did you mean:
14
2007 Dec 02
2
Optimised qmf_synth and iir_mem16
...@ r4 = mem, r5 = ord
cmp r5, #10
beq .order_10
cmp r5, #8
beq .order_8
ldmia sp!, { r4-r11, pc } @ Non-supported order, return
@ TODO: try using direct form 1 filtering
.order_8:
ldmia r4, { r5-r12 } @ r5-r12 = mem[0..7]
0:
ldrsh r14, [r0], #2
add r5, r5, #4096 @ Rounding constant
str r0, [sp,#-4]! @ push r0
add r14, r14, r5, asr #13 @ (mem[0] + 4096) >> 13 + x[i]
mov r5, #0x7f00
orr r5, r5, #0xff @ r5 = 32767
cmp r14, r5
movgt r14, r5...
2014 Feb 08
3
[PATCH 1/2] arm: Use the UAL syntax for ldr<cc>h instructions
On Fri, 7 Feb 2014, Timothy B. Terriberry wrote:
> Martin Storsjo wrote:
>> This is required in order to build using the built-in assembler
>> in clang.
>
> These patches break the gcc build (with "Error: bad instruction").
Ah, right, sorry about that.
> Documentation I've seen is contradictory on which order ({cond}{size} or
> {size}{cond}) is correct.
2010 Sep 04
6
[LLVMdev] Possible missed optimization?
Hello, while testing trivial functions in my backend I noticed a suboptimal
way of assigning regs that had the following pattern. Consider the following
function:
typedef unsigned short t;
t foo(t a, t b)
{
t a4 = b^a^18;
return a4;
}
Argument "a" is passed in R15:R14 and argument "b" is passed in R13:R12, the
return value is stored in R15:R14.
Producing the following asm code:
xor r15, r13 ; xor top part
mov r8, r14
xor r8, r12 ; xor bottom part
movi r14, 18
xor r14, r8 ; xor bottom part with imm value
However...
2008 May 28
0
ia64/pv_ops: preparation: move some functions in ivt.S to avoid lack of space.
...---------------------------
- // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
-ENTRY(page_fault)
- SSM_PSR_DT_AND_SRLZ_I
- ;;
- SAVE_MIN_WITH_COVER
- alloc r15=ar.pfs,0,0,3,0
- MOV_FROM_IFA(out0)
- MOV_FROM_ISR(out1)
- SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3)
- adds r3=8,r2 // set up second base pointer
- SSM_PSR_I(p15, p15, r14) // restore psr.i
- movl r14=ia64_leave_kernel
- ;;
- SAVE_REST
- mov rp=r14
- ;;
- adds out2=16,r12 // out2 = pointer to pt_regs
- br.call.sptk.many b6=ia64_do_page_fault // ignore return address
-END(page_fault)
-...
2008 May 28
0
ia64/pv_ops: preparation: move some functions in ivt.S to avoid lack of space.
...---------------------------
- // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
-ENTRY(page_fault)
- SSM_PSR_DT_AND_SRLZ_I
- ;;
- SAVE_MIN_WITH_COVER
- alloc r15=ar.pfs,0,0,3,0
- MOV_FROM_IFA(out0)
- MOV_FROM_ISR(out1)
- SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3)
- adds r3=8,r2 // set up second base pointer
- SSM_PSR_I(p15, p15, r14) // restore psr.i
- movl r14=ia64_leave_kernel
- ;;
- SAVE_REST
- mov rp=r14
- ;;
- adds out2=16,r12 // out2 = pointer to pt_regs
- br.call.sptk.many b6=ia64_do_page_fault // ignore return address
-END(page_fault)
-...
2014 Feb 08
0
[PATCH v2] arm: Use the UAL syntax for instructions
...rm.s
index 09917b1..598e45b 100644
--- a/celt/arm/celt_pitch_xcorr_arm.s
+++ b/celt/arm/celt_pitch_xcorr_arm.s
@@ -309,7 +309,7 @@ xcorr_kernel_edsp_process4_done
SUBS r2, r2, #1 ; j--
; Stall
SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0)
- LDRGTH r14, [r4], #2 ; r14 = *x++
+ LDRHGT r14, [r4], #2 ; r14 = *x++
SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1)
SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2)
SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3)
@@ -319,7 +319,7...
2010 Sep 01
5
[LLVMdev] equivalent IR, different asm
...n __TEXT,__text,regular,pure_instructions
.globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
.align 4, 0x90
__ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE: ## @_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
## BB#0:
pushq %r14
pushq %rbx
subq $8, %rsp
movq %rsi, %rbx
movq %rdi, %r14
movq %rdx, %rdi
movq %rcx, %rsi
callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE
movq %rax, %rcx
shrq $32, %rcx
testl %ecx, %ecx
je LBB0_2
## BB#1:
imull (%rbx), %eax
cltd
idivl %ecx
movl %eax, (%r14)
LBB0_...
2014 Feb 07
3
[PATCH 1/2] arm: Use the UAL syntax for ldr<cc>h instructions
...rm.s
index 09917b1..3c4b950 100644
--- a/celt/arm/celt_pitch_xcorr_arm.s
+++ b/celt/arm/celt_pitch_xcorr_arm.s
@@ -309,7 +309,7 @@ xcorr_kernel_edsp_process4_done
SUBS r2, r2, #1 ; j--
; Stall
SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0)
- LDRGTH r14, [r4], #2 ; r14 = *x++
+ LDRHGT r14, [r4], #2 ; r14 = *x++
SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1)
SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2)
SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3)
@@ -319,7 +319,7...
2010 Sep 01
0
[LLVMdev] equivalent IR, different asm
...ure_instructions
> .globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
> .align 4, 0x90
> __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE: ## @_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
> ## BB#0:
> pushq %r14
> pushq %rbx
> subq $8, %rsp
> movq %rsi, %rbx
> movq %rdi, %r14
> movq %rdx, %rdi
> movq %rcx, %rsi
> callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE
> movq %rax, %rcx
> shrq $32, %rcx
> testl %ecx, %ecx
> je LBB0_2
> ## BB#1:
> ...
2010 Sep 09
2
[LLVMdev] Possible missed optimization? 2.0
...possible missed optimization while testing more
trivial code.
This time it's not with a xor but with a multiplication instruction, and
the example is a little bit more involved.
C code:
typedef short t;
t foo(t a, t b)
{
t a4 = a*b;
return a4;
}
argument "a" is passed in R15:R14, argument "b" in R13:R12, the return value
is stored in R15:R14.
The mul instruction takes in two 8bit regs and returns a 16bit result in
R1:R0, this is handled in the selectionDAG same way as x86 (btw mul is
marked as commutable).
Asm code:
mul r12, r15
mov r8, r0
mul...
2015 Oct 26
9
[PATCH 0/4] Add pdaemon load counters
This series makes use of the load counters, which we can use to get information about
the current load of the GPU.
This series includes the needed pmu bits and a debugfs interface to read them
out. Currently the values are between 0 and 255, because it is much easier to
implement it this way on the pmu.
Karol Herbst (4):
subdev/pmu/fuc: add gk104
pmu/fuc: add macros for pdaemon pwr counters
2010 Sep 04
3
[LLVMdev] Possible missed optimization?
On Sep 4, 2010, at 11:21 AM, Borja Ferrer wrote:
> I've noticed this pattern happening with other operators as well, but used xor in this example. As I said before, I tried with different register allocation orders, but it will always produce the same result. GCC is emitting longer code, but since LLVM is so much nearer to the optimal code sequence I wanted to reach it.
In LLVM, copies are
2010 Sep 21
1
[LLVMdev] Possible missed optimization on function calling?
...int b)
{
int a4 = mdiv(mcos(a), msin(b));
return a4;
}
I noticed this while testing it for the backend I'm currently developing,
but it produces exactly the same code for other targets:
march = msp430:
push.w r11
push.w r10
push.w r9
push.w r8
mov.w r14, r11
mov.w r15, r10 ; store a
mov.w r13, r15
mov.w r12, r14 ; pass b
call #msin
mov.w r15, r9
mov.w r14, r8 ; store msin(b)
mov.w r10, r15
mov.w r11, r14 ; pass a
call #mcos
mov.w r9, r13 ; pass msin(b)
mov.w r8,...
2015 Jul 13
5
[LLVMdev] Poor register allocations vs gcc
...c app2.c -S
against gcc:
gcc_custom -std=c11 -O3 -march=native -c app2.c -S
Versions (latest for each, downloaded just a few days ago):
gcc : 5.1
clang/llvm: clang+llvm-3.6.1-x86_64-apple-darwin
Host:
osx yosemite.
The assembly (cut to the essential):
LLVM:
pushq %rbp
movq %rsp, %rbp
pushq %r14
pushq %rbx
movl %edi, %r14d
leal 71(%r14), %eax
xorl %ecx, %ecx
cmpl $56, %eax
movl $92, %ebx
cmovnel %ecx, %ebx
leaq L_.str(%rip), %rdi
callq _puts
leal 71(%rbx,%r14), %eax
popq %rbx
popq %r14
popq %rbp
retq
and the gcc one:
pushq %rbp
movl $0, %eax
movl $92, %ebp
pushq...
2016 Jun 25
3
Tail call optimization is getting affected due to local function related optimization with IPRA
...o for that function the following is the set of clobbered
registers as
per the regmasks collected by the RegUsageInfoCollector pass.
Function Name : bitrv2
Clobbered Registers:
AH AL AX BH BL BP BPL BX CH CL CX DI DIL EAX EBP EBX ECX EDI EFLAGS ESI ESP
RAX
RBP RBX RCX RDI RSI RSP SI SIL SP SPL R8 R9 R10 R11 R12 R13 R14 R15 R8B R9B
R10B
R11B R12B R13B R14B R15B R8D R9D R10D R11D R12D R13D R14D R15D R8W R9W R10W
R11W
R12W R13W R14W R15W
However, the caller of bitrv2, makewt, has callee saved registers as per CC, but
this
code results in a segmentation fault when compiled with O1 because makewt
has value
of *ip in R14 re...
2007 Nov 14
10
[GE users] Apple Leopard has dtrace -- anyone used the SGE probes/scripts yet?
Hi,
Chris (cc) and I are trying to get the SGE master monitor to work with Apple Leopard
dtrace. Unfortunately we are stuck with the error msg below.
Anyone having an idea what could be the cause? What I can rule out as
cause is function inlining for the reasons explained below.
Background information on SGE master monitor implementation is under
http://wiki.gridengine.info/wiki/index.php/Dtrace
2010 Jun 29
0
performance breakdown with Xen 3.4.3 or 4.0.0 and xenified kernel 2.6.31-r14
Hello,
I regularly perform benchmarks of Xen/KVM systems. When Xen 3.4.3 was released I started another benchmark and found out that the xenified kernel 2.6.31-r14 from Andrew Lyon had serious performance breakdowns when used with Xen 3.4.3 or Xen 4.0.0 but not with Xen 3.4.2. The benchmark measures compiling apache 2.2.15 under gentoo.
What could be the reason?
date: 02.06.2010
time emerge apache 2.2.15
native 2.6.31.12-ak2
real 0m40.736s
user 0m4...
2012 Feb 13
0
[PATCH 05/14] arm: implement exception and hypercall entries.
...INE(OFFSET_VCPU_R10, offsetof(struct vcpu_guest_context, r10));
+ DEFINE(OFFSET_VCPU_R11, offsetof(struct vcpu_guest_context, r11));
+ DEFINE(OFFSET_VCPU_R12, offsetof(struct vcpu_guest_context, r12));
+ DEFINE(OFFSET_VCPU_R13, offsetof(struct vcpu_guest_context, r13));
+ DEFINE(OFFSET_VCPU_R14, offsetof(struct vcpu_guest_context, r14));
+ DEFINE(OFFSET_VCPU_R15, offsetof(struct vcpu_guest_context, r15));
+ DEFINE(OFFSET_VCPU_DACR, offsetof(struct vcpu_guest_context, dacr));
+ DEFINE(OFFSET_VCPU_VBAR, offsetof(struct vcpu_guest_context, vbar));
+ DEFINE(OFFSET_VCPU_CONTEXTIDR, offs...
2016 Jun 25
0
Tail call optimization is getting affected due to local function related optimization with IPRA
...clobbered
> registers as
> per the regmasks collected by the RegUsageInfoCollector pass.
>
> Function Name : bitrv2
> Clobbered Registers:
> AH AL AX BH BL BP BPL BX CH CL CX DI DIL EAX EBP EBX ECX EDI EFLAGS ESI
> ESP RAX
> RBP RBX RCX RDI RSI RSP SI SIL SP SPL R8 R9 R10 R11 R12 R13 R14 R15 R8B
> R9B R10B
> R11B R12B R13B R14B R15B R8D R9D R10D R11D R12D R13D R14D R15D R8W R9W
> R10W R11W
> R12W R13W R14W R15W
>
> However, the caller of bitrv2, makewt, has callee saved registers as per CC,
> but this
> code results in a segmentation fault when compiled with O1 be...
2016 Jun 26
3
Tail call optimization is getting affected due to local function related optimization with IPRA
...>> per the regmasks collected by the RegUsageInfoCollector pass.
>>
>> Function Name : bitrv2
>> Clobbered Registers:
>> AH AL AX BH BL BP BPL BX CH CL CX DI DIL EAX EBP EBX ECX EDI EFLAGS ESI
>> ESP RAX
>> RBP RBX RCX RDI RSI RSP SI SIL SP SPL R8 R9 R10 R11 R12 R13 R14 R15 R8B
>> R9B R10B
>> R11B R12B R13B R14B R15B R8D R9D R10D R11D R12D R13D R14D R15D R8W R9W
>> R10W R11W
>> R12W R13W R14W R15W
>>
>> However, the caller of bitrv2, makewt, has callee saved registers as per CC,
>> but this
>> code results in segmentation...