//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// NOTE(review): the space-separated terms of a +build line are OR'ed, so the
// constraint above excludes this file only when both tags are set; confirm
// that this is the intent. The file is generated, so comments and fixes made
// here are lost on regeneration unless the generator input is updated too.

// _sum_float64_sse4 adds up len 8-byte floating-point values starting at buf
// and stores the 8-byte sum at the address held in res.
//
// Frame $0-24: no locals, three 8-byte arguments read through FP:
//   buf+0(FP)  -> DI  base address of the input
//   len+8(FP)  -> SI  element count, compared as an unsigned 64-bit value
//   res+16(FP) -> DX  address the sum is written to
// NOTE(review): presumably declared on the Go side as a body-less func with
// three pointer-sized parameters -- confirm against the Go stub.
//
// Instructions are emitted as raw x86-64 bytes; the trailing comment on each
// such line is the Intel-syntax disassembly of those bytes.
//
// Flow:
//   len == 0  -> store 0.0 and return.
//   len <= 3  -> scalar loop over every element.
//   otherwise -> the first r9 = len & ~3 elements are summed with two packed
//                accumulators (xmm0, xmm1); the remaining len - r9 elements
//                go through the scalar loop.
// The packed path does not add the elements in index order, so its rounding
// can differ from a strictly sequential sum.
//
// Scratch registers besides DI/SI/DX: AX, CX, R8, R9, X0-X7 (and flags).
TEXT ·_sum_float64_sse4(SB), $0-24

    MOVQ buf+0(FP), DI
    MOVQ len+8(FP), SI
    MOVQ res+16(FP), DX

    // xmm0 = 0.0: the value stored when len == 0 and the scalar accumulator.
    LONG $0xc0570f66               // xorpd xmm0, xmm0
    WORD $0x8548; BYTE $0xf6       // test rsi, rsi
    JE LBB0_14

    // Fewer than 4 elements: nothing to vectorize, take the scalar loop.
    LONG $0x03fe8348               // cmp rsi, 3
    JBE LBB0_2

    // r9 = len rounded down to a multiple of 4 = elements for the packed path.
    // With len >= 4 this is never zero, so the JE below is not taken here.
    WORD $0x8949; BYTE $0xf1       // mov r9, rsi
    LONG $0xfce18349               // and r9, -4
    JE LBB0_2

    // eax = ((r9 - 4) / 4 + 1) & 3 = number of 4-element groups to peel so
    // that what is left for the unrolled loop is a multiple of 16 elements.
    // Only the low two bits survive, so the 32-bit arithmetic is sufficient.
    LONG $0xfc418d4d               // lea r8, [r9 - 4]
    WORD $0x8944; BYTE $0xc0       // mov eax, r8d
    WORD $0xe8c1; BYTE $0x02       // shr eax, 2
    WORD $0xc0ff                   // inc eax
    LONG $0x03e08348               // and rax, 3
    JE LBB0_7

    // rax = -(groups to peel), counted up to zero; rcx = element index.
    WORD $0xf748; BYTE $0xd8       // neg rax
    LONG $0xc0570f66               // xorpd xmm0, xmm0
    WORD $0xc931                   // xor ecx, ecx
    LONG $0xc9570f66               // xorpd xmm1, xmm1

// Peel loop: 4 doubles per pass, elements rcx..rcx+1 into xmm0 and
// rcx+2..rcx+3 into xmm1.
LBB0_9:
    LONG $0x14100f66; BYTE $0xcf   // movupd xmm2, oword [rdi + 8*rcx]
    LONG $0x5c100f66; WORD $0x10cf // movupd xmm3, oword [rdi + 8*rcx + 16]
    LONG $0xc2580f66               // addpd xmm0, xmm2
    LONG $0xcb580f66               // addpd xmm1, xmm3
    LONG $0x04c18348               // add rcx, 4
    WORD $0xff48; BYTE $0xc0       // inc rax
    JNE LBB0_9
    JMP LBB0_10

// Scalar-only entry: start at element 0.
LBB0_2:
    WORD $0x3145; BYTE $0xc9       // xor r9d, r9d

// Scalar setup: rax = &buf[r9], rsi = len - r9 (always > 0 on every path
// that reaches this label).
LBB0_3:
    LONG $0xcf048d4a               // lea rax, [rdi + 8*r9]
    WORD $0x294c; BYTE $0xce       // sub rsi, r9

// Scalar loop: add one double per pass into the low lane of xmm0.
LBB0_4:
    LONG $0x00580ff2               // addsd xmm0, qword [rax]
    LONG $0x08c08348               // add rax, 8
    WORD $0xff48; BYTE $0xce       // dec rsi
    JNE LBB0_4

// Exit: write the low double of xmm0 to *res.
LBB0_14:
    LONG $0x02110ff2               // movsd qword [rdx], xmm0
    RET

// No peeling required: element index 0, both accumulators cleared.
LBB0_7:
    WORD $0xc931                   // xor ecx, ecx
    LONG $0xc0570f66               // xorpd xmm0, xmm0
    LONG $0xc9570f66               // xorpd xmm1, xmm1

// r8 < 12 means r9 < 16, i.e. no full 16-element group exists: skip the
// unrolled loop. Otherwise rax = r9 - rcx elements remain (a multiple of 16)
// and rcx becomes a byte pointer 112 past the next element, so that the
// eight loads below use displacements -112 .. 0.
LBB0_10:
    LONG $0x0cf88349               // cmp r8, 12
    JB LBB0_13
    WORD $0x894c; BYTE $0xc8       // mov rax, r9
    WORD $0x2948; BYTE $0xc8       // sub rax, rcx
    LONG $0xcf4c8d48; BYTE $0x70   // lea rcx, [rdi + 8*rcx + 112]

// Unrolled loop: 16 doubles (128 bytes) per pass. Even-numbered 16-byte
// chunks end up in xmm0, odd-numbered chunks in xmm1.
LBB0_12:
    LONG $0x51100f66; BYTE $0x90   // movupd xmm2, oword [rcx - 112]
    LONG $0x59100f66; BYTE $0xa0   // movupd xmm3, oword [rcx - 96]
    LONG $0x61100f66; BYTE $0xb0   // movupd xmm4, oword [rcx - 80]
    LONG $0x69100f66; BYTE $0xc0   // movupd xmm5, oword [rcx - 64]
    LONG $0xd0580f66               // addpd xmm2, xmm0
    LONG $0xd9580f66               // addpd xmm3, xmm1
    LONG $0x71100f66; BYTE $0xd0   // movupd xmm6, oword [rcx - 48]
    LONG $0x79100f66; BYTE $0xe0   // movupd xmm7, oword [rcx - 32]
    LONG $0xf4580f66               // addpd xmm6, xmm4
    LONG $0xf2580f66               // addpd xmm6, xmm2
    LONG $0xfd580f66               // addpd xmm7, xmm5
    LONG $0xfb580f66               // addpd xmm7, xmm3
    LONG $0x41100f66; BYTE $0xf0   // movupd xmm0, oword [rcx - 16]
    LONG $0x09100f66               // movupd xmm1, oword [rcx]
    LONG $0xc6580f66               // addpd xmm0, xmm6
    LONG $0xcf580f66               // addpd xmm1, xmm7
    LONG $0x80e98348               // sub rcx, -128 (pointer += 128 bytes)
    LONG $0xf0c08348               // add rax, -16 (16 fewer elements left)
    JNE LBB0_12

// Reduce: merge the two accumulators, then add the two lanes together so the
// total sits in the low lane. If r9 == len there is no tail; otherwise the
// scalar loop finishes elements r9 .. len-1.
LBB0_13:
    LONG $0xc1580f66               // addpd xmm0, xmm1
    LONG $0xc07c0f66               // haddpd xmm0, xmm0
    WORD $0x3949; BYTE $0xf1       // cmp r9, rsi
    JNE LBB0_3
    JMP LBB0_14