0000000000400698 <main>:
#include
#include

int main() {
  400698:   a9b37bfd    stp     x29, x30, [sp, #-208]!
  40069c:   910003fd    mov     x29, sp
    // Initialize two float arrays
    float a[4] = {1.0, 2.0, 3.0, 4.0};
  4006a0:   1e2e1000    fmov    s0, #1.000000000000000000e+00
  4006a4:   bd0033e0    str     s0, [sp, #48]
  4006a8:   1e201000    fmov    s0, #2.000000000000000000e+00
  4006ac:   bd0037e0    str     s0, [sp, #52]
  4006b0:   1e211000    fmov    s0, #3.000000000000000000e+00
  4006b4:   bd003be0    str     s0, [sp, #56]
  4006b8:   1e221000    fmov    s0, #4.000000000000000000e+00
  4006bc:   bd003fe0    str     s0, [sp, #60]
    float b[4] = {5.0, 6.0, 7.0, 8.0};
  4006c0:   1e229000    fmov    s0, #5.000000000000000000e+00
  4006c4:   bd0023e0    str     s0, [sp, #32]
  4006c8:   1e231000    fmov    s0, #6.000000000000000000e+00
  4006cc:   bd0027e0    str     s0, [sp, #36]
  4006d0:   1e239000    fmov    s0, #7.000000000000000000e+00
  4006d4:   bd002be0    str     s0, [sp, #40]
  4006d8:   1e241000    fmov    s0, #8.000000000000000000e+00
  4006dc:   bd002fe0    str     s0, [sp, #44]
  4006e0:   9100c3e0    add     x0, sp, #0x30
  4006e4:   f90023e0    str     x0, [sp, #64]
objdump: Warning: source file /usr/lib/gcc/aarch64-oe-linux/13.3.0/include/arm_neon.h is more recent than object file
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_f32 (const float32_t *__a)
{
  return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) __a);
  4006e8:   f94023e0    ldr     x0, [sp, #64]
  4006ec:   3dc00000    ldr     q0, [x0]
    float result[4];

    // Load arrays into NEON registers
    float32x4_t a_vec = vld1q_f32(a);
  4006f0:   3d802fe0    str     q0, [sp, #176]
  4006f4:   910083e0    add     x0, sp, #0x20
  4006f8:   f90027e0    str     x0, [sp, #72]
  4006fc:   f94027e0    ldr     x0, [sp, #72]
  400700:   3dc00000    ldr     q0, [x0]
    float32x4_t b_vec = vld1q_f32(b);
  400704:   3d802be0    str     q0, [sp, #160]
  400708:   3dc02fe0    ldr     q0, [sp, #176]
  40070c:   3d801be0    str     q0, [sp, #96]
  400710:   3dc02be0    ldr     q0, [sp, #160]
  400714:   3d8017e0    str     q0, [sp, #80]
  return __a + __b;
  400718:   3dc01be1    ldr     q1, [sp, #96]
  40071c:   3dc017e0    ldr     q0, [sp, #80]
  400720:   4e20d420    fadd    v0.4s, v1.4s, v0.4s

    // Perform vector addition
    float32x4_t res_vec = vaddq_f32(a_vec, b_vec);
  400724:   3d8027e0    str     q0, [sp, #144]
  400728:   910043e0    add     x0, sp, #0x10
  40072c:   f90047e0    str     x0, [sp, #136]
  400730:   3dc027e0    ldr     q0, [sp, #144]
  400734:   3d801fe0    str     q0, [sp, #112]
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst1q_f32 (float32_t *__a, float32x4_t __b)
{
  __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) __a, __b);
  400738:   3dc01fe0    ldr     q0, [sp, #112]
  40073c:   f94047e0    ldr     x0, [sp, #136]
  400740:   3d800000    str     q0, [x0]
}
  400744:   d503201f    nop

    // Store the result back to memory
    vst1q_f32(result, res_vec);

    return 0;
  40078c:   52800000    mov     w0, #0x0                        // #0
}
  400790:   a8cd7bfd    ldp     x29, x30, [sp], #208
  400794:   d65f03c0    ret