/* { dg-do compile } */
/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "riscv_th_vector.h"

/*
** f1:
**	th.vsetivli\tzero,4,e32,m1,tu,ma
**	th.vlbu\.v\tv[0-9]+,0\([a-x0-9]+\)
**	th.vlbu\.v\tv[0-9]+,0\([a-x0-9]+\)
**	th.vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
**	th.vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
**	th.vsb\.v\tv[0-9]+,0\([a-x0-9]+\)
**	ret
*/
/* Code-generation test input: load from IN with __riscv_th_vlbu_v_u32m1,
   add the scalar -16 twice, and store the result to OUT with
   __riscv_th_vsb_v_u32m1.  Every intrinsic call passes 4 as its last
   argument.  The expected-assembly comment before this function lists the
   two adds in immediate form (th.vadd.vi).  X is never referenced.  */
void f1 (void * in, void *out, uint32_t x)
{
    /* Plain load first; its result V is then passed as the first operand
       of the _tu load from the same address.
       NOTE(review): _tu presumably selects the tail-undisturbed policy,
       in line with the "tu" in the expected th.vsetivli -- confirm.  */
    vuint32m1_t v = __riscv_th_vlbu_v_u32m1 (in, 4);
    vuint32m1_t v2 = __riscv_th_vlbu_v_u32m1_tu (v, in, 4);
    /* Both adds use V2 as the vector source and -16 as the scalar; the
       _tu form additionally takes V3 as its first operand.  */
    vuint32m1_t v3 = __riscv_vadd_vx_u32m1 (v2, -16, 4);
    vuint32m1_t v4 = __riscv_vadd_vx_u32m1_tu (v3, v2, -16, 4);
    __riscv_th_vsb_v_u32m1 (out, v4, 4);
}

/*
** f2:
**	th.vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
**	th.vlm.v\tv[0-9]+,0\([a-x0-9]+\)
**	th.vsetivli\tzero,4,e32,m1,ta,ma
**	th.vlbu.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
**	th.vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
**	th.vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
**	th.vsb.v\tv[0-9]+,0\([a-x0-9]+\)
**	ret
*/
/* Code-generation test input for the masked (_m) forms: read a vbool32_t
   mask from IN, do a masked load from IN, add the scalar -16 once unmasked
   and once under the mask, and store the result to OUT with
   __riscv_th_vsb_v_u32m1.  Every intrinsic call passes 4 as its last
   argument.  X is never referenced.  */
void f2 (void * in, void *out, uint32_t x)
{
    /* The mask is read straight from the start of IN.  */
    vbool32_t mask = *(vbool32_t*)in;
    /* Empty asm with a "memory" clobber, placed between the mask read and
       the vector loads.
       NOTE(review): presumably keeps the mask read as a separate access
       ahead of the loads below -- confirm.  */
    asm volatile ("":::"memory");
    /* V is not referenced again in this function; the expected body in the
       comment before this function lists only the masked th.vlbu.v.  */
    vuint32m1_t v = __riscv_th_vlbu_v_u32m1 (in, 4);
    vuint32m1_t v2 = __riscv_th_vlbu_v_u32m1_m (mask, in, 4);
    /* Unmasked add on V2, then a masked add on V3, both with scalar -16.  */
    vuint32m1_t v3 = __riscv_vadd_vx_u32m1 (v2, -16, 4);
    vuint32m1_t v4 = __riscv_vadd_vx_u32m1_m (mask, v3, -16, 4);
    __riscv_th_vsb_v_u32m1 (out, v4, 4);
}

/*
** f3:
**	th.vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
**	th.vlm.v\tv[0-9]+,0\([a-x0-9]+\)
**	th.vsetivli\tzero,4,e32,m1,tu,mu
**	th.vlbu\.v\tv[0-9]+,0\([a-x0-9]+\)
**	th.vlbu.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
**	th.vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
**	th.vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
**	th.vsb.v\tv[0-9]+,0\([a-x0-9]+\)
**	ret
*/
/* Code-generation test input for the _tumu forms: read a vbool32_t mask
   from IN, do a plain load and then a _tumu load from IN, add the scalar
   -16 once with the plain intrinsic and once with the _tumu intrinsic, and
   store the result to OUT with __riscv_th_vsb_v_u32m1.  Every intrinsic
   call passes 4 as its last argument.  X is never referenced.  */
void f3 (void * in, void *out, uint32_t x)
{
    /* The mask is read straight from the start of IN.  */
    vbool32_t mask = *(vbool32_t*)in;
    /* Empty asm with a "memory" clobber, placed between the mask read and
       the vector loads.
       NOTE(review): presumably keeps the mask read as a separate access
       ahead of the loads below -- confirm.  */
    asm volatile ("":::"memory");
    /* Unlike the _m form, the _tumu load takes V as an extra operand after
       the mask, so the plain load feeding V is live here.
       NOTE(review): _tumu presumably selects tail-undisturbed and
       mask-undisturbed, in line with "tu,mu" in the expected th.vsetivli
       -- confirm.  */
    vuint32m1_t v = __riscv_th_vlbu_v_u32m1 (in, 4);
    vuint32m1_t v2 = __riscv_th_vlbu_v_u32m1_tumu (mask, v, in, 4);
    /* Plain add on V2, then a _tumu add that takes MASK and V3 ahead of the
       vector source V2; both use scalar -16.  */
    vuint32m1_t v3 = __riscv_vadd_vx_u32m1 (v2, -16, 4);
    vuint32m1_t v4 = __riscv_vadd_vx_u32m1_tumu (mask, v3, v2, -16, 4);
    __riscv_th_vsb_v_u32m1 (out, v4, 4);
}
