/*
 * Copyright © 2015 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef __OCL_SIMD_H__
#define __OCL_SIMD_H__

#include "ocl_types.h"

/////////////////////////////////////////////////////////////////////////////
// SIMD-level functions
/////////////////////////////////////////////////////////////////////////////
/*
 * Sub-group predicate tests.  Both take a single int and return an int;
 * the parameter name follows the cl_khr_subgroups prototypes.
 */
int sub_group_all(int predicate);
int sub_group_any(int predicate);

/*
 * Sub-group size and identity queries: no arguments, uint result.
 * get_simd_size() is the only one named outside the get_*sub_group* family.
 */
uint get_simd_size(void);
uint get_max_sub_group_size(void);
uint get_sub_group_size(void);
uint get_num_sub_groups(void);
uint get_sub_group_id(void);
uint get_sub_group_local_id(void);

/*
 * broadcast
 *
 * Every overload takes a value `a` plus a uint `local_id` and returns the
 * type of `a`.
 */

/* integer element types */
OVERLOADABLE short sub_group_broadcast(short a, uint local_id);
OVERLOADABLE ushort sub_group_broadcast(ushort a, uint local_id);
OVERLOADABLE int sub_group_broadcast(int a, uint local_id);
OVERLOADABLE uint sub_group_broadcast(uint a, uint local_id);
OVERLOADABLE long sub_group_broadcast(long a, uint local_id);
OVERLOADABLE ulong sub_group_broadcast(ulong a, uint local_id);

/* floating-point element types */
OVERLOADABLE half sub_group_broadcast(half a, uint local_id);
OVERLOADABLE float sub_group_broadcast(float a, uint local_id);
OVERLOADABLE double sub_group_broadcast(double a, uint local_id);

/* intel_-prefixed spelling, declared for short and ushort only */
OVERLOADABLE short intel_sub_group_broadcast(short a, uint local_id);
OVERLOADABLE ushort intel_sub_group_broadcast(ushort a, uint local_id);
/* reduce add */
OVERLOADABLE int sub_group_reduce_add(int x);
OVERLOADABLE uint sub_group_reduce_add(uint x);
OVERLOADABLE long sub_group_reduce_add(long x);
OVERLOADABLE ulong sub_group_reduce_add(ulong x);
OVERLOADABLE half sub_group_reduce_add(half x);
OVERLOADABLE float sub_group_reduce_add(float x);
OVERLOADABLE double sub_group_reduce_add(double x);
OVERLOADABLE short sub_group_reduce_add(short x);
OVERLOADABLE ushort sub_group_reduce_add(ushort x);
OVERLOADABLE short intel_sug_group_reduce_add(short x);
OVERLOADABLE ushort intel_sug_group_reduce_add(ushort x);

/* reduce min */
OVERLOADABLE int sub_group_reduce_min(int x);
OVERLOADABLE uint sub_group_reduce_min(uint x);
OVERLOADABLE long sub_group_reduce_min(long x);
OVERLOADABLE ulong sub_group_reduce_min(ulong x);
OVERLOADABLE half sub_group_reduce_min(half x);
OVERLOADABLE float sub_group_reduce_min(float x);
OVERLOADABLE double sub_group_reduce_min(double x);
OVERLOADABLE short sub_group_reduce_min(short x);
OVERLOADABLE ushort sub_group_reduce_min(ushort x);
OVERLOADABLE short intel_sug_group_reduce_min(short x);
OVERLOADABLE ushort intel_sug_group_reduce_min(ushort x);

/* reduce max */
OVERLOADABLE int sub_group_reduce_max(int x);
OVERLOADABLE uint sub_group_reduce_max(uint x);
OVERLOADABLE long sub_group_reduce_max(long x);
OVERLOADABLE ulong sub_group_reduce_max(ulong x);
OVERLOADABLE half sub_group_reduce_max(half x);
OVERLOADABLE float sub_group_reduce_max(float x);
OVERLOADABLE double sub_group_reduce_max(double x);
OVERLOADABLE short sub_group_reduce_max(short x);
OVERLOADABLE ushort sub_group_reduce_max(ushort x);
OVERLOADABLE short intel_sug_group_reduce_max(short x);
OVERLOADABLE ushort intel_sug_group_reduce_max(ushort x);

/* scan_inclusive add */
OVERLOADABLE int sub_group_scan_inclusive_add(int x);
OVERLOADABLE uint sub_group_scan_inclusive_add(uint x);
OVERLOADABLE long sub_group_scan_inclusive_add(long x);
OVERLOADABLE ulong sub_group_scan_inclusive_add(ulong x);
OVERLOADABLE half sub_group_scan_inclusive_add(half x);
OVERLOADABLE float sub_group_scan_inclusive_add(float x);
OVERLOADABLE double sub_group_scan_inclusive_add(double x);
OVERLOADABLE short sub_group_scan_inclusive_add(short x);
OVERLOADABLE ushort sub_group_scan_inclusive_add(ushort x);
OVERLOADABLE short intel_sug_group_scan_inclusive_add(short x);
OVERLOADABLE ushort intel_sug_group_scan_inclusive_add(ushort x);

/* scan_inclusive min */
OVERLOADABLE int sub_group_scan_inclusive_min(int x);
OVERLOADABLE uint sub_group_scan_inclusive_min(uint x);
OVERLOADABLE long sub_group_scan_inclusive_min(long x);
OVERLOADABLE ulong sub_group_scan_inclusive_min(ulong x);
OVERLOADABLE half sub_group_scan_inclusive_min(half x);
OVERLOADABLE float sub_group_scan_inclusive_min(float x);
OVERLOADABLE double sub_group_scan_inclusive_min(double x);
OVERLOADABLE short sub_group_scan_inclusive_min(short x);
OVERLOADABLE ushort sub_group_scan_inclusive_min(ushort x);
OVERLOADABLE short intel_sug_group_scan_inclusive_min(short x);
OVERLOADABLE ushort intel_sug_group_scan_inclusive_min(ushort x);

/* scan_inclusive max */
OVERLOADABLE int sub_group_scan_inclusive_max(int x);
OVERLOADABLE uint sub_group_scan_inclusive_max(uint x);
OVERLOADABLE long sub_group_scan_inclusive_max(long x);
OVERLOADABLE ulong sub_group_scan_inclusive_max(ulong x);
OVERLOADABLE half sub_group_scan_inclusive_max(half x);
OVERLOADABLE float sub_group_scan_inclusive_max(float x);
OVERLOADABLE double sub_group_scan_inclusive_max(double x);
OVERLOADABLE short sub_group_scan_inclusive_max(short x);
OVERLOADABLE ushort sub_group_scan_inclusive_max(ushort x);
OVERLOADABLE short intel_sug_group_scan_inclusive_max(short x);
OVERLOADABLE ushort intel_sug_group_scan_inclusive_max(ushort x);

/* scan_exclusive add */
OVERLOADABLE int sub_group_scan_exclusive_add(int x);
OVERLOADABLE uint sub_group_scan_exclusive_add(uint x);
OVERLOADABLE long sub_group_scan_exclusive_add(long x);
OVERLOADABLE ulong sub_group_scan_exclusive_add(ulong x);
OVERLOADABLE half sub_group_scan_exclusive_add(half x);
OVERLOADABLE float sub_group_scan_exclusive_add(float x);
OVERLOADABLE double sub_group_scan_exclusive_add(double x);
OVERLOADABLE short sub_group_scan_exclusive_add(short x);
OVERLOADABLE ushort sub_group_scan_exclusive_add(ushort x);
OVERLOADABLE short intel_sub_group_scan_exclusive_add(short x);
OVERLOADABLE ushort intel_sub_group_scan_exclusive_add(ushort x);

/* scan_exclusive min */
OVERLOADABLE int sub_group_scan_exclusive_min(int x);
OVERLOADABLE uint sub_group_scan_exclusive_min(uint x);
OVERLOADABLE long sub_group_scan_exclusive_min(long x);
OVERLOADABLE ulong sub_group_scan_exclusive_min(ulong x);
OVERLOADABLE half sub_group_scan_exclusive_min(half x);
OVERLOADABLE float sub_group_scan_exclusive_min(float x);
OVERLOADABLE double sub_group_scan_exclusive_min(double x);
OVERLOADABLE short sub_group_scan_exclusive_min(short x);
OVERLOADABLE ushort sub_group_scan_exclusive_min(ushort x);
OVERLOADABLE short intel_sug_group_scan_exclusive_min(short x);
OVERLOADABLE ushort intel_sug_group_scan_exclusive_min(ushort x);

/* scan_exclusive max */
OVERLOADABLE int sub_group_scan_exclusive_max(int x);
OVERLOADABLE uint sub_group_scan_exclusive_max(uint x);
OVERLOADABLE long sub_group_scan_exclusive_max(long x);
OVERLOADABLE ulong sub_group_scan_exclusive_max(ulong x);
OVERLOADABLE half sub_group_scan_exclusive_max(half x);
OVERLOADABLE float sub_group_scan_exclusive_max(float x);
OVERLOADABLE double sub_group_scan_exclusive_max(double x);
OVERLOADABLE short sub_group_scan_exclusive_max(short x);
OVERLOADABLE ushort sub_group_scan_exclusive_max(ushort x);
OVERLOADABLE short intel_sug_group_scan_exclusive_max(short x);
OVERLOADABLE ushort intel_sug_group_scan_exclusive_max(ushort x);

/*
 * shuffle
 *
 * Scalar overloads: a value `x` plus a uint `c`; the result has the type
 * of `x`.  Declared for int, uint, short, ushort, float and half.
 */
OVERLOADABLE int intel_sub_group_shuffle(int x, uint c);
OVERLOADABLE uint intel_sub_group_shuffle(uint x, uint c);
OVERLOADABLE short intel_sub_group_shuffle(short x, uint c);
OVERLOADABLE ushort intel_sub_group_shuffle(ushort x, uint c);
OVERLOADABLE float intel_sub_group_shuffle(float x, uint c);
OVERLOADABLE half intel_sub_group_shuffle(half x, uint c);

/*
 * shuffle_down, scalar overloads: two values `x` and `y` of the same type
 * plus a uint `c`; the result has that type.  No half overload is declared.
 */
OVERLOADABLE int intel_sub_group_shuffle_down(int x, int y, uint c);
OVERLOADABLE uint intel_sub_group_shuffle_down(uint x, uint y, uint c);
OVERLOADABLE short intel_sub_group_shuffle_down(short x, short y, uint c);
OVERLOADABLE ushort intel_sub_group_shuffle_down(ushort x, ushort y, uint c);
OVERLOADABLE float intel_sub_group_shuffle_down(float x, float y, uint c);

/*
 * shuffle_up, scalar overloads: two values `x` and `y` of the same type
 * plus a uint `c`; the result has that type.  No half overload is declared.
 */
OVERLOADABLE int intel_sub_group_shuffle_up(int x, int y, uint c);
OVERLOADABLE uint intel_sub_group_shuffle_up(uint x, uint y, uint c);
OVERLOADABLE short intel_sub_group_shuffle_up(short x, short y, uint c);
OVERLOADABLE ushort intel_sub_group_shuffle_up(ushort x, ushort y, uint c);
OVERLOADABLE float intel_sub_group_shuffle_up(float x, float y, uint c);

/*
 * shuffle_xor, scalar overloads: a value `x` plus a uint `c`; the result
 * has the type of `x`.  No half overload is declared.
 */
OVERLOADABLE int intel_sub_group_shuffle_xor(int x, uint c);
OVERLOADABLE uint intel_sub_group_shuffle_xor(uint x, uint c);
OVERLOADABLE short intel_sub_group_shuffle_xor(short x, uint c);
OVERLOADABLE ushort intel_sub_group_shuffle_xor(ushort x, uint c);
OVERLOADABLE float intel_sub_group_shuffle_xor(float x, uint c);

/* block read/write */
/*
 * Unsuffixed block read/write on __global uint memory.
 * The 2/4/8 name suffix matches the width of the uint vector returned
 * (reads) or passed as `data` (writes).
 */
OVERLOADABLE uint  intel_sub_group_block_read(const __global uint *p);
OVERLOADABLE uint2 intel_sub_group_block_read2(const __global uint *p);
OVERLOADABLE uint4 intel_sub_group_block_read4(const __global uint *p);
OVERLOADABLE uint8 intel_sub_group_block_read8(const __global uint *p);

OVERLOADABLE void intel_sub_group_block_write(__global uint *p, uint data);
OVERLOADABLE void intel_sub_group_block_write2(__global uint *p, uint2 data);
OVERLOADABLE void intel_sub_group_block_write4(__global uint *p, uint4 data);
OVERLOADABLE void intel_sub_group_block_write8(__global uint *p, uint8 data);

/*
 * Unsuffixed block read/write on an image2d_t, addressed by an int2
 * `byte_coord`.  The 2/4/8 name suffix matches the uint vector width.
 */
OVERLOADABLE uint  intel_sub_group_block_read(image2d_t image, int2 byte_coord);
OVERLOADABLE uint2 intel_sub_group_block_read2(image2d_t image, int2 byte_coord);
OVERLOADABLE uint4 intel_sub_group_block_read4(image2d_t image, int2 byte_coord);
OVERLOADABLE uint8 intel_sub_group_block_read8(image2d_t image, int2 byte_coord);

OVERLOADABLE void intel_sub_group_block_write(image2d_t image, int2 byte_coord,
                                              uint data);
OVERLOADABLE void intel_sub_group_block_write2(image2d_t image, int2 byte_coord,
                                               uint2 data);
OVERLOADABLE void intel_sub_group_block_write4(image2d_t image, int2 byte_coord,
                                               uint4 data);
OVERLOADABLE void intel_sub_group_block_write8(image2d_t image, int2 byte_coord,
                                               uint8 data);

/*
 * "_ui" block read/write on __global uint memory.  Signatures parallel the
 * unsuffixed intel_sub_group_block_read/write declarations for uint.
 */
OVERLOADABLE uint  intel_sub_group_block_read_ui(const __global uint *p);
OVERLOADABLE uint2 intel_sub_group_block_read_ui2(const __global uint *p);
OVERLOADABLE uint4 intel_sub_group_block_read_ui4(const __global uint *p);
OVERLOADABLE uint8 intel_sub_group_block_read_ui8(const __global uint *p);

OVERLOADABLE void intel_sub_group_block_write_ui(__global uint *p, uint data);
OVERLOADABLE void intel_sub_group_block_write_ui2(__global uint *p, uint2 data);
OVERLOADABLE void intel_sub_group_block_write_ui4(__global uint *p, uint4 data);
OVERLOADABLE void intel_sub_group_block_write_ui8(__global uint *p, uint8 data);

/*
 * "_ui" block read/write on an image2d_t, addressed by an int2
 * `byte_coord`; data is uint or a uint vector of width 2, 4 or 8.
 */
OVERLOADABLE uint  intel_sub_group_block_read_ui(image2d_t image, int2 byte_coord);
OVERLOADABLE uint2 intel_sub_group_block_read_ui2(image2d_t image, int2 byte_coord);
OVERLOADABLE uint4 intel_sub_group_block_read_ui4(image2d_t image, int2 byte_coord);
OVERLOADABLE uint8 intel_sub_group_block_read_ui8(image2d_t image, int2 byte_coord);

OVERLOADABLE void intel_sub_group_block_write_ui(image2d_t image, int2 byte_coord,
                                                 uint data);
OVERLOADABLE void intel_sub_group_block_write_ui2(image2d_t image, int2 byte_coord,
                                                  uint2 data);
OVERLOADABLE void intel_sub_group_block_write_ui4(image2d_t image, int2 byte_coord,
                                                  uint4 data);
OVERLOADABLE void intel_sub_group_block_write_ui8(image2d_t image, int2 byte_coord,
                                                  uint8 data);

/*
 * Media block write, uint texels (scalar and vector widths 2, 4, 8).
 * Arguments: int2 byte offset, int width, int height, the texels, and the
 * target image2d_t.
 */
OVERLOADABLE void intel_sub_group_media_block_write_ui(int2 src_byte_offset,
                                                       int width, int height,
                                                       uint texels,
                                                       image2d_t image);
OVERLOADABLE void intel_sub_group_media_block_write_ui2(int2 src_byte_offset,
                                                        int width, int height,
                                                        uint2 texels,
                                                        image2d_t image);
OVERLOADABLE void intel_sub_group_media_block_write_ui4(int2 src_byte_offset,
                                                        int width, int height,
                                                        uint4 texels,
                                                        image2d_t image);
OVERLOADABLE void intel_sub_group_media_block_write_ui8(int2 src_byte_offset,
                                                        int width, int height,
                                                        uint8 texels,
                                                        image2d_t image);

/*
 * "_us" block read/write on __global ushort memory; data is ushort or a
 * ushort vector of width 2, 4 or 8.
 */
OVERLOADABLE ushort  intel_sub_group_block_read_us(const __global ushort *p);
OVERLOADABLE ushort2 intel_sub_group_block_read_us2(const __global ushort *p);
OVERLOADABLE ushort4 intel_sub_group_block_read_us4(const __global ushort *p);
OVERLOADABLE ushort8 intel_sub_group_block_read_us8(const __global ushort *p);

OVERLOADABLE void intel_sub_group_block_write_us(__global ushort *p, ushort data);
OVERLOADABLE void intel_sub_group_block_write_us2(__global ushort *p, ushort2 data);
OVERLOADABLE void intel_sub_group_block_write_us4(__global ushort *p, ushort4 data);
OVERLOADABLE void intel_sub_group_block_write_us8(__global ushort *p, ushort8 data);

/*
 * "_us" block read/write on an image2d_t, addressed by an int2
 * `byte_coord`; data is ushort or a ushort vector of width 2, 4 or 8.
 */
OVERLOADABLE ushort  intel_sub_group_block_read_us(image2d_t image, int2 byte_coord);
OVERLOADABLE ushort2 intel_sub_group_block_read_us2(image2d_t image, int2 byte_coord);
OVERLOADABLE ushort4 intel_sub_group_block_read_us4(image2d_t image, int2 byte_coord);
OVERLOADABLE ushort8 intel_sub_group_block_read_us8(image2d_t image, int2 byte_coord);

OVERLOADABLE void intel_sub_group_block_write_us(image2d_t image, int2 byte_coord,
                                                 ushort data);
OVERLOADABLE void intel_sub_group_block_write_us2(image2d_t image, int2 byte_coord,
                                                  ushort2 data);
OVERLOADABLE void intel_sub_group_block_write_us4(image2d_t image, int2 byte_coord,
                                                  ushort4 data);
OVERLOADABLE void intel_sub_group_block_write_us8(image2d_t image, int2 byte_coord,
                                                  ushort8 data);

/*
 * Media block write, uchar texels (scalar and vector widths 2, 4, 8, 16).
 * Arguments: int2 byte offset, int width, int height, the texels, and the
 * target image2d_t.
 */
OVERLOADABLE void intel_sub_group_media_block_write_uc(int2 src_byte_offset,
                                                       int width, int height,
                                                       uchar texels,
                                                       image2d_t image);
OVERLOADABLE void intel_sub_group_media_block_write_uc2(int2 src_byte_offset,
                                                        int width, int height,
                                                        uchar2 texels,
                                                        image2d_t image);
OVERLOADABLE void intel_sub_group_media_block_write_uc4(int2 src_byte_offset,
                                                        int width, int height,
                                                        uchar4 texels,
                                                        image2d_t image);
OVERLOADABLE void intel_sub_group_media_block_write_uc8(int2 src_byte_offset,
                                                        int width, int height,
                                                        uchar8 texels,
                                                        image2d_t image);
OVERLOADABLE void intel_sub_group_media_block_write_uc16(int2 src_byte_offset,
                                                         int width, int height,
                                                         uchar16 texels,
                                                         image2d_t image);

/*
 * Media block write, ushort texels (scalar and vector widths 2, 4, 8, 16).
 * Arguments: int2 byte offset, int width, int height, the texels, and the
 * target image2d_t.
 */
OVERLOADABLE void intel_sub_group_media_block_write_us(int2 src_byte_offset,
                                                       int width, int height,
                                                       ushort texels,
                                                       image2d_t image);
OVERLOADABLE void intel_sub_group_media_block_write_us2(int2 src_byte_offset,
                                                        int width, int height,
                                                        ushort2 texels,
                                                        image2d_t image);
OVERLOADABLE void intel_sub_group_media_block_write_us4(int2 src_byte_offset,
                                                        int width, int height,
                                                        ushort4 texels,
                                                        image2d_t image);
OVERLOADABLE void intel_sub_group_media_block_write_us8(int2 src_byte_offset,
                                                        int width, int height,
                                                        ushort8 texels,
                                                        image2d_t image);
OVERLOADABLE void intel_sub_group_media_block_write_us16(int2 src_byte_offset,
                                                         int width, int height,
                                                         ushort16 texels,
                                                         image2d_t image);

/*
 * Media block read returning uchar texels (scalar and vector widths 2, 4,
 * 8, 16).  Arguments: int2 byte offset, int width, int height, and a
 * read_only image2d_t.
 */
OVERLOADABLE uchar intel_sub_group_media_block_read_uc(int2 src_byte_offset,
                                                       int width, int height,
                                                       read_only image2d_t image);
OVERLOADABLE uchar2 intel_sub_group_media_block_read_uc2(int2 src_byte_offset,
                                                         int width, int height,
                                                         read_only image2d_t image);
OVERLOADABLE uchar4 intel_sub_group_media_block_read_uc4(int2 src_byte_offset,
                                                         int width, int height,
                                                         read_only image2d_t image);
OVERLOADABLE uchar8 intel_sub_group_media_block_read_uc8(int2 src_byte_offset,
                                                         int width, int height,
                                                         read_only image2d_t image);
OVERLOADABLE uchar16 intel_sub_group_media_block_read_uc16(int2 src_byte_offset,
                                                           int width, int height,
                                                           read_only image2d_t image);

/*
 * Media block read returning ushort texels (scalar and vector widths 2, 4,
 * 8, 16).  Arguments: int2 byte offset, int width, int height, and a
 * read_only image2d_t.
 */
OVERLOADABLE ushort intel_sub_group_media_block_read_us(int2 src_byte_offset,
                                                        int width, int height,
                                                        read_only image2d_t image);
OVERLOADABLE ushort2 intel_sub_group_media_block_read_us2(int2 src_byte_offset,
                                                          int width, int height,
                                                          read_only image2d_t image);
OVERLOADABLE ushort4 intel_sub_group_media_block_read_us4(int2 src_byte_offset,
                                                          int width, int height,
                                                          read_only image2d_t image);
OVERLOADABLE ushort8 intel_sub_group_media_block_read_us8(int2 src_byte_offset,
                                                          int width, int height,
                                                          read_only image2d_t image);
OVERLOADABLE ushort16 intel_sub_group_media_block_read_us16(int2 src_byte_offset,
                                                            int width, int height,
                                                            read_only image2d_t image);

/*
 * Media block read returning uint texels (scalar and vector widths 2, 4, 8).
 * Arguments: int2 byte offset, int width, int height, and a read_only
 * image2d_t.
 */
OVERLOADABLE uint intel_sub_group_media_block_read_ui(int2 src_byte_offset,
                                                      int width, int height,
                                                      read_only image2d_t image);
OVERLOADABLE uint2 intel_sub_group_media_block_read_ui2(int2 src_byte_offset,
                                                        int width, int height,
                                                        read_only image2d_t image);
OVERLOADABLE uint4 intel_sub_group_media_block_read_ui4(int2 src_byte_offset,
                                                        int width, int height,
                                                        read_only image2d_t image);
OVERLOADABLE uint8 intel_sub_group_media_block_read_ui8(int2 src_byte_offset,
                                                        int width, int height,
                                                        read_only image2d_t image);
//Everything from this point on is autogenerated.
//Do not modify it manually.
//SIMD-level builtin functions
//floatn intel_sub_group_shuffle(floatn x, uint c)
//vector overloads for n = 2, 3, 4, 8, 16; the result type equals the type of param0

OVERLOADABLE float2 intel_sub_group_shuffle (float2 param0, uint param1);
OVERLOADABLE float3 intel_sub_group_shuffle (float3 param0, uint param1);
OVERLOADABLE float4 intel_sub_group_shuffle (float4 param0, uint param1);
OVERLOADABLE float8 intel_sub_group_shuffle (float8 param0, uint param1);
OVERLOADABLE float16 intel_sub_group_shuffle (float16 param0, uint param1);

//intn intel_sub_group_shuffle(intn x, uint c)
//vector overloads for n = 2, 3, 4, 8, 16

OVERLOADABLE int2 intel_sub_group_shuffle (int2 param0, uint param1);
OVERLOADABLE int3 intel_sub_group_shuffle (int3 param0, uint param1);
OVERLOADABLE int4 intel_sub_group_shuffle (int4 param0, uint param1);
OVERLOADABLE int8 intel_sub_group_shuffle (int8 param0, uint param1);
OVERLOADABLE int16 intel_sub_group_shuffle (int16 param0, uint param1);

//uintn intel_sub_group_shuffle(uintn x, uint c)
//vector overloads for n = 2, 3, 4, 8, 16

OVERLOADABLE uint2 intel_sub_group_shuffle (uint2 param0, uint param1);
OVERLOADABLE uint3 intel_sub_group_shuffle (uint3 param0, uint param1);
OVERLOADABLE uint4 intel_sub_group_shuffle (uint4 param0, uint param1);
OVERLOADABLE uint8 intel_sub_group_shuffle (uint8 param0, uint param1);
OVERLOADABLE uint16 intel_sub_group_shuffle (uint16 param0, uint param1);

//floatn intel_sub_group_shuffle_down(floatn x, floatn y, uint c)
//vector overloads for n = 2, 3, 4, 8, 16; param0 and param1 share the result type

OVERLOADABLE float2 intel_sub_group_shuffle_down (float2 param0, float2 param1, uint param2);
OVERLOADABLE float3 intel_sub_group_shuffle_down (float3 param0, float3 param1, uint param2);
OVERLOADABLE float4 intel_sub_group_shuffle_down (float4 param0, float4 param1, uint param2);
OVERLOADABLE float8 intel_sub_group_shuffle_down (float8 param0, float8 param1, uint param2);
OVERLOADABLE float16 intel_sub_group_shuffle_down (float16 param0, float16 param1, uint param2);

//intn intel_sub_group_shuffle_down(intn x, intn y, uint c)
//vector overloads for n = 2, 3, 4, 8, 16

OVERLOADABLE int2 intel_sub_group_shuffle_down (int2 param0, int2 param1, uint param2);
OVERLOADABLE int3 intel_sub_group_shuffle_down (int3 param0, int3 param1, uint param2);
OVERLOADABLE int4 intel_sub_group_shuffle_down (int4 param0, int4 param1, uint param2);
OVERLOADABLE int8 intel_sub_group_shuffle_down (int8 param0, int8 param1, uint param2);
OVERLOADABLE int16 intel_sub_group_shuffle_down (int16 param0, int16 param1, uint param2);

//uintn intel_sub_group_shuffle_down(uintn x, uintn y, uint c)
//vector overloads for n = 2, 3, 4, 8, 16

OVERLOADABLE uint2 intel_sub_group_shuffle_down (uint2 param0, uint2 param1, uint param2);
OVERLOADABLE uint3 intel_sub_group_shuffle_down (uint3 param0, uint3 param1, uint param2);
OVERLOADABLE uint4 intel_sub_group_shuffle_down (uint4 param0, uint4 param1, uint param2);
OVERLOADABLE uint8 intel_sub_group_shuffle_down (uint8 param0, uint8 param1, uint param2);
OVERLOADABLE uint16 intel_sub_group_shuffle_down (uint16 param0, uint16 param1, uint param2);

//floatn intel_sub_group_shuffle_up(floatn x, floatn y, uint c)
//vector overloads for n = 2, 3, 4, 8, 16; param0 and param1 share the result type

OVERLOADABLE float2 intel_sub_group_shuffle_up (float2 param0, float2 param1, uint param2);
OVERLOADABLE float3 intel_sub_group_shuffle_up (float3 param0, float3 param1, uint param2);
OVERLOADABLE float4 intel_sub_group_shuffle_up (float4 param0, float4 param1, uint param2);
OVERLOADABLE float8 intel_sub_group_shuffle_up (float8 param0, float8 param1, uint param2);
OVERLOADABLE float16 intel_sub_group_shuffle_up (float16 param0, float16 param1, uint param2);

//intn intel_sub_group_shuffle_up(intn x, intn y, uint c)
//vector overloads for n = 2, 3, 4, 8, 16

OVERLOADABLE int2 intel_sub_group_shuffle_up (int2 param0, int2 param1, uint param2);
OVERLOADABLE int3 intel_sub_group_shuffle_up (int3 param0, int3 param1, uint param2);
OVERLOADABLE int4 intel_sub_group_shuffle_up (int4 param0, int4 param1, uint param2);
OVERLOADABLE int8 intel_sub_group_shuffle_up (int8 param0, int8 param1, uint param2);
OVERLOADABLE int16 intel_sub_group_shuffle_up (int16 param0, int16 param1, uint param2);

//uintn intel_sub_group_shuffle_up(uintn x, uintn y, uint c)
//vector overloads for n = 2, 3, 4, 8, 16

OVERLOADABLE uint2 intel_sub_group_shuffle_up (uint2 param0, uint2 param1, uint param2);
OVERLOADABLE uint3 intel_sub_group_shuffle_up (uint3 param0, uint3 param1, uint param2);
OVERLOADABLE uint4 intel_sub_group_shuffle_up (uint4 param0, uint4 param1, uint param2);
OVERLOADABLE uint8 intel_sub_group_shuffle_up (uint8 param0, uint8 param1, uint param2);
OVERLOADABLE uint16 intel_sub_group_shuffle_up (uint16 param0, uint16 param1, uint param2);

//floatn intel_sub_group_shuffle_xor(floatn x, uint c)
//vector overloads for n = 2, 3, 4, 8, 16; the result type equals the type of param0

OVERLOADABLE float2 intel_sub_group_shuffle_xor (float2 param0, uint param1);
OVERLOADABLE float3 intel_sub_group_shuffle_xor (float3 param0, uint param1);
OVERLOADABLE float4 intel_sub_group_shuffle_xor (float4 param0, uint param1);
OVERLOADABLE float8 intel_sub_group_shuffle_xor (float8 param0, uint param1);
OVERLOADABLE float16 intel_sub_group_shuffle_xor (float16 param0, uint param1);

//intn intel_sub_group_shuffle_xor(intn x, uint c)
//vector overloads for n = 2, 3, 4, 8, 16

OVERLOADABLE int2 intel_sub_group_shuffle_xor (int2 param0, uint param1);
OVERLOADABLE int3 intel_sub_group_shuffle_xor (int3 param0, uint param1);
OVERLOADABLE int4 intel_sub_group_shuffle_xor (int4 param0, uint param1);
OVERLOADABLE int8 intel_sub_group_shuffle_xor (int8 param0, uint param1);
OVERLOADABLE int16 intel_sub_group_shuffle_xor (int16 param0, uint param1);

//uintn intel_sub_group_shuffle_xor(uintn x, uint c)
//vector overloads for n = 2, 3, 4, 8, 16

OVERLOADABLE uint2 intel_sub_group_shuffle_xor (uint2 param0, uint param1);
OVERLOADABLE uint3 intel_sub_group_shuffle_xor (uint3 param0, uint param1);
OVERLOADABLE uint4 intel_sub_group_shuffle_xor (uint4 param0, uint param1);
OVERLOADABLE uint8 intel_sub_group_shuffle_xor (uint8 param0, uint param1);
OVERLOADABLE uint16 intel_sub_group_shuffle_xor (uint16 param0, uint param1);

#endif
