
/*
 * test_fn - stage data from global memory into local memory with an
 * asynchronous work-group strided copy, then write the local buffer back
 * out to global memory.
 *
 * src                 global input buffer
 * dst                 global output buffer
 * localBuffer         local scratch buffer, indexed by local id
 * copiesPerWorkgroup,
 * copiesPerWorkItem,
 * stride              accepted for interface compatibility only: each one
 *                     is overwritten with 1 before it is first read, so the
 *                     values supplied by the host have no effect.
 */
__kernel void test_fn( const __global ushort4 *src, __global ushort4 *dst, __local ushort4 *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )
{
    /* Discard the caller-supplied sizes; everything below runs with 1. */
    stride = 1;
    copiesPerWorkItem = 1;
    copiesPerWorkgroup = 1;

    const size_t localId = get_local_id( 0 );
    const size_t globalId = get_global_id( 0 );
    const ushort4 zero = (ushort4)( (ushort)0 );

    /* Each work-item clears its own slots of the local buffer. */
    for( int k = 0; k < copiesPerWorkItem; k++ )
    {
        localBuffer[ localId * copiesPerWorkItem + k ] = zero;
    }

    /* All clears must be finished before the copy may write to local memory. */
    barrier( CLK_LOCAL_MEM_FENCE );

    /* Source window for this work-group, offset by the group id. */
    const __global ushort4 *groupSrc = src + copiesPerWorkgroup * stride * get_group_id( 0 );

    event_t copyDone = async_work_group_strided_copy( localBuffer, groupSrc, (size_t)copiesPerWorkgroup, (size_t)stride, 0 );

    /* Block until the asynchronous copy has completed. */
    wait_group_events( 1, &copyDone );

    /* Each work-item copies its own local slots to the destination buffer. */
    for( int k = 0; k < copiesPerWorkItem; k++ )
    {
        dst[ globalId * copiesPerWorkItem * stride + k * stride ] = localBuffer[ localId * copiesPerWorkItem + k ];
    }
}
