#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
/*
 * Exercises atomic_xor on __local memory.
 *
 * Each work-item reads its element of finalDest, ORs in a single bit chosen
 * from its global id, and stages the result in its own slot of destMemory.
 * Every work-item except local id 0 then atomically XORs its staged value
 * into destMemory[0]. Finally each work-item copies its local slot back to
 * finalDest, so the element belonging to local id 0 of each work-group ends
 * up holding the XOR-combination of the whole group's staged values, while
 * all other elements hold their own staged value.
 *
 * finalDest  - read and written at index get_global_id(0).
 * destMemory - local scratch buffer, indexed by get_local_id(0); the caller
 *              must size it to hold at least one int per work-item in the
 *              work-group.
 */
__kernel void test_atomic_fn(__global int *finalDest, volatile __local int *destMemory)
{
  // Keep the work-item ids in the size_t width the built-ins return, so
  // indexing is not truncated through int.
  const size_t tid = get_global_id(0);
  const size_t lid = get_local_id(0);

  // Bit width of one element; it is a power of two, so (numBits - 1) is a
  // valid mask for "tid modulo numBits".
  const size_t numBits = sizeof( finalDest[0] ) * 8;
  const uint bitIndex = (uint)( tid & ( numBits - 1 ) );

  // Build the mask from an unsigned literal so that selecting the top bit
  // never shifts a signed 1 into the sign position.
  destMemory[lid] = finalDest[tid] | (int)( 1u << bitIndex );

  // All staged values must be visible before any work-item touches slot 0.
  barrier(CLK_LOCAL_MEM_FENCE);

  // Slot 0 already contains local id 0's own value, so only the other
  // work-items fold theirs in.
  if (lid > 0)
  {
    atomic_xor( &destMemory[0], destMemory[lid] );
  }

  // Wait for every atomic update to land before slot 0 is read back.
  barrier(CLK_LOCAL_MEM_FENCE);

  finalDest[tid] = destMemory[lid];
}
