/*
 * test_fn - copy the .xyz lanes of srcValues[gid] to destBuffer by way of a
 * private scratch array, using the packed 3-component vstore3/vload3 built-ins.
 *
 * srcValues:  source vectors, indexed by global id; only .xyz is read.
 * destBuffer: destination; written through a ushort pointer, so work-item gid
 *             writes three consecutive ushorts starting at element gid * 3.
 *
 * Work-items whose packed offset (gid * 3) is SIZE or larger do nothing.
 */
__kernel void test_fn(__global ushort4 *srcValues, __global ushort3 *destBuffer) {
  #define SIZE 128
  __private ushort3 priv[SIZE];
  /* Scalar views: vstore3/vload3 address (pointer + offset * 3) in ushort units. */
  __private ushort *scratch = (__private ushort *)priv;
  __global ushort *packedOut = (__global ushort *)destBuffer;
  const int gid = get_global_id(0);

  /* Only touch memory while the packed offset is still below SIZE. */
  if (gid * 3 < SIZE) {
    /* Round-trip through private memory: store packed, then read it back. */
    vstore3(srcValues[gid].xyz, gid, scratch);
    const ushort3 roundTripped = vload3(gid, scratch);
    vstore3(roundTripped, gid, packedOut);
  }
}