/* Aliases for 2D image objects with fixed access qualifiers: the first can
 * only be written by a kernel, the second can only be read. */
typedef write_only image2d_t write_only_image2d_float;
typedef read_only image2d_t read_only_image2d_float;

/* File-scope sampler used for all image reads in this file: out-of-range
 * coordinates are clamped to the image edge, and nearest filtering is used
 * (no interpolation).
 * NOTE(review): no CLK_NORMALIZED_COORDS_* flag is given here, while the
 * kernel below passes integer pixel coordinates -- confirm the implementation
 * treats the missing flag as CLK_NORMALIZED_COORDS_FALSE. */
constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

/*
 * Apply an n-by-m weighted filter to src_image and store the result in
 * dst_image.
 *
 * Each work-item handles the pixel at (get_global_id(0), get_global_id(1)).
 * It accumulates w * src over a window that is n pixels wide and m pixels
 * tall around that pixel, then writes the sum to the same coordinate in
 * dst_image.
 *
 * n              - filter width in pixels (number of columns in the window).
 * m              - filter height in pixels (number of rows in the window).
 * filter_weights - n * m weights, read sequentially row by row (all n
 *                  weights of the first window row, then the next row, ...).
 * src_image      - image to read from, sampled through the file-scope
 *                  `sampler`.
 * dst_image      - image that receives the filtered pixel.
 *
 * The window spans offsets [-n/2, (n+1)/2) horizontally and
 * [-m/2, (m+1)/2) vertically, so it works for both odd and even sizes.
 *
 * NOTE(review): the work-item coordinates are not checked against the image
 * dimensions; presumably the host launches exactly one work-item per
 * destination pixel -- confirm against the caller.
 */
kernel void image_filter(int n, int m, global float *filter_weights,
                         read_only_image2d_float src_image,
                         write_only_image2d_float dst_image) {
    const int tid_x = get_global_id(0);
    const int tid_y = get_global_id(1);
    float4 filter_result = (float4)( 0.f, 0.f, 0.f, 0.f );
    int indx = 0;

    /* The window size comes from the caller-supplied n and m; it must not
     * depend on the work-item position, otherwise the reads run past the
     * n * m weight table. */
    for (int i = -m/2; i < (m+1)/2; i++) {
        for (int j = -n/2; j < (n+1)/2; j++) {
            /* Always advance the weight index, even when the tap is
             * skipped, so weights stay aligned with window positions. */
            const float w = filter_weights[indx++];
            /* Zero weights contribute nothing; skip the image read. */
            if (w != 0.0f) {
                filter_result += w * read_imagef(src_image, sampler,
                                                 (int2)(tid_x + j, tid_y + i));
            }
        }
    }
    write_imagef(dst_image, (int2)(tid_x, tid_y), filter_result);
}
