@@ -7,7 +7,7 @@ void im2col(const int * data_im,
7
7
const int pad_h, const int pad_w,
8
8
const int stride_h, const int stride_w,
9
9
const int dilation_h, const int dilation_w,
10
- const int height_col, const int width_col, const int offset ) {
10
+ const int height_col, const int width_col) {
11
11
#pragma HLS INTERFACE m_axi port=data_im offset=slave bundle=gmem
12
12
#pragma HLS INTERFACE m_axi port=data_col offset=slave bundle=gmem
13
13
#pragma HLS INTERFACE s_axilite port=data_im bundle=control
@@ -25,47 +25,66 @@ void im2col(const int * data_im,
25
25
#pragma HLS INTERFACE s_axilite port=dilation_w bundle=control
26
26
#pragma HLS INTERFACE s_axilite port=height_col bundle=control
27
27
#pragma HLS INTERFACE s_axilite port=width_col bundle=control
28
- #pragma HLS INTERFACE s_axilite port=offset bundle=control
28
+ // #pragma HLS INTERFACE s_axilite port=offset bundle=control
29
29
#pragma HLS INTERFACE s_axilite port = return bundle = control
30
30
31
- // int tid = threadIdx.x + blockDim.x * blockIdx.x;
32
- // for(int64_t index = tid; index < n; index += gridDim.x*blockDim.x){
31
+ const int BS = 32 ;
32
+ const int MAX_KERNEL = 11 ;
33
+ char buf[MAX_KERNEL*MAX_KERNEL][BS];
34
+ const int cols = height_col * width_col;
35
+ const int cols_offset = (cols + 63 ) / 64 * 64 ;
36
+ for (int ri = 0 ; ri < n; ri+=BS){
37
+ int chunk_n = BS;
38
+ if (ri + BS > n) chunk_n = n - ri;
39
+ for (int ti = 0 ; ti < chunk_n; ti++){
40
+ #pragma HLS PIPELINE
41
+ int index = ri + ti;
42
+ const int h_index = index / width_col;
43
+ const int h_col = h_index % height_col;
44
+ const int w_col = index % width_col;
45
+ const int c_im = h_index / height_col;
46
+ const int c_col = c_im * kernel_h * kernel_w;
47
+ const int h_offset = h_col * stride_h - pad_h;
48
+ const int w_offset = w_col * stride_w - pad_w;
49
+ int dst_index = c_col * cols_offset + h_col * width_col + w_col;
50
+ int src_index = (c_im * height + h_offset) * width + w_offset;
33
51
34
- for (int index = 0 ; index < n; index++){
35
- const int h_index = index / width_col;
36
- const int h_col = h_index % height_col;
37
- const int w_col = index % width_col;
38
- const int c_im = h_index / height_col;
39
- const int c_col = c_im * kernel_h * kernel_w;
40
- const int h_offset = h_col * stride_h - pad_h;
41
- const int w_offset = w_col * stride_w - pad_w;
42
- // int* data_col_ptr = data_col;
43
- // data_col_ptr += (c_col * height_col + h_col) * width_col + w_col;
44
- // int dst_index = offset + (c_col * height_col + h_col) * width_col + w_col;
45
- const int cols = height_col * width_col;
46
- const int cols_offset = (cols + (MATRIX_PAD-1 )) / MATRIX_PAD * MATRIX_PAD;
47
- int dst_index = offset + c_col * cols_offset + h_col * width_col + w_col;
48
- // const int * data_im_ptr = data_im;
49
- // data_im_ptr += (c_im * height + h_offset) * width + w_offset;
50
- int src_index = (c_im * height + h_offset) * width + w_offset;
52
+ for (int i = 0 ; i < kernel_h; ++i) {
53
+ for (int j = 0 ; j < kernel_w; ++j) {
54
+ int h_im = h_offset + i * dilation_h;
55
+ int w_im = w_offset + j * dilation_w;
56
+ if (h_im < 0 || w_im < 0 || h_im >= height || w_im >= width){
57
+ // data_col[dst_index] = 0;
58
+ buf[i*kernel_w + j][ti] = 0 ;
59
+ }else {
60
+ // data_col[dst_index] = data_im[src_index + i*dilation_h * width + j * dilation_w];
61
+ buf[i*kernel_w + j][ti] = data_im[src_index + i*dilation_h * width + j * dilation_w];
62
+ }
63
+ // dst_index += cols_offset;//height_col * width_col;
64
+ }
65
+ }
66
+ }
51
67
52
- #pragma HLS PIPELINE
53
68
for (int i = 0 ; i < kernel_h; ++i) {
54
69
for (int j = 0 ; j < kernel_w; ++j) {
55
- int h_im = h_offset + i * dilation_h;
56
- int w_im = w_offset + j * dilation_w;
57
- // *data_col_ptr = data_im_ptr[i*dilation_h * width + j * dilation_w];
58
- if (h_im < 0 || w_im < 0 || h_im >= height || w_im >= width){
59
- data_col[dst_index] = 0 ;
60
- }else {
61
- data_col[dst_index] = data_im[src_index + i*dilation_h * width + j * dilation_w];
70
+ const int kernel_index = i*kernel_w + j;
71
+ for (int ti = 0 ; ti < chunk_n; ti++){
72
+ #pragma HLS PIPELINE
73
+ int index = ri + ti;
74
+ const int h_index = index / width_col;
75
+ const int h_col = h_index % height_col;
76
+ const int w_col = index % width_col;
77
+ const int c_im = h_index / height_col;
78
+ const int c_col = c_im * kernel_h * kernel_w;
79
+ const int h_offset = h_col * stride_h - pad_h;
80
+ const int w_offset = w_col * stride_w - pad_w;
81
+ int dst_index = c_col * cols_offset + h_col * width_col + w_col + kernel_index * cols_offset;
82
+ data_col[dst_index] = buf[kernel_index][ti];
62
83
}
63
- // (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
64
- // (data_im_ptr[i * dilation_h * width + j * dilation_w]) : 0;
65
- // data_col_ptr += height_col * width_col;
66
- dst_index += cols_offset;// height_col * width_col;
84
+ // dst_index += cols_offset;//height_col * width_col;
67
85
}
68
86
}
69
87
}
88
+
70
89
}
71
90
}
0 commit comments