@@ -116,107 +116,6 @@ __global__ void CalculateProbRBFLabelKernel(const int n, const Dtype* source_p,
116116 }
117117}
118118
// Computes an RBF kernel matrix over labels, treating source labels as
// one-of-K (one-hot) vectors and target labels as soft probability rows.
//
// out is a total_num x total_num row-major matrix; entry (row, col) is
// exp(-||y_row - y_col||^2 / sigma), where the first source_num rows/cols
// are hard source labels and the rest are target probability vectors of
// length label_dim. For two hard source labels the value collapses to
// 1 when equal and the fixed constant exp(-1) when different.
//
// Launch: 1D grid over n = total_num * total_num elements
// (CUDA_KERNEL_LOOP grid-stride macro from Caffe).
template <typename Dtype>
__device__ Dtype OneHotProbSqDist(const Dtype label, const Dtype* prob_row,
    const int label_dim) {
  // Squared L2 distance between the one-hot vector for `label` and a
  // probability row: (1 - p_j)^2 at the labeled class, p_j^2 elsewhere.
  Dtype dist = Dtype(0);
  for (int j = 0; j < label_dim; ++j) {
    const Dtype d = (j == label) ? Dtype(1) - prob_row[j] : prob_row[j];
    dist += d * d;
  }
  return dist;
}

template <typename Dtype>
__global__ void CalculateOneOfKRBFLabelKernel(const int n, const Dtype* source_label, const Dtype* target_p,
    const int source_num, const int total_num, const int label_dim, const Dtype sigma, Dtype* out) {
  CUDA_KERNEL_LOOP(index, n) {
    const int row = index / total_num;
    const int col = index % total_num;
    const bool row_is_source = row < source_num;
    const bool col_is_source = col < source_num;
    if (row_is_source && col_is_source) {
      // ss: two hard labels — kernel is 1 (same class) or exp(-1) (different).
      out[index] = (source_label[row] == source_label[col])
          ? Dtype(1) : exp(Dtype(-1));
    } else if (row_is_source) {
      // st: hard source label vs. target probability row.
      const Dtype* prob = target_p + label_dim * (col - source_num);
      out[index] = exp(-OneHotProbSqDist(source_label[row], prob, label_dim) / sigma);
    } else if (col_is_source) {
      // ts: symmetric to st.
      const Dtype* prob = target_p + label_dim * (row - source_num);
      out[index] = exp(-OneHotProbSqDist(source_label[col], prob, label_dim) / sigma);
    } else {
      // tt: squared distance between two target probability rows.
      const Dtype* p1 = target_p + label_dim * (row - source_num);
      const Dtype* p2 = target_p + label_dim * (col - source_num);
      Dtype dist = Dtype(0);
      for (int j = 0; j < label_dim; ++j) {
        const Dtype d = p1[j] - p2[j];
        dist += d * d;
      }
      out[index] = exp(-dist / sigma);
    }
  }
}
162-
// Computes a 0/1 "predicted same class" indicator matrix over all pairs.
//
// out is a total_num x total_num row-major matrix. The first source_num
// indices carry hard source labels; the remaining indices carry target
// probability rows of length label_dim. Entry (index1, index2) is 1 when
// the two samples agree on class, where a target sample's class is taken
// to be the argmax of its probability row (ties favor agreement, matching
// the original strict `>` comparisons).
//
// Launch: 1D grid over n = total_num * total_num elements
// (CUDA_KERNEL_LOOP grid-stride macro from Caffe).
template <typename Dtype>
__global__ void CalculateIdentifyLabelKernel(const int n, const Dtype* source_label, const Dtype* target_p,
    const int source_num, const int total_num, const int label_dim, Dtype* out) {
  CUDA_KERNEL_LOOP(index, n) {
    int index1 = index / total_num;
    int index2 = index % total_num;
    if (index1 < source_num && index2 < source_num) {
      // ss: two ground-truth labels — exact equality.
      out[index] = (source_label[index1] == source_label[index2]) ? Dtype(1) : Dtype(0);
    } else if (index1 < source_num && index2 >= source_num) {
      // st: agree iff no target probability strictly exceeds the one at the
      // source's class, i.e. the source class is a (tied) argmax.
      int offset = label_dim * (index2 - source_num);
      Dtype max_val = target_p[offset + (int)source_label[index1]];
      out[index] = Dtype(1);
      for (int j = 0; j < label_dim; ++j) {
        if (target_p[offset + j] > max_val) {
          out[index] = Dtype(0);
          break;
        }
      }
    } else if (index1 >= source_num && index2 < source_num) {
      // ts: symmetric to st.
      int offset = label_dim * (index1 - source_num);
      Dtype max_val = target_p[offset + (int)source_label[index2]];
      out[index] = Dtype(1);
      for (int j = 0; j < label_dim; ++j) {
        if (target_p[offset + j] > max_val) {
          out[index] = Dtype(0);
          break;
        }
      }
    } else {
      // tt: argmax of the first row, then check it is also a (tied) argmax
      // of the second row.
      int offset1 = label_dim * (index1 - source_num);
      int offset2 = label_dim * (index2 - source_num);
      Dtype max_val = Dtype(-1);
      // FIX: max_index was uninitialized; it is only assigned when some
      // probability exceeds -1, so reading it below was UB otherwise.
      int max_index = 0;
      for (int j = 0; j < label_dim; ++j) {
        if (target_p[offset1 + j] > max_val) {
          max_val = target_p[offset1 + j];
          max_index = j;
        }
      }
      out[index] = Dtype(1);
      max_val = target_p[offset2 + max_index];
      for (int j = 0; j < label_dim; ++j) {
        if (target_p[offset2 + j] > max_val) {
          out[index] = Dtype(0);
          break;
        }
      }
    }
  }
}
219-
220119template <typename Dtype>
221120void JMMDLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
222121 const vector<Blob<Dtype>*>& top) {
@@ -289,9 +188,9 @@ void JMMDLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
289188 }
290189 sigma_ = bandwidth / total_num_;
291190 }
292- CalculateProbRBFLabelKernel<Dtype><<<CAFFE_GET_BLOCKS(nthreads), CAFFE_CUDA_NUM_THREADS>>> (
293- nthreads, bottom[2 ]->gpu_data (), bottom[3 ]->gpu_data (),
294- source_num_, total_num_, label_dim, sigma_, label_kernel_num_, label_kernel_mul_, delta_.mutable_gpu_data ());
191+ CalculateProbRBFLabelKernel<Dtype><<<CAFFE_GET_BLOCKS(nthreads), CAFFE_CUDA_NUM_THREADS>>> (
192+ nthreads, bottom[2 ]->gpu_data (), bottom[3 ]->gpu_data (),
193+ source_num_, total_num_, label_dim, sigma_, label_kernel_num_, label_kernel_mul_, delta_.mutable_gpu_data ());
295194 }
296195 else {
297196 caffe_gpu_set (total_num_ * total_num_, Dtype (1 ), delta_.mutable_gpu_data ());
@@ -301,84 +200,29 @@ void JMMDLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
301200 top[0 ]->mutable_cpu_data ()[0 ] = Dtype (0 );
302201}
303202
// Accumulates the MMD gradient contribution of one anchor sample
// (data/data_index) against every other sample in the combined batch.
//
// For element `index` = other * dim + d, reads the anchor feature data[d]
// and the other sample's feature, scales their difference by
// -2 * gamma * kernel_val * label_kernel at the (data_index, other) pair,
// and adds it — with a sign/weight depending on which domains the two
// samples come from — into source_diff or target_diff.
//
// NOTE(review): each launch writes every (other, d) slot exactly once, so
// the `+=` accumulation is race-free within a single launch; callers invoke
// this once per anchor to sum over anchors.
//
// Launch: 1D grid over n = total_num * dim elements
// (CUDA_KERNEL_LOOP grid-stride macro from Caffe).
template <typename Dtype>
__global__ void CalculateDiff(const int n, const Dtype* source, const Dtype* target,
    const int source_num, const int total_num, const int dim, const Dtype* kernel_val,
    const Dtype gamma, const Dtype* label_kernel, const Dtype* data, const int data_index,
    Dtype* source_diff, Dtype* target_diff) {
  CUDA_KERNEL_LOOP(index, n) {
    const int other = index / dim;
    const int d = index % dim;
    const bool other_in_target = other >= source_num;
    const bool anchor_in_target = data_index >= source_num;
    const Dtype a = data[d];
    const Dtype b = other_in_target
        ? target[dim * (other - source_num) + d]
        : source[dim * other + d];
    const int pair = data_index * total_num + other;
    // Chain factor of the RBF kernel derivative, weighted by the joint
    // label kernel at this sample pair.
    const Dtype base = -2 * gamma * kernel_val[pair] * label_kernel[pair];
    if (other_in_target) {
      const int t = other - source_num;
      if (anchor_in_target)
        target_diff[t * dim + d] += base * (b - a);          // tt pair
      else
        target_diff[t * dim + d] += -0.5 * base * (b - a);   // st pair
    } else {
      if (!anchor_in_target)
        source_diff[other * dim + d] += 0.25 * base * (b - a);  // ss pair
      else
        source_diff[other * dim + d] += -0.5 * base * (b - a);  // ts pair
    }
  }
}
333-
334203template <typename Dtype>
335204void JMMDLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
336205 const vector<bool >& propagate_down,
337206 const vector<Blob<Dtype>*>& bottom) {
338207 Dtype* source_diff = bottom[0 ]->mutable_gpu_diff ();
339208 Dtype* target_diff = bottom[1 ]->mutable_gpu_diff ();
340- Dtype* label_source_diff = (label_back_propagate_) ? bottom[2 ]->mutable_gpu_diff () : NULL ;
341- Dtype* label_target_diff = (label_back_propagate_) ? bottom[3 ]->mutable_gpu_diff () : NULL ;
342- int label_dim = (label_back_propagate_) ? bottom[2 ]->count () / bottom[2 ]->count (0 , 1 ) : 0 ;
343- if (label_back_propagate_){
344- caffe_gpu_set (source_num_ * label_dim, Dtype (0 ), label_source_diff);
345- caffe_gpu_set (target_num_ * label_dim, Dtype (0 ), label_target_diff);
346- }
209+ Dtype* label_source_diff = bottom[2 ]->mutable_gpu_diff ();
210+ Dtype* label_target_diff = bottom[3 ]->mutable_gpu_diff ();
211+ int label_dim = bottom[2 ]->count () / bottom[2 ]->count (0 , 1 );
212+ caffe_gpu_set (source_num_ * label_dim, Dtype (0 ), label_source_diff);
213+ caffe_gpu_set (target_num_ * label_dim, Dtype (0 ), label_target_diff);
347214
348215 caffe_gpu_set (source_num_ * dim_, Dtype (0 ), source_diff);
349216 caffe_gpu_set (target_num_ * dim_, Dtype (0 ), target_diff);
350- /*
351- for(int i = 0;i < total_num_;++i){
352- int nthreads = total_num_ * dim_;
353- const Dtype* data = (i >= source_num_) ?
354- (bottom[1]->gpu_data() + dim_ * (i - source_num_)) :
355- (bottom[0]->gpu_data() + dim_ * i);
356-
357- Dtype gamma_times = pow(kernel_mul_, (Dtype)(kernel_num_ / 2));
358- Dtype kernel_gamma = gamma_ / gamma_times;
359-
360- for(int j = 0;j < kernel_num_;++j){
361- CalculateDiff<Dtype><<<CAFFE_GET_BLOCKS(nthreads), CAFFE_CUDA_NUM_THREADS>>>(
362- nthreads, bottom[0]->gpu_data(), bottom[1]->gpu_data(),
363- source_num_, total_num_, dim_, kernel_val_[j]->gpu_data(),
364- kernel_gamma, delta_.gpu_data(), data, i,
365- bottom[0]->mutable_gpu_diff(), bottom[1]->mutable_gpu_diff());
366- kernel_gamma *= kernel_mul_;
367- }
368- }
369- caffe_gpu_scal(source_num_ * dim_, loss_weight_ / total_num_ / total_num_, bottom[0]->mutable_gpu_diff());
370- caffe_gpu_scal(target_num_ * dim_, loss_weight_ / total_num_ / total_num_, bottom[1]->mutable_gpu_diff());
371- */
217+
372218 if (source_num_ <= 1 || target_num_ <= 1 ) return ;
373219 int sample_num = (source_num_ > target_num_) ? source_num_ : target_num_;
374220 int s1, s2, t1, t2;
375221 Dtype* tempX1 = diff_.mutable_gpu_diff () + total_num_ * total_num_;
376222 Dtype* tempX2 = diff_.mutable_gpu_diff () + total_num_ * total_num_ + dim_;
377223 Dtype* tempY1 = NULL , *tempY2 = NULL ;
378- if (label_back_propagate_){
379- tempY1 = diff_.mutable_gpu_diff () + total_num_ * total_num_ + dim_ + dim_;
380- tempY2 = diff_.mutable_gpu_diff () + total_num_ * total_num_ + dim_ + dim_ + label_dim;
381- }
224+ tempY1 = diff_.mutable_gpu_diff () + total_num_ * total_num_ + dim_ + dim_;
225+ tempY2 = diff_.mutable_gpu_diff () + total_num_ * total_num_ + dim_ + dim_ + label_dim;
382226 for (int i = 0 ;i < sample_num;++i){
383227 s1 = rand () % source_num_;
384228 s2 = rand () % source_num_;
@@ -394,10 +238,10 @@ void JMMDLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
394238 const Dtype* x_s2 = bottom[0 ]->gpu_data () + s2 * dim_;
395239 const Dtype* x_t1 = bottom[1 ]->gpu_data () + t1 * dim_;
396240 const Dtype* x_t2 = bottom[1 ]->gpu_data () + t2 * dim_;
397- const Dtype* y_s1 = (label_back_propagate_) ? bottom[2 ]->gpu_data () + s1 * label_dim : NULL ;
398- const Dtype* y_s2 = (label_back_propagate_) ? bottom[2 ]->gpu_data () + s2 * label_dim : NULL ;
399- const Dtype* y_t1 = (label_back_propagate_) ? bottom[3 ]->gpu_data () + t1 * label_dim : NULL ;
400- const Dtype* y_t2 = (label_back_propagate_) ? bottom[3 ]->gpu_data () + t2 * label_dim : NULL ;
241+ const Dtype* y_s1 = bottom[2 ]->gpu_data () + s1 * label_dim;
242+ const Dtype* y_s2 = bottom[2 ]->gpu_data () + s2 * label_dim;
243+ const Dtype* y_t1 = bottom[3 ]->gpu_data () + t1 * label_dim;
244+ const Dtype* y_t2 = bottom[3 ]->gpu_data () + t2 * label_dim;
401245
402246 caffe_gpu_sub<Dtype>(dim_, x_s1, x_s2, tempX1);
403247 caffe_gpu_sub<Dtype>(dim_, x_s2, x_s1, tempX2);
@@ -417,15 +261,13 @@ void JMMDLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
417261 caffe_gpu_scal (dim_, loss_weight_ * factor_for_diff / sample_num, tempX2);
418262 caffe_gpu_add (dim_, tempX1, source_diff + s1 * dim_, source_diff + s1 * dim_);
419263 caffe_gpu_add (dim_, tempX2, source_diff + s2 * dim_, source_diff + s2 * dim_);
420- if (label_back_propagate_){
421- caffe_gpu_sub<Dtype>(label_dim, y_s1, y_s2, tempY1);
422- caffe_gpu_sub<Dtype>(label_dim, y_s2, y_s1, tempY2);
423- factor_for_diff = (-2 ) / sigma_ * x_kernel * delta_.cpu_data ()[s1 * total_num_ + s2];
424- caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY1);
425- caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY2);
426- caffe_gpu_add (label_dim, tempY1, label_source_diff + s1 * label_dim, label_source_diff + s1 * label_dim);
427- caffe_gpu_add (label_dim, tempY2, label_source_diff + s2 * label_dim, label_source_diff + s2 * label_dim);
428- }
264+ caffe_gpu_sub<Dtype>(label_dim, y_s1, y_s2, tempY1);
265+ caffe_gpu_sub<Dtype>(label_dim, y_s2, y_s1, tempY2);
266+ factor_for_diff = (-2 ) / sigma_ * x_kernel * delta_.cpu_data ()[s1 * total_num_ + s2];
267+ caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY1);
268+ caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY2);
269+ caffe_gpu_add (label_dim, tempY1, label_source_diff + s1 * label_dim, label_source_diff + s1 * label_dim);
270+ caffe_gpu_add (label_dim, tempY2, label_source_diff + s2 * label_dim, label_source_diff + s2 * label_dim);
429271
430272 factor_for_diff = 0 ;
431273 caffe_gpu_sub<Dtype>(dim_, x_s1, x_t2, tempX1);
@@ -445,15 +287,13 @@ void JMMDLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
445287 caffe_gpu_scal (dim_, loss_weight_ * factor_for_diff / sample_num, tempX2);
446288 caffe_gpu_add (dim_, tempX1, source_diff + s1 * dim_, source_diff + s1 * dim_);
447289 caffe_gpu_add (dim_, tempX2, target_diff + t2 * dim_, target_diff + t2 * dim_);
448- if (label_back_propagate_){
449- caffe_gpu_sub<Dtype>(label_dim, y_s1, y_t2, tempY1);
450- caffe_gpu_sub<Dtype>(label_dim, y_t2, y_s1, tempY2);
451- factor_for_diff = 2 / sigma_ * x_kernel * delta_.cpu_data ()[s1 * total_num_ + source_num_ + t2];
452- caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY1);
453- caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY2);
454- caffe_gpu_add (label_dim, tempY1, label_source_diff + s1 * label_dim, label_source_diff + s1 * label_dim);
455- caffe_gpu_add (label_dim, tempY2, label_target_diff + t2 * label_dim, label_target_diff + t2 * label_dim);
456- }
290+ caffe_gpu_sub<Dtype>(label_dim, y_s1, y_t2, tempY1);
291+ caffe_gpu_sub<Dtype>(label_dim, y_t2, y_s1, tempY2);
292+ factor_for_diff = 2 / sigma_ * x_kernel * delta_.cpu_data ()[s1 * total_num_ + source_num_ + t2];
293+ caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY1);
294+ caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY2);
295+ caffe_gpu_add (label_dim, tempY1, label_source_diff + s1 * label_dim, label_source_diff + s1 * label_dim);
296+ caffe_gpu_add (label_dim, tempY2, label_target_diff + t2 * label_dim, label_target_diff + t2 * label_dim);
457297
458298 factor_for_diff = 0 ;
459299 caffe_gpu_sub<Dtype>(dim_, x_t1, x_s2, tempX1);
@@ -473,15 +313,13 @@ void JMMDLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
473313 caffe_gpu_scal (dim_, loss_weight_ * factor_for_diff / sample_num, tempX2);
474314 caffe_gpu_add (dim_, tempX1, target_diff + t1 * dim_, target_diff + t1 * dim_);
475315 caffe_gpu_add (dim_, tempX2, source_diff + s2 * dim_, source_diff + s2 * dim_);
476- if (label_back_propagate_){
477- caffe_gpu_sub<Dtype>(label_dim, y_s2, y_t1, tempY1);
478- caffe_gpu_sub<Dtype>(label_dim, y_t1, y_s2, tempY2);
479- factor_for_diff = 2 / sigma_ * x_kernel * delta_.cpu_data ()[(t1 + source_num_) * total_num_ + s2];
480- caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY1);
481- caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY2);
482- caffe_gpu_add (label_dim, tempY1, label_source_diff + s2 * label_dim, label_source_diff + s2 * label_dim);
483- caffe_gpu_add (label_dim, tempY2, label_target_diff + t1 * label_dim, label_target_diff + t1 * label_dim);
484- }
316+ caffe_gpu_sub<Dtype>(label_dim, y_s2, y_t1, tempY1);
317+ caffe_gpu_sub<Dtype>(label_dim, y_t1, y_s2, tempY2);
318+ factor_for_diff = 2 / sigma_ * x_kernel * delta_.cpu_data ()[(t1 + source_num_) * total_num_ + s2];
319+ caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY1);
320+ caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY2);
321+ caffe_gpu_add (label_dim, tempY1, label_source_diff + s2 * label_dim, label_source_diff + s2 * label_dim);
322+ caffe_gpu_add (label_dim, tempY2, label_target_diff + t1 * label_dim, label_target_diff + t1 * label_dim);
485323
486324 factor_for_diff = 0 ;
487325 caffe_gpu_sub<Dtype>(dim_, x_t1, x_t2, tempX1);
@@ -501,15 +339,13 @@ void JMMDLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
501339 caffe_gpu_scal (dim_, loss_weight_ * factor_for_diff / sample_num, tempX2);
502340 caffe_gpu_add (dim_, tempX1, target_diff + t1 * dim_, target_diff + t1 * dim_);
503341 caffe_gpu_add (dim_, tempX2, target_diff + t2 * dim_, target_diff + t2 * dim_);
504- if (label_back_propagate_){
505- caffe_gpu_sub<Dtype>(label_dim, y_t1, y_t2, tempY1);
506- caffe_gpu_sub<Dtype>(label_dim, y_t2, y_t1, tempY2);
507- factor_for_diff = (-2 ) / sigma_ * x_kernel * delta_.cpu_data ()[(t1 + source_num_) * total_num_ + t2 + source_num_];
508- caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY1);
509- caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY2);
510- caffe_gpu_add (label_dim, tempY1, label_target_diff + t1 * label_dim, label_target_diff + t1 * label_dim);
511- caffe_gpu_add (label_dim, tempY2, label_target_diff + t2 * label_dim, label_target_diff + t2 * label_dim);
512- }
342+ caffe_gpu_sub<Dtype>(label_dim, y_t1, y_t2, tempY1);
343+ caffe_gpu_sub<Dtype>(label_dim, y_t2, y_t1, tempY2);
344+ factor_for_diff = (-2 ) / sigma_ * x_kernel * delta_.cpu_data ()[(t1 + source_num_) * total_num_ + t2 + source_num_];
345+ caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY1);
346+ caffe_gpu_scal (label_dim, label_loss_weight_ * factor_for_diff / sample_num, tempY2);
347+ caffe_gpu_add (label_dim, tempY1, label_target_diff + t1 * label_dim, label_target_diff + t1 * label_dim);
348+ caffe_gpu_add (label_dim, tempY2, label_target_diff + t2 * label_dim, label_target_diff + t2 * label_dim);
513349 }
514350}
515351
0 commit comments