Commit 8976da7

Added fwd_initialize() / bwd_initialize()
Removed memory leaks: layer buffers were not freed and cleared when layers were destroyed
1 parent 1aa8df1 commit 8976da7
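
The leak fix pairs every guarded delete in the destructors with a reset to nullptr, so a buffer can never be freed twice even if cleanup runs more than once. A minimal, self-contained illustration of the idiom, using a hypothetical Owner/Buffer pair rather than the repository's Layer/Blob classes:

#include <cstdio>

struct Buffer { float data[16]; };

class Owner {
public:
    ~Owner() { release(); }

    // Guarded delete + nullptr reset: safe to call repeatedly.
    void release() {
        if (buffer_ != nullptr) { delete buffer_; buffer_ = nullptr; }
    }

    void allocate() {
        release();              // drop any previous allocation first
        buffer_ = new Buffer{};
    }

private:
    Buffer* buffer_ = nullptr;
};

int main() {
    Owner o;
    o.allocate();
    o.release();   // frees and nulls the pointer
    o.release();   // harmless: pointer is already nullptr
    return 0;      // destructor calls release() again, still safe
}

The same shape appears below in Layer::~Layer() and Dense::~Dense().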

4 files changed: 318 additions & 105 deletions


Chapter10/10_deep_learning/01_ann/src/layer.cu

Lines changed: 76 additions & 20 deletions
@@ -28,13 +28,13 @@ Layer::~Layer()
     std::cout << "Destroy Layer: " << name_ << std::endl;
 #endif
 
-    if (output_ != nullptr) delete output_;
-    if (grad_input_ != nullptr) delete grad_input_;
+    if (output_ != nullptr) { delete output_; output_ = nullptr; }
+    if (grad_input_ != nullptr) { delete grad_input_; grad_input_ = nullptr; }
 
-    if (weights_ != nullptr) delete weights_;
-    if (biases_ != nullptr) delete biases_;
-    if (grad_weights_ != nullptr) delete grad_weights_;
-    if (grad_biases_ != nullptr) delete grad_biases_;
+    if (weights_ != nullptr) { delete weights_; weights_ = nullptr; }
+    if (biases_ != nullptr) { delete biases_; biases_ = nullptr; }
+    if (grad_weights_ != nullptr) { delete grad_weights_; grad_weights_ = nullptr; }
+    if (grad_biases_ != nullptr) { delete grad_biases_; grad_biases_ = nullptr; }
 }
 
 void Layer::init_weight_bias(unsigned int seed)
@@ -179,8 +179,7 @@ Dense::Dense(std::string name, int output_size)
 
 Dense::~Dense()
 {
-    if (d_one_vec != nullptr)
-        cudaFree(d_one_vec);
+    if (d_one_vec != nullptr) { cudaFree(d_one_vec); d_one_vec = nullptr; }
 }
 
 __global__ void init_one_vec(float* d_one_vec, size_t length)
@@ -192,8 +191,10 @@ __global__ void init_one_vec(float* d_one_vec, size_t length)
         d_one_vec[i] = 1.f;
 }
 
-Blob<float> *Dense::forward(Blob<float> *input)
+bool Dense::fwd_initialize(Blob<float> *input)
 {
+    bool is_initialize = false;
+
     // initialize weights and biases
     if (weights_ == nullptr)
     {
@@ -204,6 +205,7 @@ Blob<float> *Dense::forward(Blob<float> *input)
         weights_ = new Blob<float>(1, 1, input_size_, output_size_);
         biases_ = new Blob<float>(1, 1, output_size_);
 
+        is_initialize = true;
     }
 
     // initilaize input and output
@@ -241,9 +243,15 @@ Blob<float> *Dense::forward(Blob<float> *input)
         {
             /* do nothing */
         }
+
+        is_initialize = true;
     }
 
+    return is_initialize;
+}
 
+Blob<float> *Dense::forward(Blob<float> *input)
+{
     // output = weights^T * input (without biases)
     checkCublasErrors(
         cublasSgemm(cuda_->cublas(),
@@ -275,12 +283,16 @@ Blob<float> *Dense::forward(Blob<float> *input)
     return output_;
 }
 
-Blob<float> *Dense::backward(Blob<float> *grad_output)
+bool Dense::bwd_initialize(Blob<float> *grad_output)
 {
+    bool is_initialize = false;
+
     if (grad_weights_ == nullptr)
     {
         grad_weights_ = new Blob<float>(weights_->shape());
         grad_biases_ = new Blob<float>(biases_->shape());
+
+        is_initialize = true;
     }
 
     if (grad_input_ == nullptr || batch_size_ != grad_output->n())
@@ -291,8 +303,15 @@ Blob<float> *Dense::backward(Blob<float> *grad_output)
         grad_input_ = new Blob<float>(input_->shape());
     else
         grad_input_->reset(input_->shape());
+
+        is_initialize = true;
     }
 
+    return is_initialize;
+}
+
+Blob<float> *Dense::backward(Blob<float> *grad_output)
+{
     // db = (dy) * d_one_vec
     cublasSgemv(cuda_->cublas(),
             CUBLAS_OP_N,
@@ -343,20 +362,22 @@ Blob<float> *Dense::backward(Blob<float> *grad_output)
 Activation::Activation(std::string name, cudnnActivationMode_t mode, float coef)
 {
     name_ = name;
-    mode_ = mode;
-    coef_ = coef;
+    act_mode_ = mode;
+    act_coef_ = coef;
 
     cudnnCreateActivationDescriptor(&act_desc_);
-    cudnnSetActivationDescriptor(act_desc_, mode, CUDNN_PROPAGATE_NAN, coef);
+    cudnnSetActivationDescriptor(act_desc_, act_mode_, CUDNN_PROPAGATE_NAN, act_coef_);
 }
 
 Activation::~Activation()
 {
     cudnnDestroyActivationDescriptor(act_desc_);
 }
 
-Blob<float> *Activation::forward(Blob<float> *input)
+bool Activation::fwd_initialize(Blob<float> *input)
 {
+    bool is_initialize = false;
+
     if (input_ == nullptr || batch_size_ != input->n())
     {
         input_ = input;
@@ -369,8 +390,18 @@ Blob<float> *Activation::forward(Blob<float> *input)
         output_->reset(input->shape());
 
         output_desc_ = output_->tensor();
+
+        // input_->print( name_ + "::input", false);
+        // output_desc_->print( name_ + "::output", false);
+
+        is_initialize = true;
     }
 
+    return is_initialize;
+}
+
+Blob<float> *Activation::forward(Blob<float> *input)
+{
     cudnnActivationForward(cuda_->cudnn(),
         act_desc_,
         &cuda_->one,
@@ -383,18 +414,27 @@ Blob<float> *Activation::forward(Blob<float> *input)
     return output_;
 }
 
-Blob<float> *Activation::backward(Blob<float> *grad_output)
+bool Activation::bwd_initialize(Blob<float> *grad_output)
 {
+    bool is_initialize = false;
+
     if (grad_input_ == nullptr || batch_size_ != grad_output->n())
     {
         grad_output_ = grad_output;
 
         if (grad_input_ == nullptr)
             grad_input_ = new Blob<float>(input_->shape());
         else
-            grad_input_->reset(input_->shape());
+            grad_input_->reset(input_->shape());
+
+        is_initialize = true;
     }
 
+    return is_initialize;
+}
+
+Blob<float> *Activation::backward(Blob<float> *grad_output)
+{
     cudnnActivationBackward(cuda_->cudnn(),
         act_desc_,
         &cuda_->one,
@@ -418,11 +458,13 @@ Softmax::Softmax(std::string name)
 
 Softmax::~Softmax()
 {
-
+    // do nothing
 }
 
-Blob<float> *Softmax::forward(Blob<float> *input)
+bool Softmax::fwd_initialize(Blob<float> *input)
 {
+    bool is_initialize = false;
+
     if (input_ == nullptr || batch_size_ != input->n())
     {
         input_ = input;
@@ -435,8 +477,15 @@ Blob<float> *Softmax::forward(Blob<float> *input)
         output_->reset(input->shape());
 
         output_desc_ = output_->tensor();
+
+        is_initialize = true;
     }
 
+    return is_initialize;
+}
+
+Blob<float> *Softmax::forward(Blob<float> *input)
+{
 #if (DEBUG_SOFTMAX & 0x01)
     std::cout << name_ << "[FORWARD]" << std::endl;
     input_->print(name_ + "::input", true, input->n());
@@ -454,18 +503,25 @@ Blob<float> *Softmax::forward(Blob<float> *input)
     return output_;
 }
 
-Blob<float> *Softmax::backward(Blob<float> *target)
+bool Softmax::bwd_initialize(Blob<float> *target)
 {
-    checkCudaErrors(cudaDeviceSynchronize());
+    bool is_initialize = false;
 
     if (grad_input_ == nullptr || batch_size_ != target->n())
     {
         if (grad_input_ == nullptr)
             grad_input_ = new Blob<float>(input_->shape());
         else
             grad_input_->reset(input_->shape());
+
+        is_initialize = true;
     }
 
+    return is_initialize;
+}
+
+Blob<float> *Softmax::backward(Blob<float> *target)
+{
     // set grad_input_ as predict
     checkCudaErrors(cudaMemcpyAsync(grad_input_->cuda(),
                     output_->cuda(), output_->buf_size(),
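
With allocation split out of the math, the training driver is expected to call fwd_initialize()/bwd_initialize() before each forward()/backward() step; the network-side call sites are among the four changed files but are not shown in this diff. A self-contained sketch of the resulting two-phase pattern, using an illustrative ToyLayer with std::vector buffers instead of Blob<float> (all names besides fwd_initialize/forward are assumptions, not taken from the repository):

#include <cstdio>
#include <vector>

// Illustrative stand-in for a layer: fwd_initialize() only (re)allocates,
// forward() only computes, mirroring the split introduced by this commit.
struct ToyLayer {
    int batch_size = 0;
    std::vector<float> output;

    // Returns true when buffers were created or resized (first call,
    // or the batch size changed).
    bool fwd_initialize(const std::vector<float>& input, int n) {
        if (output.empty() || batch_size != n) {
            batch_size = n;
            output.assign(input.size(), 0.f);
            return true;
        }
        return false;
    }

    const std::vector<float>& forward(const std::vector<float>& input) {
        for (size_t i = 0; i < input.size(); ++i)
            output[i] = 2.f * input[i];   // trivial computation for the sketch
        return output;
    }
};

int main() {
    std::vector<ToyLayer> layers(3);
    std::vector<float> x(8, 1.f);

    const std::vector<float>* cur = &x;
    for (auto& layer : layers) {
        bool reallocated = layer.fwd_initialize(*cur, 8);  // allocation phase
        cur = &layer.forward(*cur);                        // computation phase
        (void)reallocated;  // a real driver could rebuild descriptors here
    }
    std::printf("out[0] = %.1f\n", (*cur)[0]);  // 8.0 after three doublings
    return 0;
}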

Chapter10/10_deep_learning/01_ann/src/layer.h

Lines changed: 26 additions & 14 deletions
@@ -16,7 +16,7 @@ class Layer
 {
 public:
     Layer();
-    ~Layer();
+    virtual ~Layer();
 
     virtual Blob<float> *forward(Blob<float> *input) = 0;
     virtual Blob<float> *backward(Blob<float> *grad_input) = 0;
@@ -36,6 +36,9 @@ class Layer
     void unfreeze() { freeze_ = false;}
 
 protected:
+    virtual bool fwd_initialize(Blob<float> *input) = 0;
+    virtual bool bwd_initialize(Blob<float> *grad_output) = 0;
+
     // name of layer
     std::string name_;
 
@@ -83,12 +86,15 @@ class Dense: public Layer
 {
 public:
     Dense(std::string name, int out_size);
-    ~Dense();
+    virtual ~Dense();
+
+    virtual Blob<float> *forward(Blob<float> *input);
+    virtual Blob<float> *backward(Blob<float> *grad_input);
 
-    Blob<float> *forward(Blob<float> *input);
-    Blob<float> *backward(Blob<float> *grad_input);
+private:
+    bool fwd_initialize(Blob<float> *input);
+    bool bwd_initialize(Blob<float> *grad_output);
 
-private:
     int input_size_ = 0;
     int output_size_= 0;
 
@@ -99,30 +105,36 @@ class Activation: public Layer
 {
 public:
     Activation(std::string name, cudnnActivationMode_t mode, float coef = 0.f);
-    ~Activation();
+    virtual ~Activation();
 
-    Blob<float> *forward(Blob<float> *input);
-    Blob<float> *backward(Blob<float> *grad_input);
+    virtual Blob<float> *forward(Blob<float> *input);
+    virtual Blob<float> *backward(Blob<float> *grad_input);
 
 private:
+    bool fwd_initialize(Blob<float> *input);
+    bool bwd_initialize(Blob<float> *grad_output);
+
     cudnnActivationDescriptor_t act_desc_;
-    cudnnActivationMode_t mode_;
-    float coef_;
+    cudnnActivationMode_t act_mode_;
+    float act_coef_;
 };
 
 class Softmax: public Layer
 {
 public:
     Softmax(std::string name);
-    ~Softmax();
+    virtual ~Softmax();
 
-    Blob<float> *forward(Blob<float> *input);
-    Blob<float> *backward(Blob<float> *grad_input);
+    virtual Blob<float> *forward(Blob<float> *input);
+    virtual Blob<float> *backward(Blob<float> *grad_input);
 
     float get_loss(Blob<float> *target);
     int get_accuracy(Blob<float> *target);
 
-private:
+protected:
+    bool fwd_initialize(Blob<float> *input);
+    bool bwd_initialize(Blob<float> *grad_output);
+
     CrossEntropyLoss loss_;
 };
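
layer.h also makes every destructor virtual, which is what lets the cleaned-up destructors run at all when layers are deleted through base-class pointers (as a network holding Layer* objects would do); without it, only ~Layer() would execute and the derived layers' buffers would still leak. A self-contained sketch of the difference, with illustrative Base/Derived names rather than the repository's classes:

#include <cstdio>

struct Base {
    // virtual: guarantees the derived destructor runs on "delete basePtr"
    virtual ~Base() { std::printf("~Base\n"); }
};

struct Derived : Base {
    float* buffer = new float[1024];
    ~Derived() override {
        delete[] buffer;
        std::printf("~Derived: buffer freed\n");
    }
};

int main() {
    Base* layer = new Derived();
    delete layer;   // prints "~Derived: buffer freed" then "~Base";
                    // with a non-virtual ~Base this delete would be undefined
                    // behavior and typically leaks the derived buffer
    return 0;
}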
