@@ -27,10 +27,10 @@ at::Tensor linear_bias_forward(at::Tensor input, at::Tensor weight, at::Tensor b
2727 // auto reserved_size = get_mlp_reserved_space(batch_size, num_layers, output_features.data());
2828
2929 // create output/workspace tensor
30- auto out = at::empty ({batch_size, out_features}, input.type ());
31- // auto reserved_space = at::empty({reserved_size}, inputs[0].type ());
30+ auto out = at::empty ({batch_size, out_features}, input.scalar_type ());
31+ // auto reserved_space = at::empty({reserved_size}, inputs[0].scalar_type ());
3232 // allocate a fixed cublaslt workspace of 1 << 22 elements for now; that is at least 4 MB for any element type
33- auto lt_workspace = at::empty ({1 << 22 }, input.type ());
33+ auto lt_workspace = at::empty ({1 << 22 }, input.scalar_type ());
3434
3535 AT_DISPATCH_FLOATING_TYPES_AND2 (at::ScalarType::Half, at::ScalarType::BFloat16, input.scalar_type (), " linear_bias_forward" , [&] {
3636 scalar_t * w_ptr = weight.data_ptr <scalar_t >();
@@ -61,16 +61,16 @@ std::vector<at::Tensor> linear_bias_backward(at::Tensor input, at::Tensor weight
6161 // auto reserved_size = get_mlp_reserved_space(batch_size, num_layers, output_features.data());
6262
6363 // create output/workspace tensor
64- auto d_weight = at::empty ({out_features, in_features}, input.type ());
64+ auto d_weight = at::empty ({out_features, in_features}, input.scalar_type ());
6565#if defined(CUBLAS_VERSION) && CUBLAS_VERSION < 11600
6666 auto d_bias = d_output.view ({-1 , out_features}).sum (0 , false );
67- #else
68- auto d_bias = at::empty ({out_features}, input.type ());
69- #endif
70- auto d_input = at::empty ({batch_size, in_features}, input.type ());
71- // auto reserved_space = at::empty({reserved_size}, inputs[0].type ());
67+ #else
68+ auto d_bias = at::empty ({out_features}, input.scalar_type ());
69+ #endif
70+ auto d_input = at::empty ({batch_size, in_features}, input.scalar_type ());
71+ // auto reserved_space = at::empty({reserved_size}, inputs[0].scalar_type ());
7272 // allocate a fixed cublaslt workspace of 1 << 22 elements for now; that is at least 4 MB for any element type
73- auto lt_workspace = at::empty ({1 << 22 }, input.type ());
73+ auto lt_workspace = at::empty ({1 << 22 }, input.scalar_type ());
7474
7575 AT_DISPATCH_FLOATING_TYPES_AND2 (at::ScalarType::Half, at::ScalarType::BFloat16, input.scalar_type (), " linear_bias_backward" , [&] {
7676 scalar_t * w_ptr = weight.data_ptr <scalar_t >();
@@ -103,12 +103,12 @@ std::vector<at::Tensor> linear_gelu_linear_forward(at::Tensor input, at::Tensor
103103 // auto reserved_size = get_mlp_reserved_space(batch_size, num_layers, output_features.data());
104104
105105 // create output/workspace tensor
106- auto output1 = at::empty ({batch_size, hidden_features}, input.type ());
107- auto gelu_in = at::empty ({batch_size, hidden_features}, input.type ());
108- auto output2 = at::empty ({batch_size, out_features}, input.type ());
109- // auto reserved_space = at::empty({reserved_size}, inputs[0].type ());
106+ auto output1 = at::empty ({batch_size, hidden_features}, input.scalar_type ());
107+ auto gelu_in = at::empty ({batch_size, hidden_features}, input.scalar_type ());
108+ auto output2 = at::empty ({batch_size, out_features}, input.scalar_type ());
109+ // auto reserved_space = at::empty({reserved_size}, inputs[0].scalar_type ());
110110 // allocate a fixed cublaslt workspace of 1 << 22 elements for now; that is at least 4 MB for any element type
111- auto lt_workspace = at::empty ({1 << 22 }, input.type ());
111+ auto lt_workspace = at::empty ({1 << 22 }, input.scalar_type ());
112112
113113 AT_DISPATCH_FLOATING_TYPES_AND2 (at::ScalarType::Half, at::ScalarType::BFloat16, input.scalar_type (), " linear_gelu_linear_forward" , [&] {
114114 scalar_t * w1_ptr = weight1.data_ptr <scalar_t >();
@@ -146,15 +146,15 @@ std::vector<at::Tensor> linear_gelu_linear_backward(at::Tensor input, at::Tensor
146146 // auto reserved_size = get_mlp_reserved_space(batch_size, num_layers, output_features.data());
147147
148148 // create output/workspace tensor
149- auto d_weight1 = at::empty ({hidden_features, in_features}, input.type ());
150- auto d_weight2 = at::empty ({out_features, hidden_features}, input.type ());
151- auto d_bias1 = at::empty ({hidden_features}, input.type ());
152- auto d_bias2 = at::empty ({out_features}, input.type ());
153- auto d_input = at::empty ({batch_size, in_features}, input.type ());
154- auto d_output1 = at::empty ({batch_size, hidden_features}, input.type ());
155- // auto reserved_space = at::empty({reserved_size}, inputs[0].type ());
149+ auto d_weight1 = at::empty ({hidden_features, in_features}, input.scalar_type ());
150+ auto d_weight2 = at::empty ({out_features, hidden_features}, input.scalar_type ());
151+ auto d_bias1 = at::empty ({hidden_features}, input.scalar_type ());
152+ auto d_bias2 = at::empty ({out_features}, input.scalar_type ());
153+ auto d_input = at::empty ({batch_size, in_features}, input.scalar_type ());
154+ auto d_output1 = at::empty ({batch_size, hidden_features}, input.scalar_type ());
155+ // auto reserved_space = at::empty({reserved_size}, inputs[0].scalar_type ());
156156 // allocate a fixed cublaslt workspace of 1 << 22 elements for now; that is at least 4 MB for any element type
157- auto lt_workspace = at::empty ({1 << 22 }, input.type ());
157+ auto lt_workspace = at::empty ({1 << 22 }, input.scalar_type ());
158158
159159 AT_DISPATCH_FLOATING_TYPES_AND2 (at::ScalarType::Half, at::ScalarType::BFloat16, input.scalar_type (), " linear_bias_backward" , [&] {
160160
0 commit comments