diff --git a/CMakeLists.txt b/CMakeLists.txt index c1bf2231..fcd93915 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,8 @@ add_library(neural-fortran src/nf.f90 src/nf/nf_activation.f90 src/nf/nf_base_layer.f90 + src/nf/nf_conv1d_layer.f90 + src/nf/nf_conv1d_layer_submodule.f90 src/nf/nf_conv2d_layer.f90 src/nf/nf_conv2d_layer_submodule.f90 src/nf/nf_cross_attention_layer.f90 @@ -37,12 +39,20 @@ add_library(neural-fortran src/nf/nf_input3d_layer_submodule.f90 src/nf/nf_layer_constructors.f90 src/nf/nf_layer_constructors_submodule.f90 + src/nf/nf_layernorm.f90 + src/nf/nf_layernorm_submodule.f90 src/nf/nf_layer.f90 src/nf/nf_layer_submodule.f90 + src/nf/nf_locally_connected1d_layer_submodule.f90 + src/nf/nf_locally_connected1d_layer.f90 src/nf/nf_linear2d_layer.f90 src/nf/nf_linear2d_layer_submodule.f90 + src/nf/nf_embedding_layer.f90 + src/nf/nf_embedding_layer_submodule.f90 src/nf/nf_loss.f90 src/nf/nf_loss_submodule.f90 + src/nf/nf_maxpool1d_layer.f90 + src/nf/nf_maxpool1d_layer_submodule.f90 src/nf/nf_maxpool2d_layer.f90 src/nf/nf_maxpool2d_layer_submodule.f90 src/nf/nf_metrics.f90 @@ -56,6 +66,8 @@ add_library(neural-fortran src/nf/nf_random.f90 src/nf/nf_reshape_layer.f90 src/nf/nf_reshape_layer_submodule.f90 + src/nf/nf_reshape2d_layer.f90 + src/nf/nf_reshape2d_layer_submodule.f90 src/nf/nf_self_attention_layer.f90 src/nf/io/nf_io_binary.f90 src/nf/io/nf_io_binary_submodule.f90 diff --git a/README.md b/README.md index a04ac32a..75da6525 100644 --- a/README.md +++ b/README.md @@ -30,17 +30,21 @@ Read the paper [here](https://arxiv.org/abs/1902.06714). | Layer type | Constructor name | Supported input layers | Rank of output array | Forward pass | Backward pass | |------------|------------------|------------------------|----------------------|--------------|---------------| | Input | `input` | n/a | 1, 2, 3 | n/a | n/a | +| Embedding | `embedding` | n/a | 2 | ✅ | ✅ | | Dense (fully-connected) | `dense` | `input1d`, `dense`, `dropout`, `flatten` | 1 | ✅ | ✅ | | Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ | -| Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) | +| Locally connected (1-d) | `locally_connected1d` | `input2d`, `locally_connected1d`, `conv1d`, `maxpool1d`, `reshape2d` | 2 | ✅ | ✅ | +| Convolutional (1-d) | `conv1d` | `input2d`, `conv1d`, `maxpool1d`, `reshape2d` | 2 | ✅ | ✅ | +| Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ | +| Max-pooling (1-d) | `maxpool1d` | `input2d`, `conv1d`, `maxpool1d`, `reshape2d` | 2 | ✅ | ✅ | | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ | -| Linear (2-d) | `linear2d` | `input2d`, `linear2d`, `self_attention` | 2 | ✅ | ✅ | -| Self-attention | `self_attention` | `input2d`, `linear2d`, `self_attention` | 2 | ✅ | ✅ | +| Linear (2-d) | `linear2d` | `input2d`, `layernorm`, `linear2d`, `self_attention` | 2 | ✅ | ✅ | +| Self-attention | `self_attention` | `input2d`, `layernorm`, `linear2d`, `self_attention` | 2 | ✅ | ✅ | +| Layer Normalization | `layernorm` | `linear2d`, `self_attention` | 2 | ✅ | ✅ | | Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ | +| Reshape (1-d to 2-d) | `reshape2d` | `input2d`, `conv1d`, `locally_connected1d`, `maxpool1d` | 2 | ✅ | ✅ | | Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ | -(*) See Issue [#145](https://github.com/modern-fortran/neural-fortran/issues/145) regarding non-converging CNN 
training on the MNIST dataset. - ## Getting started Get the code: @@ -265,7 +269,9 @@ Thanks to all open-source contributors to neural-fortran: [jvdp1](https://github.com/jvdp1), [jvo203](https://github.com/jvo203), [milancurcic](https://github.com/milancurcic), +[OneAdder](https://github.com/OneAdder), [pirpyn](https://github.com/pirpyn), +[rico07](https://github.com/ricor07), [rouson](https://github.com/rouson), [rweed](https://github.com/rweed), [Spnetic-5](https://github.com/Spnetic-5), diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index f4b706b8..b131c47c 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -1,5 +1,6 @@ foreach(execid cnn_mnist + cnn_mnist_1d dense_mnist get_set_network_params network_parameters diff --git a/example/cnn_mnist.f90 b/example/cnn_mnist.f90 index bec50b80..ef22f986 100644 --- a/example/cnn_mnist.f90 +++ b/example/cnn_mnist.f90 @@ -12,7 +12,7 @@ program cnn_mnist real, allocatable :: validation_images(:,:), validation_labels(:) real, allocatable :: testing_images(:,:), testing_labels(:) integer :: n - integer, parameter :: num_epochs = 10 + integer, parameter :: num_epochs = 250 call load_mnist(training_images, training_labels, & validation_images, validation_labels, & @@ -35,9 +35,9 @@ program cnn_mnist call net % train( & training_images, & label_digits(training_labels), & - batch_size=128, & + batch_size=16, & epochs=1, & - optimizer=sgd(learning_rate=3.) & + optimizer=sgd(learning_rate=0.001) & ) print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', accuracy( & diff --git a/example/cnn_mnist_1d.f90 b/example/cnn_mnist_1d.f90 new file mode 100644 index 00000000..7e978034 --- /dev/null +++ b/example/cnn_mnist_1d.f90 @@ -0,0 +1,67 @@ +program cnn_mnist_1d + + use nf, only: network, sgd, & + input, conv1d, maxpool1d, flatten, dense, reshape, reshape2d, locally_connected1d, & + load_mnist, label_digits, softmax, relu + + implicit none + + type(network) :: net + + real, allocatable :: training_images(:,:), training_labels(:) + real, allocatable :: validation_images(:,:), validation_labels(:) + real, allocatable :: testing_images(:,:), testing_labels(:) + integer :: n + integer, parameter :: num_epochs = 250 + + call load_mnist(training_images, training_labels, & + validation_images, validation_labels, & + testing_images, testing_labels) + + net = network([ & + input(784), & + reshape2d([28, 28]), & + locally_connected1d(filters=8, kernel_size=3, activation=relu()), & + maxpool1d(pool_size=2), & + locally_connected1d(filters=16, kernel_size=3, activation=relu()), & + maxpool1d(pool_size=2), & + dense(10, activation=softmax()) & + ]) + + call net % print_info() + + epochs: do n = 1, num_epochs + + call net % train( & + training_images, & + label_digits(training_labels), & + batch_size=16, & + epochs=1, & + optimizer=sgd(learning_rate=0.01) & + ) + + print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', accuracy( & + net, validation_images, label_digits(validation_labels)) * 100, ' %' + + end do epochs + + print '(a,f5.2,a)', 'Testing accuracy: ', & + accuracy(net, testing_images, label_digits(testing_labels)) * 100, '%' + + contains + + real function accuracy(net, x, y) + type(network), intent(in out) :: net + real, intent(in) :: x(:,:), y(:,:) + integer :: i, good + good = 0 + do i = 1, size(x, dim=2) + if (all(maxloc(net % predict(x(:,i))) == maxloc(y(:,i)))) then + good = good + 1 + end if + end do + accuracy = real(good) / size(x, dim=2) + end function accuracy + + end program cnn_mnist_1d + \ No newline at end of 
file diff --git a/fpm.toml b/fpm.toml index ebcceeb6..15a746e4 100644 --- a/fpm.toml +++ b/fpm.toml @@ -1,5 +1,5 @@ name = "neural-fortran" -version = "0.19.0" +version = "0.20.0" license = "MIT" author = "Milan Curcic" maintainer = "mcurcic@miami.edu" diff --git a/src/nf.f90 b/src/nf.f90 index 39f67ea3..172eafb3 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -3,14 +3,20 @@ module nf use nf_datasets_mnist, only: label_digits, load_mnist use nf_layer, only: layer use nf_layer_constructors, only: & + conv1d, & conv2d, & dense, & dropout, & + embedding, & flatten, & input, & + layernorm, & linear2d, & + locally_connected1d, & + maxpool1d, & maxpool2d, & reshape, & + reshape2d, & self_attention use nf_loss, only: mse, quadratic use nf_metrics, only: corr, maxabs diff --git a/src/nf/nf_activation.f90 b/src/nf/nf_activation.f90 index 309b43d2..caeab138 100644 --- a/src/nf/nf_activation.f90 +++ b/src/nf/nf_activation.f90 @@ -25,12 +25,14 @@ module nf_activation contains procedure(eval_1d_i), deferred :: eval_1d procedure(eval_1d_i), deferred :: eval_1d_prime + procedure(eval_2d_i), deferred :: eval_2d + procedure(eval_2d_i), deferred :: eval_2d_prime procedure(eval_3d_i), deferred :: eval_3d procedure(eval_3d_i), deferred :: eval_3d_prime procedure :: get_name - generic :: eval => eval_1d, eval_3d - generic :: eval_prime => eval_1d_prime, eval_3d_prime + generic :: eval => eval_1d, eval_2d, eval_3d + generic :: eval_prime => eval_1d_prime, eval_2d_prime, eval_3d_prime end type activation_function @@ -43,6 +45,13 @@ pure function eval_1d_i(self, x) result(res) real :: res(size(x)) end function eval_1d_i + pure function eval_2d_i(self, x) result(res) + import :: activation_function + class(activation_function), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + end function eval_2d_i + pure function eval_3d_i(self, x) result(res) import :: activation_function class(activation_function), intent(in) :: self @@ -57,6 +66,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_elu procedure :: eval_1d_prime => eval_1d_elu_prime + procedure :: eval_2d => eval_2d_elu + procedure :: eval_2d_prime => eval_2d_elu_prime procedure :: eval_3d => eval_3d_elu procedure :: eval_3d_prime => eval_3d_elu_prime end type elu @@ -65,6 +76,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_exponential procedure :: eval_1d_prime => eval_1d_exponential + procedure :: eval_2d => eval_2d_exponential + procedure :: eval_2d_prime => eval_2d_exponential procedure :: eval_3d => eval_3d_exponential procedure :: eval_3d_prime => eval_3d_exponential end type exponential @@ -73,6 +86,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_gaussian procedure :: eval_1d_prime => eval_1d_gaussian_prime + procedure :: eval_2d => eval_2d_gaussian + procedure :: eval_2d_prime => eval_2d_gaussian_prime procedure :: eval_3d => eval_3d_gaussian procedure :: eval_3d_prime => eval_3d_gaussian_prime end type gaussian @@ -81,6 +96,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_linear procedure :: eval_1d_prime => eval_1d_linear_prime + procedure :: eval_2d => eval_2d_linear + procedure :: eval_2d_prime => eval_2d_linear_prime procedure :: eval_3d => eval_3d_linear procedure :: eval_3d_prime => eval_3d_linear_prime end type linear @@ -89,6 +106,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_relu procedure :: eval_1d_prime => eval_1d_relu_prime + procedure :: eval_2d => eval_2d_relu + procedure :: eval_2d_prime => 
eval_2d_relu_prime procedure :: eval_3d => eval_3d_relu procedure :: eval_3d_prime => eval_3d_relu_prime end type relu @@ -98,6 +117,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_leaky_relu procedure :: eval_1d_prime => eval_1d_leaky_relu_prime + procedure :: eval_2d => eval_2d_leaky_relu + procedure :: eval_2d_prime => eval_2d_leaky_relu_prime procedure :: eval_3d => eval_3d_leaky_relu procedure :: eval_3d_prime => eval_3d_leaky_relu_prime end type leaky_relu @@ -106,6 +127,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_sigmoid procedure :: eval_1d_prime => eval_1d_sigmoid_prime + procedure :: eval_2d => eval_2d_sigmoid + procedure :: eval_2d_prime => eval_2d_sigmoid_prime procedure :: eval_3d => eval_3d_sigmoid procedure :: eval_3d_prime => eval_3d_sigmoid_prime end type sigmoid @@ -114,6 +137,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_softmax procedure :: eval_1d_prime => eval_1d_softmax_prime + procedure :: eval_2d => eval_2d_softmax + procedure :: eval_2d_prime => eval_2d_softmax_prime procedure :: eval_3d => eval_3d_softmax procedure :: eval_3d_prime => eval_3d_softmax_prime end type softmax @@ -122,6 +147,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_softplus procedure :: eval_1d_prime => eval_1d_softplus_prime + procedure :: eval_2d => eval_2d_softplus + procedure :: eval_2d_prime => eval_2d_softplus_prime procedure :: eval_3d => eval_3d_softplus procedure :: eval_3d_prime => eval_3d_softplus_prime end type softplus @@ -130,6 +157,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_step procedure :: eval_1d_prime => eval_1d_step_prime + procedure :: eval_2d => eval_2d_step + procedure :: eval_2d_prime => eval_2d_step_prime procedure :: eval_3d => eval_3d_step procedure :: eval_3d_prime => eval_3d_step_prime end type step @@ -138,6 +167,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_tanh procedure :: eval_1d_prime => eval_1d_tanh_prime + procedure :: eval_2d => eval_2d_tanh + procedure :: eval_2d_prime => eval_2d_tanh_prime procedure :: eval_3d => eval_3d_tanh procedure :: eval_3d_prime => eval_3d_tanh_prime end type tanhf @@ -147,14 +178,16 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_celu procedure :: eval_1d_prime => eval_1d_celu_prime + procedure :: eval_2d => eval_2d_celu + procedure :: eval_2d_prime => eval_2d_celu_prime procedure :: eval_3d => eval_3d_celu procedure :: eval_3d_prime => eval_3d_celu_prime end type celu contains + ! ELU Activation Functions pure function eval_1d_elu(self, x) result(res) - ! Exponential Linear Unit (ELU) activation function. class(elu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -166,8 +199,6 @@ pure function eval_1d_elu(self, x) result(res) end function eval_1d_elu pure function eval_1d_elu_prime(self, x) result(res) - ! First derivative of the Exponential Linear Unit (ELU) - ! activation function. 
class(elu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -178,8 +209,29 @@ pure function eval_1d_elu_prime(self, x) result(res) end where end function eval_1d_elu_prime + pure function eval_2d_elu(self, x) result(res) + class(elu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + where (x >= 0) + res = x + elsewhere + res = self % alpha * (exp(x) - 1) + end where + end function eval_2d_elu + + pure function eval_2d_elu_prime(self, x) result(res) + class(elu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + where (x >= 0) + res = 1 + elsewhere + res = self % alpha * exp(x) + end where + end function eval_2d_elu_prime + pure function eval_3d_elu(self, x) result(res) - ! Exponential Linear Unit (ELU) activation function. class(elu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -191,8 +243,6 @@ pure function eval_3d_elu(self, x) result(res) end function eval_3d_elu pure function eval_3d_elu_prime(self, x) result(res) - ! First derivative of the Exponential Linear Unit (ELU) - ! activation function. class(elu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -203,24 +253,30 @@ pure function eval_3d_elu_prime(self, x) result(res) end where end function eval_3d_elu_prime + ! Exponential Activation Functions pure function eval_1d_exponential(self, x) result(res) - ! Exponential activation function. class(exponential), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = exp(x) end function eval_1d_exponential + pure function eval_2d_exponential(self, x) result(res) + class(exponential), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = exp(x) + end function eval_2d_exponential + pure function eval_3d_exponential(self, x) result(res) - ! Exponential activation function. class(exponential), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = exp(x) end function eval_3d_exponential + ! Gaussian Activation Functions pure function eval_1d_gaussian(self, x) result(res) - ! Gaussian activation function. class(gaussian), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -228,15 +284,27 @@ pure function eval_1d_gaussian(self, x) result(res) end function eval_1d_gaussian pure function eval_1d_gaussian_prime(self, x) result(res) - ! First derivative of the Gaussian activation function. class(gaussian), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = -2 * x * self % eval_1d(x) end function eval_1d_gaussian_prime + pure function eval_2d_gaussian(self, x) result(res) + class(gaussian), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = exp(-x**2) + end function eval_2d_gaussian + + pure function eval_2d_gaussian_prime(self, x) result(res) + class(gaussian), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = -2 * x * self % eval_2d(x) + end function eval_2d_gaussian_prime + pure function eval_3d_gaussian(self, x) result(res) - ! Gaussian activation function. class(gaussian), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -244,15 +312,14 @@ pure function eval_3d_gaussian(self, x) result(res) end function eval_3d_gaussian pure function eval_3d_gaussian_prime(self, x) result(res) - ! First derivative of the Gaussian activation function. 
class(gaussian), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = -2 * x * self % eval_3d(x) end function eval_3d_gaussian_prime + ! Linear Activation Functions pure function eval_1d_linear(self, x) result(res) - ! Linear activation function. class(linear), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -260,15 +327,27 @@ pure function eval_1d_linear(self, x) result(res) end function eval_1d_linear pure function eval_1d_linear_prime(self, x) result(res) - ! First derivative of the Linear activation function. class(linear), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = 1 end function eval_1d_linear_prime + pure function eval_2d_linear(self, x) result(res) + class(linear), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = x + end function eval_2d_linear + + pure function eval_2d_linear_prime(self, x) result(res) + class(linear), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = 1 + end function eval_2d_linear_prime + pure function eval_3d_linear(self, x) result(res) - ! Linear activation function. class(linear), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -276,15 +355,14 @@ pure function eval_3d_linear(self, x) result(res) end function eval_3d_linear pure function eval_3d_linear_prime(self, x) result(res) - ! First derivative of the Linear activation function. class(linear), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = 1 end function eval_3d_linear_prime + ! ReLU Activation Functions pure function eval_1d_relu(self, x) result(res) - !! Rectified Linear Unit (ReLU) activation function. class(relu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -292,15 +370,27 @@ pure function eval_1d_relu(self, x) result(res) end function eval_1d_relu pure function eval_1d_relu_prime(self, x) result(res) - ! First derivative of the Rectified Linear Unit (ReLU) activation function. class(relu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = merge(1., 0., x > 0) end function eval_1d_relu_prime + pure function eval_2d_relu(self, x) result(res) + class(relu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = max(0., x) + end function eval_2d_relu + + pure function eval_2d_relu_prime(self, x) result(res) + class(relu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = merge(1., 0., x > 0) + end function eval_2d_relu_prime + pure function eval_3d_relu(self, x) result(res) - !! Rectified Linear Unit (ReLU) activation function. class(relu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -308,15 +398,14 @@ pure function eval_3d_relu(self, x) result(res) end function eval_3d_relu pure function eval_3d_relu_prime(self, x) result(res) - ! First derivative of the Rectified Linear Unit (ReLU) activation function. class(relu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = merge(1., 0., x > 0) end function eval_3d_relu_prime + ! Leaky ReLU Activation Functions pure function eval_1d_leaky_relu(self, x) result(res) - !! Leaky Rectified Linear Unit (Leaky ReLU) activation function. 
class(leaky_relu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -324,15 +413,27 @@ pure function eval_1d_leaky_relu(self, x) result(res) end function eval_1d_leaky_relu pure function eval_1d_leaky_relu_prime(self, x) result(res) - ! First derivative of the Leaky Rectified Linear Unit (Leaky ReLU) activation function. class(leaky_relu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = merge(1., self%alpha, x > 0) end function eval_1d_leaky_relu_prime + pure function eval_2d_leaky_relu(self, x) result(res) + class(leaky_relu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = max(self % alpha * x, x) + end function eval_2d_leaky_relu + + pure function eval_2d_leaky_relu_prime(self, x) result(res) + class(leaky_relu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = merge(1., self%alpha, x > 0) + end function eval_2d_leaky_relu_prime + pure function eval_3d_leaky_relu(self, x) result(res) - !! Leaky Rectified Linear Unit (Leaky ReLU) activation function. class(leaky_relu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -340,47 +441,57 @@ pure function eval_3d_leaky_relu(self, x) result(res) end function eval_3d_leaky_relu pure function eval_3d_leaky_relu_prime(self, x) result(res) - ! First derivative of the Leaky Rectified Linear Unit (Leaky ReLU) activation function. class(leaky_relu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = merge(1., self%alpha, x > 0) end function eval_3d_leaky_relu_prime + ! Sigmoid Activation Functions pure function eval_1d_sigmoid(self, x) result(res) - ! Sigmoid activation function. class(sigmoid), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = 1 / (1 + exp(-x)) - endfunction eval_1d_sigmoid + end function eval_1d_sigmoid pure function eval_1d_sigmoid_prime(self, x) result(res) - ! First derivative of the sigmoid activation function. class(sigmoid), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = self % eval_1d(x) * (1 - self % eval_1d(x)) end function eval_1d_sigmoid_prime + pure function eval_2d_sigmoid(self, x) result(res) + class(sigmoid), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = 1 / (1 + exp(-x)) + end function eval_2d_sigmoid + + pure function eval_2d_sigmoid_prime(self, x) result(res) + class(sigmoid), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = self % eval_2d(x) * (1 - self % eval_2d(x)) + end function eval_2d_sigmoid_prime + pure function eval_3d_sigmoid(self, x) result(res) - ! Sigmoid activation function. class(sigmoid), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = 1 / (1 + exp(-x)) - endfunction eval_3d_sigmoid + end function eval_3d_sigmoid pure function eval_3d_sigmoid_prime(self, x) result(res) - ! First derivative of the sigmoid activation function. class(sigmoid), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = self % eval_3d(x) * (1 - self % eval_3d(x)) end function eval_3d_sigmoid_prime + ! Softmax Activation Functions pure function eval_1d_softmax(self, x) result(res) - !! 
Softmax activation function class(softmax), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -389,15 +500,28 @@ pure function eval_1d_softmax(self, x) result(res) end function eval_1d_softmax pure function eval_1d_softmax_prime(self, x) result(res) - !! Derivative of the softmax activation function. class(softmax), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = self%eval_1d(x) * (1 - self%eval_1d(x)) end function eval_1d_softmax_prime + pure function eval_2d_softmax(self, x) result(res) + class(softmax), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = exp(x - maxval(x)) + res = res / sum(res) + end function eval_2d_softmax + + pure function eval_2d_softmax_prime(self, x) result(res) + class(softmax), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = self % eval_2d(x) * (1 - self % eval_2d(x)) + end function eval_2d_softmax_prime + pure function eval_3d_softmax(self, x) result(res) - !! Softmax activation function class(softmax), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -406,15 +530,14 @@ pure function eval_3d_softmax(self, x) result(res) end function eval_3d_softmax pure function eval_3d_softmax_prime(self, x) result(res) - !! Derivative of the softmax activation function. class(softmax), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = self % eval_3d(x) * (1 - self % eval_3d(x)) end function eval_3d_softmax_prime + ! Softplus Activation Functions pure function eval_1d_softplus(self, x) result(res) - ! Softplus activation function. class(softplus), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -422,15 +545,27 @@ pure function eval_1d_softplus(self, x) result(res) end function eval_1d_softplus pure function eval_1d_softplus_prime(self, x) result(res) - class(softplus), intent(in) :: self - ! First derivative of the softplus activation function. + class(softplus), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = exp(x) / (exp(x) + 1) end function eval_1d_softplus_prime + pure function eval_2d_softplus(self, x) result(res) + class(softplus), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = log(exp(x) + 1) + end function eval_2d_softplus + + pure function eval_2d_softplus_prime(self, x) result(res) + class(softplus), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = exp(x) / (exp(x) + 1) + end function eval_2d_softplus_prime + pure function eval_3d_softplus(self, x) result(res) - ! Softplus activation function. class(softplus), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -438,15 +573,14 @@ pure function eval_3d_softplus(self, x) result(res) end function eval_3d_softplus pure function eval_3d_softplus_prime(self, x) result(res) - class(softplus), intent(in) :: self - ! First derivative of the softplus activation function. + class(softplus), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = exp(x) / (exp(x) + 1) end function eval_3d_softplus_prime + ! Step Activation Functions pure function eval_1d_step(self, x) result(res) - ! Step activation function. 
class(step), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -454,15 +588,27 @@ pure function eval_1d_step(self, x) result(res) end function eval_1d_step pure function eval_1d_step_prime(self, x) result(res) - ! First derivative of the step activation function. class(step), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = 0 end function eval_1d_step_prime + pure function eval_2d_step(self, x) result(res) + class(step), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = merge(1., 0., x > 0) + end function eval_2d_step + + pure function eval_2d_step_prime(self, x) result(res) + class(step), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = 0 + end function eval_2d_step_prime + pure function eval_3d_step(self, x) result(res) - ! Step activation function. class(step), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -470,15 +616,14 @@ pure function eval_3d_step(self, x) result(res) end function eval_3d_step pure function eval_3d_step_prime(self, x) result(res) - ! First derivative of the step activation function. class(step), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = 0 end function eval_3d_step_prime + ! Tanh Activation Functions pure function eval_1d_tanh(self, x) result(res) - ! Tangent hyperbolic activation function. class(tanhf), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -486,15 +631,27 @@ pure function eval_1d_tanh(self, x) result(res) end function eval_1d_tanh pure function eval_1d_tanh_prime(self, x) result(res) - ! First derivative of the tanh activation function. class(tanhf), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = 1 - tanh(x)**2 end function eval_1d_tanh_prime + pure function eval_2d_tanh(self, x) result(res) + class(tanhf), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = tanh(x) + end function eval_2d_tanh + + pure function eval_2d_tanh_prime(self, x) result(res) + class(tanhf), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = 1 - tanh(x)**2 + end function eval_2d_tanh_prime + pure function eval_3d_tanh(self, x) result(res) - ! Tangent hyperbolic activation function. class(tanhf), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -502,15 +659,14 @@ pure function eval_3d_tanh(self, x) result(res) end function eval_3d_tanh pure function eval_3d_tanh_prime(self, x) result(res) - ! First derivative of the tanh activation function. class(tanhf), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = 1 - tanh(x)**2 end function eval_3d_tanh_prime + ! CELU Activation Functions pure function eval_1d_celu(self, x) result(res) - ! Celu activation function. class(celu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -519,10 +675,9 @@ pure function eval_1d_celu(self, x) result(res) else where res = self % alpha * (exp(x / self % alpha) - 1.0) end where - end function + end function eval_1d_celu pure function eval_1d_celu_prime(self, x) result(res) - ! Celu activation function. 
class(celu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -531,10 +686,31 @@ pure function eval_1d_celu_prime(self, x) result(res) else where res = exp(x / self % alpha) end where - end function + end function eval_1d_celu_prime + + pure function eval_2d_celu(self, x) result(res) + class(celu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + where (x >= 0.0) + res = x + else where + res = self % alpha * (exp(x / self % alpha) - 1.0) + end where + end function eval_2d_celu + + pure function eval_2d_celu_prime(self, x) result(res) + class(celu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + where (x >= 0.0) + res = 1.0 + else where + res = exp(x / self % alpha) + end where + end function eval_2d_celu_prime pure function eval_3d_celu(self, x) result(res) - ! Celu activation function. class(celu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -543,10 +719,9 @@ pure function eval_3d_celu(self, x) result(res) else where res = self % alpha * (exp(x / self % alpha) - 1.0) end where - end function + end function eval_3d_celu pure function eval_3d_celu_prime(self, x) result(res) - ! Celu activation function. class(celu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -555,13 +730,10 @@ pure function eval_3d_celu_prime(self, x) result(res) else where res = exp(x / self % alpha) end where - end function + end function eval_3d_celu_prime + ! Utility Functions function get_activation_by_name(activation_name) result(res) - ! Workaround to get activation_function with some - ! hardcoded default parameters by its name. - ! Need this function since we get only activation name - ! from keras files. character(len=*), intent(in) :: activation_name class(activation_function), allocatable :: res @@ -611,16 +783,8 @@ function get_activation_by_name(activation_name) result(res) end function get_activation_by_name pure function get_name(self) result(name) - !! Return the name of the activation function. - !! - !! Normally we would place this in the definition of each type, however - !! accessing the name variable directly from the type would require type - !! guards just like we have here. This at least keeps all the type guards - !! in one place. class(activation_function), intent(in) :: self - !! The activation function instance. character(:), allocatable :: name - !! The name of the activation function. select type (self) class is (elu) name = 'elu' @@ -651,4 +815,4 @@ pure function get_name(self) result(name) end select end function get_name -end module nf_activation +end module nf_activation \ No newline at end of file diff --git a/src/nf/nf_conv1d_layer.f90 b/src/nf/nf_conv1d_layer.f90 new file mode 100644 index 00000000..c39b11fc --- /dev/null +++ b/src/nf/nf_conv1d_layer.f90 @@ -0,0 +1,119 @@ +module nf_conv1d_layer + !! This modules provides a 1-d convolutional `conv1d` type. + + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + implicit none + + private + public :: conv1d_layer + + type, extends(base_layer) :: conv1d_layer + + integer :: width + integer :: height + integer :: channels + integer :: kernel_size + integer :: filters + + real, allocatable :: biases(:) ! size(filters) + real, allocatable :: kernel(:,:,:) ! filters x channels x window + real, allocatable :: output(:,:) ! filters x output_width + real, allocatable :: z(:,:) ! kernel .dot. 
input + bias + + real, allocatable :: dw(:,:,:) ! weight (kernel) gradients + real, allocatable :: db(:) ! bias gradients + real, allocatable :: gradient(:,:) + + class(activation_function), allocatable :: activation + + contains + + procedure :: forward + procedure :: backward + procedure :: get_gradients + procedure :: get_num_params + procedure :: get_params + procedure :: init + procedure :: set_params + + end type conv1d_layer + + interface conv1d_layer + module function conv1d_layer_cons(filters, kernel_size, activation) & + result(res) + !! `conv1d_layer` constructor function + integer, intent(in) :: filters + integer, intent(in) :: kernel_size + class(activation_function), intent(in) :: activation + type(conv1d_layer) :: res + end function conv1d_layer_cons + end interface conv1d_layer + + interface + + module subroutine init(self, input_shape) + !! Initialize the layer data structures. + !! + !! This is a deferred procedure from the `base_layer` abstract type. + class(conv1d_layer), intent(in out) :: self + !! A `conv1d_layer` instance + integer, intent(in) :: input_shape(:) + !! Input layer dimensions + end subroutine init + + pure module subroutine forward(self, input) + !! Apply a forward pass on the `conv1d` layer. + class(conv1d_layer), intent(in out) :: self + !! A `conv1d_layer` instance + real, intent(in) :: input(:,:) + !! Input data + end subroutine forward + + pure module subroutine backward(self, input, gradient) + !! Apply a backward pass on the `conv1d` layer. + class(conv1d_layer), intent(in out) :: self + !! A `conv1d_layer` instance + real, intent(in) :: input(:,:) + !! Input data (previous layer) + real, intent(in) :: gradient(:,:) + !! Gradient (next layer) + end subroutine backward + + pure module function get_num_params(self) result(num_params) + !! Get the number of parameters in the layer. + class(conv1d_layer), intent(in) :: self + !! A `conv1d_layer` instance + integer :: num_params + !! Number of parameters + end function get_num_params + + module function get_params(self) result(params) + !! Return the parameters (weights and biases) of this layer. + !! The parameters are ordered as weights first, biases second. + class(conv1d_layer), intent(in), target :: self + !! A `conv1d_layer` instance + real, allocatable :: params(:) + !! Parameters to get + end function get_params + + module function get_gradients(self) result(gradients) + !! Return the gradients of this layer. + !! The gradients are ordered as weights first, biases second. + class(conv1d_layer), intent(in), target :: self + !! A `conv1d_layer` instance + real, allocatable :: gradients(:) + !! Gradients to get + end function get_gradients + + module subroutine set_params(self, params) + !! Set the parameters of the layer. + class(conv1d_layer), intent(in out) :: self + !! A `conv1d_layer` instance + real, intent(in) :: params(:) + !! 
Parameters to set + end subroutine set_params + + end interface + +end module nf_conv1d_layer diff --git a/src/nf/nf_conv1d_layer_submodule.f90 b/src/nf/nf_conv1d_layer_submodule.f90 new file mode 100644 index 00000000..5404b9c7 --- /dev/null +++ b/src/nf/nf_conv1d_layer_submodule.f90 @@ -0,0 +1,178 @@ +submodule(nf_conv1d_layer) nf_conv1d_layer_submodule + + use nf_activation, only: activation_function + use nf_random, only: random_normal + + implicit none + +contains + + module function conv1d_layer_cons(filters, kernel_size, activation) result(res) + integer, intent(in) :: filters + integer, intent(in) :: kernel_size + class(activation_function), intent(in) :: activation + type(conv1d_layer) :: res + + res % kernel_size = kernel_size + res % filters = filters + res % activation_name = activation % get_name() + allocate( res % activation, source = activation ) + end function conv1d_layer_cons + + module subroutine init(self, input_shape) + implicit none + class(conv1d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + self % channels = input_shape(1) + self % width = input_shape(2) - self % kernel_size + 1 + + ! Output of shape: filters x width + allocate(self % output(self % filters, self % width)) + self % output = 0 + + ! Kernel of shape: filters x channels x kernel_size + allocate(self % kernel(self % filters, self % channels, self % kernel_size)) + call random_normal(self % kernel) + self % kernel = self % kernel / self % kernel_size + + allocate(self % biases(self % filters)) + self % biases = 0 + + allocate(self % z, mold=self % output) + self % z = 0 + + allocate(self % gradient(input_shape(1), input_shape(2))) + self % gradient = 0 + + allocate(self % dw, mold=self % kernel) + self % dw = 0 + + allocate(self % db, mold=self % biases) + self % db = 0 + + end subroutine init + + pure module subroutine forward(self, input) + implicit none + class(conv1d_layer), intent(in out) :: self + real, intent(in) :: input(:,:) + integer :: input_channels, input_width + integer :: j, n + integer :: iws, iwe + + input_channels = size(input, dim=1) + input_width = size(input, dim=2) + + ! Loop over output positions. + do j = 1, self % width + ! Compute the input window corresponding to output index j. + ! In forward: center index = j + half_window, so window = indices j to j+kernel_size-1. + iws = j + iwe = j + self % kernel_size - 1 + + ! For each filter, compute the convolution (inner product over channels and kernel width). + do concurrent (n = 1:self % filters) + self % z(n, j) = sum(self % kernel(n,:,:) * input(:,iws:iwe)) + end do + + ! Add the bias for each filter. + self % z(:,j) = self % z(:,j) + self % biases + end do + + ! Apply the activation function. + self % output = self % activation % eval(self % z) + end subroutine forward + + pure module subroutine backward(self, input, gradient) + implicit none + class(conv1d_layer), intent(in out) :: self + ! 'input' has shape: (channels, input_width) + ! 'gradient' (dL/dy) has shape: (filters, output_width) + real, intent(in) :: input(:,:) + real, intent(in) :: gradient(:,:) + + integer :: input_channels, input_width, output_width + integer :: j, n, k + integer :: iws, iwe + + ! Local arrays to accumulate gradients. + real :: gdz(self % filters, self % width) ! local gradient (dL/dz) + real :: db_local(self % filters) + real :: dw_local(self % filters, self % channels, self % kernel_size) + + ! Determine dimensions. 
+ input_channels = size(input, dim=1) + input_width = size(input, dim=2) + output_width = self % width ! Note: output_width = input_width - kernel_size + 1 + + !--- Compute the local gradient gdz = (dL/dy) * sigma'(z) for each output. + gdz = gradient * self % activation % eval_prime(self % z) + + !--- Compute bias gradients: db(n) = sum_j gdz(n, j) + db_local = sum(gdz, dim=2) + + !--- Initialize weight gradient and input gradient accumulators. + dw_local = 0.0 + self % gradient = 0.0 + + !--- Accumulate gradients over each output position. + ! In the forward pass the window for output index j was: + ! iws = j, iwe = j + kernel_size - 1. + do n = 1, self % filters + do j = 1, output_width + iws = j + iwe = j + self % kernel_size - 1 + do k = 1, self % channels + ! Weight gradient: accumulate contribution from the input window. + dw_local(n,k,:) = dw_local(n,k,:) + input(k,iws:iwe) * gdz(n,j) + ! Input gradient: propagate gradient back to the input window. + self % gradient(k,iws:iwe) = self % gradient(k,iws:iwe) + self % kernel(n,k,:) * gdz(n,j) + end do + end do + end do + + !--- Update stored gradients. + self % dw = self % dw + dw_local + self % db = self % db + db_local + + end subroutine backward + + pure module function get_num_params(self) result(num_params) + class(conv1d_layer), intent(in) :: self + integer :: num_params + num_params = product(shape(self % kernel)) + size(self % biases) + end function get_num_params + + module function get_params(self) result(params) + class(conv1d_layer), intent(in), target :: self + real, allocatable :: params(:) + real, pointer :: w_(:) => null() + w_(1:size(self % kernel)) => self % kernel + params = [ w_, self % biases] + end function get_params + + module function get_gradients(self) result(gradients) + class(conv1d_layer), intent(in), target :: self + real, allocatable :: gradients(:) + real, pointer :: dw_(:) => null() + dw_(1:size(self % dw)) => self % dw + gradients = [ dw_, self % db ] + end function get_gradients + + module subroutine set_params(self, params) + class(conv1d_layer), intent(in out) :: self + real, intent(in) :: params(:) + + if (size(params) /= self % get_num_params()) then + error stop 'conv1d_layer % set_params: Number of parameters does not match' + end if + + self % kernel = reshape(params(:product(shape(self % kernel))), shape(self % kernel)) + associate(n => product(shape(self % kernel))) + self % biases = params(n + 1 : n + self % filters) + end associate + + end subroutine set_params + +end submodule nf_conv1d_layer_submodule diff --git a/src/nf/nf_embedding_layer.f90 b/src/nf/nf_embedding_layer.f90 new file mode 100644 index 00000000..94a868a5 --- /dev/null +++ b/src/nf/nf_embedding_layer.f90 @@ -0,0 +1,98 @@ +module nf_embedding_layer + + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + + implicit none + + private + public :: embedding_layer + + type, extends(base_layer) :: embedding_layer + !! Embedding Layer + !! Stores inputs as a trainable lookup table. Inputs are + !! integer indicies in a dictionary of `vocab_size`. + !! This layer converts them into a table of shape + !! (`sequence_length`, `model_dimension`) + integer :: sequence_length, vocab_size, model_dimension + integer :: positional + + real, allocatable :: weights(:, :) + real, allocatable :: output(:, :) + real, allocatable :: dw(:, :) ! 
weight gradients + + contains + + procedure :: backward + procedure :: forward + procedure :: positional_trigonometric + procedure :: positional_absolute + procedure :: init + procedure :: get_num_params + procedure :: get_params + procedure :: get_gradients + procedure :: set_params + + end type embedding_layer + + interface embedding_layer + module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res) + integer, intent(in) :: vocab_size, model_dimension + integer, optional :: positional + type(embedding_layer) :: res + end function embedding_layer_cons + end interface embedding_layer + + interface + pure module subroutine forward(self, input) + !! Get vectors by indicis in the dictionary + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input(:) + end subroutine forward + + pure module subroutine backward(self, input, gradient) + !! Update gradient at `input` indices + !! dw_i = W_i + d_output_i + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input(:) + real, intent(in) :: gradient(:, :) + end subroutine backward + + pure module subroutine positional_trigonometric(self, pos) + !! Sum embedding with positional info (trigonometric, not trianable) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: pos + end subroutine positional_trigonometric + + pure module subroutine positional_absolute(self, pos) + !! Sum embedding with absolute position + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: pos + end subroutine positional_absolute + + module subroutine init(self, input_shape) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + + pure module function get_num_params(self) result(num_params) + class(embedding_layer), intent(in) :: self + integer :: num_params + end function get_num_params + + module function get_params(self) result(params) + class(embedding_layer), intent(in), target :: self + real, allocatable :: params(:) + end function get_params + + module function get_gradients(self) result(gradients) + class(embedding_layer), intent(in), target :: self + real, allocatable :: gradients(:) + end function get_gradients + + module subroutine set_params(self, params) + class(embedding_layer), intent(in out) :: self + real, intent(in), target :: params(:) + end subroutine set_params + end interface +end module nf_embedding_layer diff --git a/src/nf/nf_embedding_layer_submodule.f90 b/src/nf/nf_embedding_layer_submodule.f90 new file mode 100644 index 00000000..83992b22 --- /dev/null +++ b/src/nf/nf_embedding_layer_submodule.f90 @@ -0,0 +1,137 @@ +#define NONE 0 +#define TRIGONOMETRIC 1 +#define ABSOLUTE 2 + +submodule(nf_embedding_layer) nf_embedding_layer_submodule + use nf_base_layer, only: base_layer + implicit none +contains + module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res) + integer, intent(in) :: vocab_size, model_dimension + integer, optional :: positional + type(embedding_layer) :: res + + res % vocab_size = vocab_size + res % model_dimension = model_dimension + if (.not. 
present(positional)) then + res % positional = NONE + else + res % positional = positional + end if + end function embedding_layer_cons + + module subroutine init(self, input_shape) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + self % sequence_length = input_shape(1) + + allocate(self % output(self % sequence_length, self % model_dimension)) + + allocate(self % weights(self % vocab_size, self % model_dimension)) + self % weights = 0.1 + + allocate(self % dw(self % vocab_size, self % model_dimension)) + self % dw = 0.0 + end subroutine init + + pure module subroutine forward(self, input) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input(:) + integer :: i, index + + do concurrent(i = 1: self % sequence_length) + index = input(i) + if (index > size(self % weights, 1)) then + index = 1 + elseif (index == 0) then + index = 1 + end if + + self % output(i, :) = self % weights(index, :) + + if (self % positional == TRIGONOMETRIC) then + call self % positional_trigonometric(i) + elseif (self % positional == ABSOLUTE) then + call self % positional_absolute(i) + end if + end do + end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input(:) + real, intent(in) :: gradient(:, :) + integer :: i + + do concurrent(i = 1: self % sequence_length) + self % dw(input(i), :) = self % dw(input(i), :) + gradient(i, :) + end do + end subroutine backward + + pure module subroutine positional_trigonometric(self, pos) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: pos + integer :: i + real :: theta + + do concurrent(i = 1: floor(real(self % model_dimension) / 2)) + theta = (pos - 1) / 10000 ** (real(2 * (i-1)) / self % model_dimension) + self % output(pos, 2 * i - 1) = self % output(pos, 2 * i - 1) + sin(theta) + self % output(pos, 2 * i) = self % output(pos, 2 * i) + cos(theta) + end do + end subroutine positional_trigonometric + + pure module subroutine positional_absolute(self, pos) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: pos + integer :: i + + do concurrent(i = 1: self % model_dimension) + self % output(pos, i) = self % output(pos, i) + pos - 1 + end do + end subroutine positional_absolute + + pure module function get_num_params(self) result(num_params) + class(embedding_layer), intent(in) :: self + integer :: num_params + num_params = self % vocab_size * self % model_dimension + end function get_num_params + + module function get_params(self) result(params) + class(embedding_layer), intent(in), target :: self + real, allocatable :: params(:) + real, pointer :: w_(:) => null() + + w_(1: product(shape(self % weights))) => self % weights + params = w_ + end function get_params + + module function get_gradients(self) result(gradients) + class(embedding_layer), intent(in), target :: self + real, allocatable :: gradients(:) + real, pointer :: dw_(:) => null() + + dw_(1: product(shape(self % dw))) => self % dw + gradients = dw_ + end function get_gradients + + module subroutine set_params(self, params) + class(embedding_layer), intent(in out) :: self + real, intent(in), target :: params(:) + + real, pointer :: p_(:,:) => null() + + ! check if the number of parameters is correct + if (size(params) /= self % get_num_params()) then + error stop 'Error: number of parameters does not match' + end if + + associate(n => self % vocab_size * self % model_dimension) + ! 
reshape the weights + p_(1:self % vocab_size, 1:self % model_dimension) => params(1 : n) + self % weights = p_ + end associate + + end subroutine set_params +end submodule nf_embedding_layer_submodule diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index db60cf0f..e5f92f64 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -9,15 +9,21 @@ module nf_layer_constructors private public :: & + conv1d, & conv2d, & dense, & dropout, & flatten, & input, & linear2d, & + locally_connected1d, & + maxpool1d, & maxpool2d, & reshape, & - self_attention + reshape2d, & + self_attention, & + embedding, & + layernorm interface input @@ -152,6 +158,33 @@ module function flatten() result(res) !! Resulting layer instance end function flatten + module function conv1d(filters, kernel_size, activation) result(res) + !! 1-d convolutional layer constructor. + !! + !! This layer is for building 1-d convolutional network. + !! Although the established convention is to call these layers 1-d, + !! the shape of the data is actually 2-d: image width + !! and the number of channels. + !! A conv1d layer must not be the first layer in the network. + !! + !! Example: + !! + !! ``` + !! use nf, only :: conv1d, layer + !! type(layer) :: conv1d_layer + !! conv1d_layer = dense(filters=32, kernel_size=3) + !! conv1d_layer = dense(filters=32, kernel_size=3, activation='relu') + !! ``` + integer, intent(in) :: filters + !! Number of filters in the output of the layer + integer, intent(in) :: kernel_size + !! Width of the convolution window, commonly 3 or 5 + class(activation_function), intent(in), optional :: activation + !! Activation function (default sigmoid) + type(layer) :: res + !! Resulting layer instance + end function conv1d + module function conv2d(filters, kernel_size, activation) result(res) !! 2-d convolutional layer constructor. !! @@ -179,6 +212,55 @@ module function conv2d(filters, kernel_size, activation) result(res) !! Resulting layer instance end function conv2d + module function locally_connected1d(filters, kernel_size, activation) result(res) + !! 1-d locally connected network constructor + !! + !! This layer is for building 1-d locally connected network. + !! Although the established convention is to call these layers 1-d, + !! the shape of the data is actuall 2-d: image width, + !! and the number of channels. + !! A locally connected 1d layer must not be the first layer in the network. + !! + !! Example: + !! + !! ``` + !! use nf, only :: locally_connected1d, layer + !! type(layer) :: locally_connected1d_layer + !! locally_connected1d_layer = dense(filters=32, kernel_size=3) + !! locally_connected1d_layer = dense(filters=32, kernel_size=3, activation='relu') + !! ``` + integer, intent(in) :: filters + !! Number of filters in the output of the layer + integer, intent(in) :: kernel_size + !! Width of the convolution window, commonly 3 or 5 + class(activation_function), intent(in), optional :: activation + !! Activation function (default sigmoid) + type(layer) :: res + !! Resulting layer instance + end function locally_connected1d + + module function maxpool1d(pool_size, stride) result(res) + !! 1-d maxpooling layer constructor. + !! + !! This layer is for downscaling other layers, typically `conv1d`. + !! + !! Example: + !! + !! ``` + !! use nf, only :: maxpool1d, layer + !! type(layer) :: maxpool1d_layer + !! maxpool1d_layer = maxpool1d(pool_size=2) + !! maxpool1d_layer = maxpool1d(pool_size=2, stride=3) + !! 
``` + integer, intent(in) :: pool_size + !! Width of the pooling window, commonly 2 + integer, intent(in), optional :: stride + !! Stride of the pooling window, commonly equal to `pool_size`; + !! Defaults to `pool_size` if omitted. + type(layer) :: res + !! Resulting layer instance + end function maxpool1d + module function maxpool2d(pool_size, stride) result(res) !! 2-d maxpooling layer constructor. !! @@ -212,6 +294,17 @@ module function reshape(output_shape) result(res) !! Resulting layer instance end function reshape + module function reshape2d(output_shape) result(res) + !! Rank-1 to rank-any reshape layer constructor. + !! Currently implemented is only rank-2 for the output of the reshape. + !! + !! This layer is for connecting 1-d inputs to conv1d or similar layers. + integer, intent(in) :: output_shape(:) + !! Shape of the output + type(layer) :: res + !! Resulting layer instance + end function reshape2d + module function linear2d(out_features) result(res) !! Rank-2 (sequence_length, out_features) linear layer constructor. !! sequence_length is determined at layer initialization, based on the @@ -222,15 +315,40 @@ module function linear2d(out_features) result(res) !! Resulting layer instance end function linear2d - module function self_attention(num_heads) result(res) - !! Rank-2 (sequence_length, out_features) self attention constructor. - !! sequence_length and model_dimension are determined at layer initialization, based on the - !! output shape of the previous layer. - integer, intent(in) :: num_heads - !! Number of attention heads - type(layer) :: res - !! Resulting layer instance - end function self_attention + module function self_attention(num_heads) result(res) + !! Rank-2 (sequence_length, out_features) self attention constructor. + !! sequence_length and model_dimension are determined at layer initialization, based on the + !! output shape of the previous layer. + integer, intent(in) :: num_heads + !! Number of attention heads + type(layer) :: res + !! Resulting layer instance + end function self_attention + + module function embedding(sequence_length, vocab_size, model_dimension, positional) result(res) + !! Embedding layer constructor. + !! + !! This layer is for inputting token indices from the dictionary to the network. + !! Works as a trainable lookup table that converts each index into a vector. + !! Embedding layer must be the first layer in a network. + integer, intent(in) :: sequence_length + !! max len of input sequence + integer, intent(in) :: vocab_size + !! length of token vocabulary + integer, intent(in) :: model_dimension + !! size of target embeddings + integer, optional, intent(in) :: positional + !! positional encoding + type(layer) :: res + end function embedding + + module function layernorm() result(res) + !! Layer Normalization + !! ((x − mean(x)) / sqrt(variance(x) + eps) * gamma + beta + !! Based upon `Ba, Jimmy Lei, Jamie Ryan Kiros, and Geoffrey E. Hinton(2016)`: + !! 
https://arxiv.org/abs/1607.06450v1 + type(layer) :: res + end function layernorm end interface diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 9e5322c1..48fcd8a5 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -1,6 +1,7 @@ submodule(nf_layer_constructors) nf_layer_constructors_submodule use nf_layer, only: layer + use nf_conv1d_layer, only: conv1d_layer use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer use nf_dropout_layer, only: dropout_layer @@ -8,16 +9,46 @@ use nf_input1d_layer, only: input1d_layer use nf_input2d_layer, only: input2d_layer use nf_input3d_layer, only: input3d_layer + use nf_locally_connected1d_layer, only: locally_connected1d_layer + use nf_maxpool1d_layer, only: maxpool1d_layer use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer + use nf_reshape2d_layer, only: reshape2d_layer use nf_linear2d_layer, only: linear2d_layer use nf_self_attention_layer, only: self_attention_layer + use nf_embedding_layer, only: embedding_layer + use nf_layernorm_layer, only: layernorm_layer use nf_activation, only: activation_function, relu, sigmoid implicit none contains + module function conv1d(filters, kernel_size, activation) result(res) + integer, intent(in) :: filters + integer, intent(in) :: kernel_size + class(activation_function), intent(in), optional :: activation + type(layer) :: res + + class(activation_function), allocatable :: activation_tmp + + res % name = 'conv1d' + + if (present(activation)) then + allocate(activation_tmp, source=activation) + else + allocate(activation_tmp, source=relu()) + end if + + res % activation = activation_tmp % get_name() + + allocate( & + res % p, & + source=conv1d_layer(filters, kernel_size, activation_tmp) & + ) + + end function conv1d + module function conv2d(filters, kernel_size, activation) result(res) integer, intent(in) :: filters integer, intent(in) :: kernel_size @@ -43,6 +74,31 @@ module function conv2d(filters, kernel_size, activation) result(res) end function conv2d + module function locally_connected1d(filters, kernel_size, activation) result(res) + integer, intent(in) :: filters + integer, intent(in) :: kernel_size + class(activation_function), intent(in), optional :: activation + type(layer) :: res + + class(activation_function), allocatable :: activation_tmp + + res % name = 'locally_connected1d' + + if (present(activation)) then + allocate(activation_tmp, source=activation) + else + allocate(activation_tmp, source=relu()) + end if + + res % activation = activation_tmp % get_name() + + allocate( & + res % p, & + source=locally_connected1d_layer(filters, kernel_size, activation_tmp) & + ) + + end function locally_connected1d + module function dense(layer_size, activation) result(res) integer, intent(in) :: layer_size @@ -116,6 +172,33 @@ module function input3d(dim1, dim2, dim3) result(res) res % initialized = .true. end function input3d + module function maxpool1d(pool_size, stride) result(res) + integer, intent(in) :: pool_size + integer, intent(in), optional :: stride + integer :: stride_ + type(layer) :: res + + if (pool_size < 2) & + error stop 'pool_size must be >= 2 in a maxpool1d layer' + + ! 
Stride defaults to pool_size if not provided + if (present(stride)) then + stride_ = stride + else + stride_ = pool_size + end if + + if (stride_ < 1) & + error stop 'stride must be >= 1 in a maxpool1d layer' + + res % name = 'maxpool1d' + + allocate( & + res % p, & + source=maxpool1d_layer(pool_size, stride_) & + ) + + end function maxpool1d module function maxpool2d(pool_size, stride) result(res) integer, intent(in) :: pool_size @@ -161,6 +244,21 @@ module function reshape(output_shape) result(res) end function reshape + module function reshape2d(output_shape) result(res) + integer, intent(in) :: output_shape(:) + type(layer) :: res + + res % name = 'reshape2d' + res % layer_shape = output_shape + + if (size(output_shape) == 2) then + allocate(res % p, source=reshape2d_layer(output_shape)) + else + error stop 'size(output_shape) of the reshape layer must == 2' + end if + + end function reshape2d + module function linear2d(out_features) result(res) integer, intent(in) :: out_features @@ -171,6 +269,7 @@ module function linear2d(out_features) result(res) end function linear2d + module function self_attention(num_heads) result(res) integer, intent(in) :: num_heads type(layer) :: res @@ -179,4 +278,28 @@ module function self_attention(num_heads) result(res) allocate(res % p, source=self_attention_layer(num_heads)) end function self_attention + + module function embedding(sequence_length, vocab_size, model_dimension, positional) result(res) + integer, intent(in) :: sequence_length, vocab_size, model_dimension + integer, optional, intent(in) :: positional + type(layer) :: res + type(embedding_layer) :: embedding_layer_instance + + embedding_layer_instance = embedding_layer(vocab_size, model_dimension, positional) + call embedding_layer_instance % init([sequence_length]) + res % name = 'embedding' + res % layer_shape = [sequence_length, model_dimension] + res % input_layer_shape = [integer ::] + allocate(res % p, source=embedding_layer_instance) + res % initialized = .true. + + end function embedding + + + module function layernorm() result(res) + type(layer) :: res + res % name = 'layernorm' + allocate(res % p, source=layernorm_layer()) + end function layernorm + end submodule nf_layer_constructors_submodule diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index ecdeb41d..63af7264 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -1,6 +1,7 @@ submodule(nf_layer) nf_layer_submodule use iso_fortran_env, only: stderr => error_unit + use nf_conv1d_layer, only: conv1d_layer use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer use nf_dropout_layer, only: dropout_layer @@ -8,10 +9,15 @@ use nf_input1d_layer, only: input1d_layer use nf_input2d_layer, only: input2d_layer use nf_input3d_layer, only: input3d_layer + use nf_locally_connected1d_layer, only: locally_connected1d_layer + use nf_maxpool1d_layer, only: maxpool1d_layer use nf_maxpool2d_layer, only: maxpool2d_layer + use nf_reshape2d_layer, only: reshape2d_layer use nf_reshape_layer, only: reshape3d_layer use nf_linear2d_layer, only: linear2d_layer use nf_self_attention_layer, only: self_attention_layer + use nf_embedding_layer, only: embedding_layer + use nf_layernorm_layer, only: layernorm_layer use nf_optimizers, only: optimizer_base_type contains @@ -23,7 +29,7 @@ pure module subroutine backward_1d(self, previous, gradient) real, intent(in) :: gradient(:) ! Backward pass from a 1-d layer downstream currently implemented - ! only for dense and flatten layers + ! 
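The constructors above guard their arguments with `error stop`; a brief, illustrative sketch of what they accept and reject (mirroring the checks in the code above, not adding any of its own):

```fortran
program pool_and_reshape_args
  ! Sketch of the argument rules enforced by the maxpool1d and reshape2d constructors.
  use nf, only: maxpool1d, reshape2d, layer
  implicit none

  type(layer) :: pool, reshaper

  pool = maxpool1d(pool_size=2)            ! stride defaults to pool_size (2)
  pool = maxpool1d(pool_size=2, stride=2)  ! equivalent to the line above

  reshaper = reshape2d([28, 28])           ! rank-1 input reshaped to 28 x 28

  ! The following would stop the program at run time:
  ! pool = maxpool1d(pool_size=1)          ! pool_size must be >= 2
  ! reshaper = reshape2d([2, 3, 4])        ! output_shape must have exactly 2 elements
end program pool_and_reshape_args
```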
only for dense, dropout and flatten layers select type(this_layer => self % p) type is(dense_layer) @@ -46,12 +52,18 @@ pure module subroutine backward_1d(self, previous, gradient) type is(flatten_layer) - ! Upstream layers permitted: input2d, input3d, conv2d, maxpool2d + ! Upstream layers permitted: input2d, input3d, conv1d, conv2d, locally_connected1d, maxpool1d, maxpool2d select type(prev_layer => previous % p) type is(input2d_layer) call this_layer % backward(prev_layer % output, gradient) + type is(locally_connected1d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(maxpool1d_layer) + call this_layer % backward(prev_layer % output, gradient) type is(input3d_layer) call this_layer % backward(prev_layer % output, gradient) + type is(conv1d_layer) + call this_layer % backward(prev_layer % output, gradient) type is(conv2d_layer) call this_layer % backward(prev_layer % output, gradient) type is(maxpool2d_layer) @@ -60,6 +72,10 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(self_attention_layer) call this_layer % backward(prev_layer % output, gradient) + type is(embedding_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(layernorm_layer) + call this_layer % backward(prev_layer % output, gradient) end select end select @@ -80,10 +96,14 @@ pure module subroutine backward_2d(self, previous, gradient) select type(prev_layer => previous % p) type is(input2d_layer) call this_layer % backward(prev_layer % output, gradient) + type is(embedding_layer) + call this_layer % backward(prev_layer % output, gradient) type is(linear2d_layer) call this_layer % backward(prev_layer % output, gradient) type is(self_attention_layer) call this_layer % backward(prev_layer % output, gradient) + type is(layernorm_layer) + call this_layer % backward(prev_layer % output, gradient) end select type is(self_attention_layer) @@ -91,14 +111,84 @@ pure module subroutine backward_2d(self, previous, gradient) select type(prev_layer => previous % p) type is(input2d_layer) call this_layer % backward(prev_layer % output, gradient) + type is(embedding_layer) + call this_layer % backward(prev_layer % output, gradient) type is(linear2d_layer) call this_layer % backward(prev_layer % output, gradient) type is(self_attention_layer) call this_layer % backward(prev_layer % output, gradient) + type is(layernorm_layer) + call this_layer % backward(prev_layer % output, gradient) end select + type is(layernorm_layer) + + select type(prev_layer => previous % p) + type is(linear2d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(self_attention_layer) + call this_layer % backward(prev_layer % output, gradient) + end select end select + ! Backward pass from a 2-d layer downstream currently implemented + ! 
only for dense and flatten layers + + select type(this_layer => self % p) + + type is(conv1d_layer) + + select type(prev_layer => previous % p) + type is(maxpool1d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(reshape2d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(input2d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(locally_connected1d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(conv1d_layer) + call this_layer % backward(prev_layer % output, gradient) + end select + + type is(locally_connected1d_layer) + + select type(prev_layer => previous % p) + type is(maxpool1d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(reshape2d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(input2d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(locally_connected1d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(conv1d_layer) + call this_layer % backward(prev_layer % output, gradient) + end select + + type is(maxpool1d_layer) + + select type(prev_layer => previous % p) + type is(maxpool1d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(reshape2d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(locally_connected1d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(input2d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(conv1d_layer) + call this_layer % backward(prev_layer % output, gradient) + end select + + type is(reshape2d_layer) + select type(prev_layer => previous % p) + type is(input1d_layer) + call this_layer % backward(prev_layer % output, gradient) + end select + + end select + end subroutine backward_2d @@ -203,6 +293,54 @@ module subroutine forward(self, input) type is(reshape3d_layer) call this_layer % forward(prev_layer % output) end select + + type is(locally_connected1d_layer) + + ! Upstream layers permitted: input2d, locally_connected1d, maxpool1d, reshape2d + select type(prev_layer => input % p) + type is(input2d_layer) + call this_layer % forward(prev_layer % output) + type is(locally_connected1d_layer) + call this_layer % forward(prev_layer % output) + type is(maxpool1d_layer) + call this_layer % forward(prev_layer % output) + type is(reshape2d_layer) + call this_layer % forward(prev_layer % output) + type is(conv1d_layer) + call this_layer % forward(prev_layer % output) + end select + + type is(conv1d_layer) + + ! Upstream layers permitted: input2d, locally_connected1d, maxpool1d, reshape2d + select type(prev_layer => input % p) + type is(input2d_layer) + call this_layer % forward(prev_layer % output) + type is(locally_connected1d_layer) + call this_layer % forward(prev_layer % output) + type is(maxpool1d_layer) + call this_layer % forward(prev_layer % output) + type is(reshape2d_layer) + call this_layer % forward(prev_layer % output) + type is(conv1d_layer) + call this_layer % forward(prev_layer % output) + end select + + type is(maxpool1d_layer) + + ! 
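To make the dispatch rules above concrete, here is an illustrative end-to-end sketch (not part of the patch) of a 1-d pipeline that uses only combinations the forward and backward passes permit. The shape comments follow the `width - kernel_size + 1` and `width / stride` rules in the layer code, and the `flatten` bridge before `dense` is inserted automatically by the network constructor, as in the `cnn_mnist_1d` example.

```fortran
program conv1d_pipeline_demo
  ! Sketch of a 1-d convolutional pipeline using permitted layer combinations.
  use nf, only: network, input, reshape2d, conv1d, maxpool1d, dense, relu, softmax
  implicit none

  type(network) :: net
  real :: x(784)

  net = network([ &
    input(784), &                                          ! rank-1 input
    reshape2d([28, 28]), &                                 ! -> 28 channels x 28 samples
    conv1d(filters=8, kernel_size=3, activation=relu()), & ! -> 8 x 26
    maxpool1d(pool_size=2), &                              ! -> 8 x 13
    dense(10, activation=softmax()) &                      ! flatten is inserted automatically
  ])

  call net % print_info()

  call random_number(x)
  print '(a, 10f8.4)', 'Prediction: ', net % predict(x)
end program conv1d_pipeline_demo
```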
Upstream layers permitted: input1d, locally_connected1d, maxpool1d, reshape2d + select type(prev_layer => input % p) + type is(input2d_layer) + call this_layer % forward(prev_layer % output) + type is(locally_connected1d_layer) + call this_layer % forward(prev_layer % output) + type is(maxpool1d_layer) + call this_layer % forward(prev_layer % output) + type is(reshape2d_layer) + call this_layer % forward(prev_layer % output) + type is(conv1d_layer) + call this_layer % forward(prev_layer % output) + end select type is(maxpool2d_layer) @@ -220,20 +358,30 @@ module subroutine forward(self, input) type is(flatten_layer) - ! Upstream layers permitted: input2d, input3d, conv2d, maxpool2d, reshape3d + ! Upstream layers permitted: input2d, input3d, conv2d, maxpool1d, maxpool2d, reshape2d, reshape3d, locally_connected2d select type(prev_layer => input % p) type is(input2d_layer) call this_layer % forward(prev_layer % output) type is(input3d_layer) call this_layer % forward(prev_layer % output) + type is(conv1d_layer) + call this_layer % forward(prev_layer % output) type is(conv2d_layer) call this_layer % forward(prev_layer % output) + type is(locally_connected1d_layer) + call this_layer % forward(prev_layer % output) + type is(maxpool1d_layer) + call this_layer % forward(prev_layer % output) type is(maxpool2d_layer) call this_layer % forward(prev_layer % output) + type is(reshape2d_layer) + call this_layer % forward(prev_layer % output) type is(reshape3d_layer) call this_layer % forward(prev_layer % output) type is(linear2d_layer) call this_layer % forward(prev_layer % output) + type is(layernorm_layer) + call this_layer % forward(prev_layer % output) end select type is(reshape3d_layer) @@ -247,25 +395,49 @@ module subroutine forward(self, input) type is(flatten_layer) call this_layer % forward(prev_layer % output) end select + + type is(reshape2d_layer) + select type(prev_layer => input % p) + type is(input1d_layer) + call this_layer % forward(prev_layer % output) + end select type is(linear2d_layer) - ! Upstream layers permitted: input2d, linear2d + ! Upstream layers permitted: input2d, linear2d, self_attention, layernorm select type(prev_layer => input % p) type is(input2d_layer) call this_layer % forward(prev_layer % output) + type is(embedding_layer) + call this_layer % forward(prev_layer % output) type is(linear2d_layer) call this_layer % forward(prev_layer % output) type is(self_attention_layer) call this_layer % forward(prev_layer % output) + type is(layernorm_layer) + call this_layer % forward(prev_layer % output) end select type is(self_attention_layer) - ! Upstream layers permitted: input2d, linear2d + ! Upstream layers permitted: input2d, linear2d, self_attention, layernorm select type(prev_layer => input % p) type is(input2d_layer) call this_layer % forward(prev_layer % output) + type is(embedding_layer) + call this_layer % forward(prev_layer % output) + type is(linear2d_layer) + call this_layer % forward(prev_layer % output) + type is(self_attention_layer) + call this_layer % forward(prev_layer % output) + type is(layernorm_layer) + call this_layer % forward(prev_layer % output) + end select + + type is(layernorm_layer) + + ! 
Upstream layers permitted: linear2d, self_attention + select type(prev_layer => input % p) type is(linear2d_layer) call this_layer % forward(prev_layer % output) type is(self_attention_layer) @@ -307,12 +479,26 @@ pure module subroutine get_output_2d(self, output) type is(input2d_layer) allocate(output, source=this_layer % output) + type is(maxpool1d_layer) + allocate(output, source=this_layer % output) + type is(locally_connected1d_layer) + allocate(output, source=this_layer % output) + type is(conv1d_layer) + allocate(output, source=this_layer % output) + type is(reshape2d_layer) + allocate(output, source=this_layer % output) + type is(embedding_layer) + allocate(output, source=this_layer % output) type is(linear2d_layer) allocate(output, source=this_layer % output) type is(self_attention_layer) allocate(output, source=this_layer % output) + type is(layernorm_layer) + allocate(output, source=this_layer % output) class default - error stop '2-d output can only be read from an input2d or linear2d layer.' + error stop '2-d output can only be read from a input2d, maxpool1d, ' & + // 'locally_connected1d, conv1d, reshape2d, embedding, linear2d, ' & + // 'self_attention, or layernorm layer.' end select @@ -354,19 +540,27 @@ impure elemental module subroutine init(self, input) call this_layer % init(input % layer_shape) end select - ! The shape of conv2d, dropout, flatten, linear2d, maxpool2d, or - ! self_attention layers is not known until we receive an input layer. + ! The shape of conv2d, dropout, flatten, linear2d, maxpool2d, + ! self_attention or layernorm layers is not known until we receive an input layer. select type(this_layer => self % p) + type is(conv1d_layer) + self % layer_shape = shape(this_layer % output) type is(conv2d_layer) self % layer_shape = shape(this_layer % output) type is(dropout_layer) self % layer_shape = shape(this_layer % output) + type is(locally_connected1d_layer) + self % layer_shape = shape(this_layer % output) + type is(maxpool1d_layer) + self % layer_shape = shape(this_layer % output) type is(flatten_layer) self % layer_shape = shape(this_layer % output) type is(linear2d_layer) self % layer_shape = shape(this_layer % output) type is(self_attention_layer) self % layer_shape = shape(this_layer % output) + type is(layernorm_layer) + self % layer_shape = shape(this_layer % output) type is(maxpool2d_layer) self % layer_shape = shape(this_layer % output) end select @@ -413,18 +607,30 @@ elemental module function get_num_params(self) result(num_params) num_params = this_layer % get_num_params() type is (dropout_layer) num_params = 0 + type is (conv1d_layer) + num_params = this_layer % get_num_params() type is (conv2d_layer) num_params = this_layer % get_num_params() + type is (locally_connected1d_layer) + num_params = this_layer % get_num_params() + type is (maxpool1d_layer) + num_params = 0 type is (maxpool2d_layer) num_params = 0 type is (flatten_layer) num_params = 0 + type is (reshape2d_layer) + num_params = 0 type is (reshape3d_layer) num_params = 0 type is (linear2d_layer) num_params = this_layer % get_num_params() type is (self_attention_layer) num_params = this_layer % get_num_params() + type is (embedding_layer) + num_params = this_layer % get_num_params() + type is (layernorm_layer) + num_params = this_layer % get_num_params() class default error stop 'Unknown layer type.' end select @@ -446,18 +652,30 @@ module function get_params(self) result(params) params = this_layer % get_params() type is (dropout_layer) ! No parameters to get. 
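Because `get_output` now covers the new rank-2 layers, intermediate feature maps can be inspected directly after a forward pass. A hedged sketch in the style of the tests; the layer index assumes the automatically inserted `flatten` sits between `maxpool1d` and `dense`:

```fortran
program inspect_2d_output
  ! Sketch: read the rank-2 output of an intermediate layer after a forward pass.
  use nf, only: network, input, conv1d, maxpool1d, dense
  implicit none

  type(network) :: net
  real :: x(3, 32)
  real, allocatable :: feature_map(:,:)

  net = network([ &
    input(3, 32), &
    conv1d(filters=16, kernel_size=3), &
    maxpool1d(pool_size=2), &
    dense(4) &
  ])

  call random_number(x)
  call net % forward(x)

  ! Layer 3 is the maxpool1d layer; its output is (16 channels, 15 samples).
  call net % layers(3) % get_output(feature_map)
  print '(a, 2i4)', 'maxpool1d output shape:', shape(feature_map)
end program inspect_2d_output
```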
+ type is (conv1d_layer) + params = this_layer % get_params() type is (conv2d_layer) params = this_layer % get_params() + type is (locally_connected1d_layer) + params = this_layer % get_params() + type is (maxpool1d_layer) + ! No parameters to get. type is (maxpool2d_layer) ! No parameters to get. type is (flatten_layer) ! No parameters to get. + type is (reshape2d_layer) + ! No parameters to get. type is (reshape3d_layer) ! No parameters to get. type is (linear2d_layer) params = this_layer % get_params() type is (self_attention_layer) params = this_layer % get_params() + type is (embedding_layer) + params = this_layer % get_params() + type is (layernorm_layer) + params = this_layer % get_params() class default error stop 'Unknown layer type.' end select @@ -479,11 +697,19 @@ module function get_gradients(self) result(gradients) gradients = this_layer % get_gradients() type is (dropout_layer) ! No gradients to get. + type is (conv1d_layer) + gradients = this_layer % get_gradients() type is (conv2d_layer) gradients = this_layer % get_gradients() + type is (locally_connected1d_layer) + gradients = this_layer % get_gradients() + type is (maxpool1d_layer) + ! No gradients to get. type is (maxpool2d_layer) ! No gradients to get. type is (flatten_layer) + ! No gradients to get. + type is (reshape2d_layer) ! No parameters to get. type is (reshape3d_layer) ! No gradients to get. @@ -491,6 +717,10 @@ module function get_gradients(self) result(gradients) gradients = this_layer % get_gradients() type is (self_attention_layer) gradients = this_layer % get_gradients() + type is (embedding_layer) + gradients = this_layer % get_gradients() + type is (layernorm_layer) + gradients = this_layer % get_gradients() class default error stop 'Unknown layer type.' end select @@ -539,15 +769,31 @@ module subroutine set_params(self, params) ! No parameters to set. write(stderr, '(a)') 'Warning: calling set_params() ' & // 'on a zero-parameter layer; nothing to do.' + + type is (conv1d_layer) + call this_layer % set_params(params) type is (conv2d_layer) call this_layer % set_params(params) + + type is (locally_connected1d_layer) + call this_layer % set_params(params) + + type is (maxpool1d_layer) + ! No parameters to set. + write(stderr, '(a)') 'Warning: calling set_params() ' & + // 'on a zero-parameter layer; nothing to do.' type is (linear2d_layer) call this_layer % set_params(params) type is (self_attention_layer) call this_layer % set_params(params) + type is (embedding_layer) + call this_layer % set_params(params) + + type is (layernorm_layer) + call this_layer % set_params(params) type is (maxpool2d_layer) ! No parameters to set. @@ -558,6 +804,11 @@ module subroutine set_params(self, params) ! No parameters to set. write(stderr, '(a)') 'Warning: calling set_params() ' & // 'on a zero-parameter layer; nothing to do.' + + type is (reshape2d_layer) + ! No parameters to set. + write(stderr, '(a)') 'Warning: calling set_params() ' & + // 'on a zero-parameter layer; nothing to do.' type is (reshape3d_layer) ! No parameters to set. diff --git a/src/nf/nf_layernorm.f90 b/src/nf/nf_layernorm.f90 new file mode 100644 index 00000000..36ef56f0 --- /dev/null +++ b/src/nf/nf_layernorm.f90 @@ -0,0 +1,92 @@ +module nf_layernorm_layer + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + + implicit none + + private + public :: layernorm_layer + + type, extends(base_layer) :: layernorm_layer + !! Layer Normalization + !! ((x − mean(x)) / sqrt(variance(x) + eps) * gamma + beta + !! 
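Spelled out, the transformation this type implements is the usual layer normalization of Ba et al. (2016), applied independently at each sequence position, with the mean and variance taken over the model dimension and, per the constructor and `init` in the submodule, $\varepsilon = 10^{-5}$, $\gamma = 1$, and $\beta = 0$ at initialization:

$$
y \;=\; \gamma \odot \frac{x - \operatorname{mean}(x)}{\sqrt{\operatorname{var}(x) + \varepsilon}} \;+\; \beta
$$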
Based upon `Ba, Jimmy Lei, Jamie Ryan Kiros, and Geoffrey E. Hinton(2016)`: + !! https://arxiv.org/abs/1607.06450v1 + integer :: sequence_length + integer :: model_dimension + + real :: eps + real, allocatable :: gamma(:) + real, allocatable :: beta(:) + + real, allocatable :: d_gamma(:) + real, allocatable :: d_beta(:) + real, allocatable :: gradient(:, :) + + real, allocatable :: mu(:, :) + real, allocatable :: sigma(:) + + real, allocatable :: output(:, :) + + ! temp storages + real, allocatable, private :: normalized(:, :) + real, allocatable, private :: one_over_sigma(:, :) + real, allocatable, private :: gradient_by_gamma_over_sigma(:, :) + contains + procedure :: forward + procedure :: backward + procedure :: init + procedure :: get_num_params + procedure :: get_params + procedure :: get_gradients + procedure :: set_params + end type layernorm_layer + + interface layernorm_layer + module function layernorm_layer_cons() & + result(res) + type(layernorm_layer) :: res + end function layernorm_layer_cons + end interface layernorm_layer + + interface + pure module subroutine forward(self, input) + class(layernorm_layer), intent(in out) :: self + real, intent(in) :: input(:, :) + end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(layernorm_layer), intent(in out) :: self + real, intent(in) :: input(:, :) + real, intent(in) :: gradient(:, :) + end subroutine backward + + module subroutine init(self, input_shape) + class(layernorm_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + + pure module function get_num_params(self) result(num_params) + class(layernorm_layer), intent(in) :: self + integer :: num_params + end function get_num_params + + + module function get_params(self) result(params) + class(layernorm_layer), intent(in), target :: self + real, allocatable :: params(:) + end function get_params + + + module function get_gradients(self) result(gradients) + class(layernorm_layer), intent(in), target :: self + real, allocatable :: gradients(:) + end function get_gradients + + + module subroutine set_params(self, params) + class(layernorm_layer), intent(in out) :: self + real, intent(in), target :: params(:) + end subroutine set_params + end interface +end module nf_layernorm_layer \ No newline at end of file diff --git a/src/nf/nf_layernorm_submodule.f90 b/src/nf/nf_layernorm_submodule.f90 new file mode 100644 index 00000000..4eaa4382 --- /dev/null +++ b/src/nf/nf_layernorm_submodule.f90 @@ -0,0 +1,149 @@ +submodule(nf_layernorm_layer) nf_layernorm_layer_submodule + implicit none +contains + module function layernorm_layer_cons() & + result(res) + type(layernorm_layer) :: res + + res % eps = 1e-5 + end function layernorm_layer_cons + + pure module subroutine forward(self, input) + class(layernorm_layer), intent(in out) :: self + real, intent(in) :: input(:, :) + integer :: i + + ! mu = x - MEAN_last_dim(x) + do concurrent(i = 1: self % model_dimension) + self % mu(:, i) = input(:, i) - (sum(input, dim=2) / self % model_dimension) + end do + + ! square root of variance shifted be eps + self % sigma = sqrt((sum(self % mu ** 2, dim=2) / self % model_dimension) + self % eps) + + ! normalize mu by variance by first axis + do concurrent(i = 1: self % model_dimension) + self % normalized(:, i) = self % mu(:, i) / self % sigma + end do + + ! 
forward through trainable params gamma and beta + do concurrent(i = 1: self % sequence_length) + self % output(i, :) = self % normalized(i, :) * self % gamma + self % beta + end do + end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(layernorm_layer), intent(in out) :: self + real, intent(in) :: input(:, :) + real, intent(in) :: gradient(:, :) + + self % one_over_sigma = (1 / spread(self % sigma, dim=2, ncopies=self % model_dimension)) + self % gradient_by_gamma_over_sigma = & + gradient & + * spread(self % gamma, dim=1, ncopies=self % sequence_length) & + * self % one_over_sigma + + ! d_output/d_gamma = sum(d_output/d_y * mu/sigma) + self % d_gamma = sum(gradient * self % mu * self % one_over_sigma, dim=1) + + ! d_output/d_beta = sum(d_output/d_y) * 1 + self % d_beta = sum(gradient, dim=1) + + ! From this article: + ! https://robotchinwag.com/posts/layer-normalization-deriving-the-gradient-for-the-backward-pass/ + ! d_output/d_x = d_output/d_y * gamma/sigma + ! - d_output/d_y + ! - sum(d_output/d_y * gamma/sigma) / len + ! - mu * sum(d_output/d_y * gamma * mu * sigma^(03)) / len + self % gradient = & + self % gradient_by_gamma_over_sigma & + - spread(& + sum(self % gradient_by_gamma_over_sigma, dim=2),& + dim=2,& + ncopies=self % model_dimension& + ) / self % model_dimension & + - self % mu * spread(& + sum(self % gradient_by_gamma_over_sigma * self % mu * (self % one_over_sigma ** 2), dim=2),& + dim=2,& + ncopies=self % model_dimension& + ) / self % model_dimension + end subroutine backward + + module subroutine init(self, input_shape) + class(layernorm_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + if (size(input_shape) /= 2) then + error stop "LayerNorm Layer accepts 2D input" + end if + self % sequence_length = input_shape(1) + self % model_dimension = input_shape(2) + + ! default initialization from PyTorch + allocate(self % gamma(self % model_dimension)) + self % gamma = 1. + allocate(self % beta(self % model_dimension)) + self % beta = 0. + + allocate(self % d_gamma(self % model_dimension)) + allocate(self % d_beta(self % model_dimension)) + allocate(self % gradient(self % sequence_length, self % model_dimension)) + + allocate(self % mu(self % sequence_length, self % model_dimension)) + allocate(self % sigma(self % sequence_length)) + + allocate(self % output(self % sequence_length, self % model_dimension)) + + allocate(self % normalized, mold=self % mu) + allocate(self % one_over_sigma, mold=self % mu) + allocate(self % gradient_by_gamma_over_sigma, mold=self % mu) + end subroutine init + + pure module function get_num_params(self) result(num_params) + class(layernorm_layer), intent(in) :: self + integer :: num_params + + ! Number of weights times number of biases + num_params = 2 * self % model_dimension + + end function get_num_params + + + module function get_params(self) result(params) + class(layernorm_layer), intent(in), target :: self + real, allocatable :: params(:) + + params = [ & + self % gamma, & + self % beta & + ] + + end function get_params + + + module function get_gradients(self) result(gradients) + class(layernorm_layer), intent(in), target :: self + real, allocatable :: gradients(:) + + gradients = [ & + self % d_gamma, & + self % d_beta & + ] + + end function get_gradients + + + module subroutine set_params(self, params) + class(layernorm_layer), intent(in out) :: self + real, intent(in), target :: params(:) + + ! 
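For reference, the gradients computed by `backward` above are, as I read the code (with $g = \partial L/\partial y$, $\mu = x - \operatorname{mean}(x)$, $\sigma$ as stored by the forward pass, $D$ the model dimension, $i$ indexing sequence positions, and sums over $j$ or $k$ running across the model dimension):

$$
\frac{\partial L}{\partial \gamma_j} = \sum_i \frac{g_{ij}\,\mu_{ij}}{\sigma_i},
\qquad
\frac{\partial L}{\partial \beta_j} = \sum_i g_{ij},
$$

$$
\frac{\partial L}{\partial x_{ij}} =
\frac{g_{ij}\,\gamma_j}{\sigma_i}
\;-\; \frac{1}{D}\sum_{k} \frac{g_{ik}\,\gamma_k}{\sigma_i}
\;-\; \frac{\mu_{ij}}{D}\sum_{k} \frac{g_{ik}\,\gamma_k\,\mu_{ik}}{\sigma_i^{3}}
$$

which matches the derivation linked in the comment.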
check if the number of parameters is correct + if (size(params) /= self % get_num_params()) then + error stop 'Error: number of parameters does not match' + end if + + self % gamma = params(1: self % model_dimension) + self % beta = params(self % model_dimension + 1: 2 * self % model_dimension) + + end subroutine set_params +end submodule nf_layernorm_layer_submodule diff --git a/src/nf/nf_locally_connected1d_layer.f90 b/src/nf/nf_locally_connected1d_layer.f90 new file mode 100644 index 00000000..beca76d5 --- /dev/null +++ b/src/nf/nf_locally_connected1d_layer.f90 @@ -0,0 +1,119 @@ +module nf_locally_connected1d_layer + !! This modules provides a 1-d convolutional `locally_connected1d` type. + + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + implicit none + + private + public :: locally_connected1d_layer + + type, extends(base_layer) :: locally_connected1d_layer + + integer :: width + integer :: height + integer :: channels + integer :: kernel_size + integer :: filters + + real, allocatable :: biases(:,:) ! size(filters) + real, allocatable :: kernel(:,:,:,:) ! filters x channels x window x window + real, allocatable :: output(:,:) ! filters x output_width * output_height + real, allocatable :: z(:,:) ! kernel .dot. input + bias + + real, allocatable :: dw(:,:,:,:) ! weight (kernel) gradients + real, allocatable :: db(:,:) ! bias gradients + real, allocatable :: gradient(:,:) + + class(activation_function), allocatable :: activation + + contains + + procedure :: forward + procedure :: backward + procedure :: get_gradients + procedure :: get_num_params + procedure :: get_params + procedure :: init + procedure :: set_params + + end type locally_connected1d_layer + + interface locally_connected1d_layer + module function locally_connected1d_layer_cons(filters, kernel_size, activation) & + result(res) + !! `locally_connected1d_layer` constructor function + integer, intent(in) :: filters + integer, intent(in) :: kernel_size + class(activation_function), intent(in) :: activation + type(locally_connected1d_layer) :: res + end function locally_connected1d_layer_cons + end interface locally_connected1d_layer + + interface + + module subroutine init(self, input_shape) + !! Initialize the layer data structures. + !! + !! This is a deferred procedure from the `base_layer` abstract type. + class(locally_connected1d_layer), intent(in out) :: self + !! A `locally_connected1d_layer` instance + integer, intent(in) :: input_shape(:) + !! Input layer dimensions + end subroutine init + + pure module subroutine forward(self, input) + !! Apply a forward pass on the `locally_connected1d` layer. + class(locally_connected1d_layer), intent(in out) :: self + !! A `locally_connected1d_layer` instance + real, intent(in) :: input(:,:) + !! Input data + end subroutine forward + + pure module subroutine backward(self, input, gradient) + !! Apply a backward pass on the `locally_connected1d` layer. + class(locally_connected1d_layer), intent(in out) :: self + !! A `locally_connected1d_layer` instance + real, intent(in) :: input(:,:) + !! Input data (previous layer) + real, intent(in) :: gradient(:,:) + !! Gradient (next layer) + end subroutine backward + + pure module function get_num_params(self) result(num_params) + !! Get the number of parameters in the layer. + class(locally_connected1d_layer), intent(in) :: self + !! A `locally_connected1d_layer` instance + integer :: num_params + !! 
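Unlike `conv1d`, the kernel declared above carries a separate set of weights for every output position, so the parameter count grows with the output width. A small illustrative sketch of the difference; the `locally_connected1d` count uses the array shapes allocated in this module, while the `conv1d` count assumes the usual shared-kernel layout, which is not shown in this hunk:

```fortran
program parameter_count_sketch
  ! Compare trainable-parameter counts for a shared-kernel conv1d layer and a
  ! locally connected 1-d layer with the same filters, kernel size, and input.
  implicit none

  integer, parameter :: channels = 28, input_width = 28
  integer, parameter :: filters = 8, kernel_size = 3
  integer, parameter :: out_width = input_width - kernel_size + 1
  integer :: n_conv1d, n_locally_connected1d

  ! conv1d (assumed shared kernel): kernel(filters, channels, kernel_size) + biases(filters)
  n_conv1d = filters * channels * kernel_size + filters

  ! locally_connected1d: kernel(filters, out_width, channels, kernel_size) + biases(filters, out_width)
  n_locally_connected1d = filters * out_width * channels * kernel_size + filters * out_width

  print '(a, i0)', 'conv1d parameters:              ', n_conv1d
  print '(a, i0)', 'locally_connected1d parameters: ', n_locally_connected1d
end program parameter_count_sketch
```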
Number of parameters + end function get_num_params + + module function get_params(self) result(params) + !! Return the parameters (weights and biases) of this layer. + !! The parameters are ordered as weights first, biases second. + class(locally_connected1d_layer), intent(in), target :: self + !! A `locally_connected1d_layer` instance + real, allocatable :: params(:) + !! Parameters to get + end function get_params + + module function get_gradients(self) result(gradients) + !! Return the gradients of this layer. + !! The gradients are ordered as weights first, biases second. + class(locally_connected1d_layer), intent(in), target :: self + !! A `locally_connected1d_layer` instance + real, allocatable :: gradients(:) + !! Gradients to get + end function get_gradients + + module subroutine set_params(self, params) + !! Set the parameters of the layer. + class(locally_connected1d_layer), intent(in out) :: self + !! A `locally_connected1d_layer` instance + real, intent(in) :: params(:) + !! Parameters to set + end subroutine set_params + + end interface + +end module nf_locally_connected1d_layer diff --git a/src/nf/nf_locally_connected1d_layer_submodule.f90 b/src/nf/nf_locally_connected1d_layer_submodule.f90 new file mode 100644 index 00000000..053c520b --- /dev/null +++ b/src/nf/nf_locally_connected1d_layer_submodule.f90 @@ -0,0 +1,152 @@ +submodule(nf_locally_connected1d_layer) nf_locally_connected1d_layer_submodule + + use nf_activation, only: activation_function + use nf_random, only: random_normal + + implicit none + +contains + + module function locally_connected1d_layer_cons(filters, kernel_size, activation) result(res) + implicit none + integer, intent(in) :: filters + integer, intent(in) :: kernel_size + class(activation_function), intent(in) :: activation + type(locally_connected1d_layer) :: res + + res % kernel_size = kernel_size + res % filters = filters + res % activation_name = activation % get_name() + allocate(res % activation, source = activation) + end function locally_connected1d_layer_cons + + module subroutine init(self, input_shape) + implicit none + class(locally_connected1d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + self % channels = input_shape(1) + self % width = input_shape(2) - self % kernel_size + 1 + + allocate(self % output(self % filters, self % width)) + self % output = 0 + + allocate(self % kernel(self % filters, self % width, self % channels, self % kernel_size)) + call random_normal(self % kernel) + self % kernel = self % kernel / real(self % kernel_size**2) + + allocate(self % biases(self % filters, self % width)) + self % biases = 0 + + allocate(self % z, mold=self % output) + self % z = 0 + + allocate(self % gradient(input_shape(1), input_shape(2))) + self % gradient = 0 + + allocate(self % dw, mold=self % kernel) + self % dw = 0 + + allocate(self % db, mold=self % biases) + self % db = 0 + end subroutine init + + pure module subroutine forward(self, input) + implicit none + class(locally_connected1d_layer), intent(in out) :: self + real, intent(in) :: input(:,:) + integer :: input_channels, input_width + integer :: j, n + integer :: iws, iwe + + input_channels = size(input, dim=1) + input_width = size(input, dim=2) + + do j = 1, self % width + iws = j + iwe = j + self % kernel_size - 1 + do n = 1, self % filters + self % z(n, j) = sum(self % kernel(n, j, :, :) * input(:, iws:iwe)) + self % biases(n, j) + end do + end do + self % output = self % activation % eval(self % z) + end subroutine forward + + pure module subroutine 
backward(self, input, gradient) + implicit none + class(locally_connected1d_layer), intent(in out) :: self + real, intent(in) :: input(:,:) + real, intent(in) :: gradient(:,:) + integer :: input_channels, input_width, output_width + integer :: j, n, k + integer :: iws, iwe + real :: gdz(self % filters, self % width) + real :: db_local(self % filters, self % width) + real :: dw_local(self % filters, self % width, self % channels, self % kernel_size) + + input_channels = size(input, dim=1) + input_width = size(input, dim=2) + output_width = self % width + + do j = 1, output_width + gdz(:, j) = gradient(:, j) * self % activation % eval_prime(self % z(:, j)) + end do + + do n = 1, self % filters + do j = 1, output_width + db_local(n, j) = gdz(n, j) + end do + end do + + dw_local = 0.0 + self % gradient = 0.0 + + do n = 1, self % filters + do j = 1, output_width + iws = j + iwe = j + self % kernel_size - 1 + do k = 1, self % channels + dw_local(n, j, k, :) = dw_local(n, j, k, :) + input(k, iws:iwe) * gdz(n, j) + self % gradient(k, iws:iwe) = self % gradient(k, iws:iwe) + self % kernel(n, j, k, :) * gdz(n, j) + end do + end do + end do + + self % dw = self % dw + dw_local + self % db = self % db + db_local + end subroutine backward + + pure module function get_num_params(self) result(num_params) + class(locally_connected1d_layer), intent(in) :: self + integer :: num_params + num_params = product(shape(self % kernel)) + product(shape(self % biases)) + end function get_num_params + + module function get_params(self) result(params) + class(locally_connected1d_layer), intent(in), target :: self + real, allocatable :: params(:) + params = [self % kernel, self % biases] + end function get_params + + module function get_gradients(self) result(gradients) + class(locally_connected1d_layer), intent(in), target :: self + real, allocatable :: gradients(:) + gradients = [self % dw, self % db] + end function get_gradients + + module subroutine set_params(self, params) + class(locally_connected1d_layer), intent(in out) :: self + real, intent(in) :: params(:) + + if (size(params) /= self % get_num_params()) then + error stop 'locally_connected1d_layer % set_params: Number of parameters does not match' + end if + + self % kernel = reshape(params(:product(shape(self % kernel))), shape(self % kernel)) + associate(n => product(shape(self % kernel))) + self % biases = reshape(params(n + 1 :), shape(self % biases)) + end associate + + end subroutine set_params + +end submodule nf_locally_connected1d_layer_submodule diff --git a/src/nf/nf_maxpool1d_layer.f90 b/src/nf/nf_maxpool1d_layer.f90 new file mode 100644 index 00000000..b9a14d07 --- /dev/null +++ b/src/nf/nf_maxpool1d_layer.f90 @@ -0,0 +1,69 @@ +module nf_maxpool1d_layer + !! This module provides the 1-d maxpooling layer. + + use nf_base_layer, only: base_layer + implicit none + + private + public :: maxpool1d_layer + + type, extends(base_layer) :: maxpool1d_layer + integer :: channels + integer :: width ! Length of the input along the pooling dimension + integer :: pool_size + integer :: stride + + ! Location (as input matrix indices) of the maximum value within each pooling region. + ! Dimensions: (channels, new_width) + integer, allocatable :: maxloc(:,:) + + ! Gradient for the input (same shape as the input). + real, allocatable :: gradient(:,:) + ! Output after pooling (dimensions: (channels, new_width)). 
+ real, allocatable :: output(:,:) + contains + procedure :: init + procedure :: forward + procedure :: backward + end type maxpool1d_layer + + interface maxpool1d_layer + pure module function maxpool1d_layer_cons(pool_size, stride) result(res) + !! `maxpool1d` constructor function. + integer, intent(in) :: pool_size + !! Width of the pooling window. + integer, intent(in) :: stride + !! Stride of the pooling window. + type(maxpool1d_layer) :: res + end function maxpool1d_layer_cons + end interface maxpool1d_layer + + interface + module subroutine init(self, input_shape) + !! Initialize the `maxpool1d` layer instance with an input shape. + class(maxpool1d_layer), intent(in out) :: self + !! `maxpool1d_layer` instance. + integer, intent(in) :: input_shape(:) + !! Array shape of the input layer, expected as (channels, width). + end subroutine init + + pure module subroutine forward(self, input) + !! Run a forward pass of the `maxpool1d` layer. + class(maxpool1d_layer), intent(in out) :: self + !! `maxpool1d_layer` instance. + real, intent(in) :: input(:,:) + !! Input data (output of the previous layer), with shape (channels, width). + end subroutine forward + + pure module subroutine backward(self, input, gradient) + !! Run a backward pass of the `maxpool1d` layer. + class(maxpool1d_layer), intent(in out) :: self + !! `maxpool1d_layer` instance. + real, intent(in) :: input(:,:) + !! Input data (output of the previous layer). + real, intent(in) :: gradient(:,:) + !! Gradient from the downstream layer, with shape (channels, pooled width). + end subroutine backward + end interface + +end module nf_maxpool1d_layer \ No newline at end of file diff --git a/src/nf/nf_maxpool1d_layer_submodule.f90 b/src/nf/nf_maxpool1d_layer_submodule.f90 new file mode 100644 index 00000000..336264f7 --- /dev/null +++ b/src/nf/nf_maxpool1d_layer_submodule.f90 @@ -0,0 +1,93 @@ +submodule(nf_maxpool1d_layer) nf_maxpool1d_layer_submodule + + implicit none + +contains + + pure module function maxpool1d_layer_cons(pool_size, stride) result(res) + implicit none + integer, intent(in) :: pool_size + integer, intent(in) :: stride + type(maxpool1d_layer) :: res + res % pool_size = pool_size + res % stride = stride + end function maxpool1d_layer_cons + + + module subroutine init(self, input_shape) + implicit none + class(maxpool1d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + self % channels = input_shape(1) + self % width = input_shape(2) / self % stride + + allocate(self % maxloc(self % channels, self % width)) + self % maxloc = 0 + + allocate(self % gradient(input_shape(1),input_shape(2))) + self % gradient = 0 + + allocate(self % output(self % channels, self % width)) + self % output = 0 + + end subroutine init + + pure module subroutine forward(self, input) + implicit none + class(maxpool1d_layer), intent(in out) :: self + real, intent(in) :: input(:,:) + integer :: input_width + integer :: i, n + integer :: ii + integer :: iend + integer :: iextent + integer :: maxloc_x + + input_width = size(input, dim=2) + + iextent = input_width - mod(input_width, self % stride) + + ! Stride along the width of the input + stride_over_input: do concurrent(i = 1:iextent:self % stride) + + ! Index of the pooling layer + ii = i / self % stride + 1 + iend = i + self % pool_size - 1 + + maxpool_for_each_channel: do concurrent(n = 1:self % channels) + + ! 
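The pooled width comes straight from the `input_shape(2) / stride` integer division in `init` above, so a trailing partial window is dropped. A hedged sketch in the style of the layer tests, checking the inferred output shape:

```fortran
program maxpool1d_shape_sketch
  ! Sketch: initialize a maxpool1d layer from an input2d layer and inspect
  ! the inferred shape (channels stay, width is divided by the stride).
  use nf, only: input, maxpool1d, layer
  implicit none

  type(layer) :: input_layer, pool_layer

  input_layer = input(3, 31)        ! 3 channels, width 31
  pool_layer = maxpool1d(pool_size=2)
  call pool_layer % init(input_layer)

  ! Expected [3, 15]: 31 / 2 = 15 with integer division; the last sample is dropped.
  print '(a, 2i4)', 'maxpool1d layer shape:', pool_layer % layer_shape
end program maxpool1d_shape_sketch
```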
Get and store the location of the maximum value + maxloc_x = maxloc(input(n, i:iend), dim=1) + self % maxloc(n,ii) = maxloc_x + i - 1 + + self % output(n,ii) = input(n, self % maxloc(n,ii)) + + end do maxpool_for_each_channel + + end do stride_over_input + + end subroutine forward + + pure module subroutine backward(self, input, gradient) + implicit none + class(maxpool1d_layer), intent(in out) :: self + real, intent(in) :: input(:,:) + real, intent(in) :: gradient(:,:) + integer :: gradient_shape(2) + integer :: channels, width + integer :: i, n + + gradient_shape = shape(gradient) + channels = gradient_shape(1) + width = gradient_shape(2) + + ! The gradient of a max-pooling layer is assigned to the stored max locations + do concurrent(n = 1:channels, i = 1:width) + self % gradient(n, self % maxloc(n,i)) = gradient(n,i) + end do + + end subroutine backward + + +end submodule nf_maxpool1d_layer_submodule diff --git a/src/nf/nf_network.f90 b/src/nf/nf_network.f90 index 5916924e..2bd7ce8c 100644 --- a/src/nf/nf_network.f90 +++ b/src/nf/nf_network.f90 @@ -32,17 +32,19 @@ module nf_network procedure, private :: evaluate_batch_1d procedure, private :: forward_1d + procedure, private :: forward_1d_int procedure, private :: forward_2d procedure, private :: forward_3d procedure, private :: predict_1d + procedure, private :: predict_1d_int procedure, private :: predict_2d procedure, private :: predict_3d procedure, private :: predict_batch_1d procedure, private :: predict_batch_3d generic :: evaluate => evaluate_batch_1d - generic :: forward => forward_1d, forward_2d, forward_3d - generic :: predict => predict_1d, predict_2d, predict_3d + generic :: forward => forward_1d, forward_1d_int, forward_2d, forward_3d + generic :: predict => predict_1d, predict_1d_int, predict_2d, predict_3d generic :: predict_batch => predict_batch_1d, predict_batch_3d end type network @@ -95,6 +97,12 @@ module subroutine forward_1d(self, input) !! 1-d input data end subroutine forward_1d + module subroutine forward_1d_int(self, input) + !! Same as `forward_1d` except `integer` + class(network), intent(in out) :: self + integer, intent(in) :: input(:) + end subroutine forward_1d_int + module subroutine forward_2d(self, input) !! Apply a forward pass through the network. !! @@ -137,6 +145,13 @@ module function predict_1d(self, input) result(res) !! Output of the network end function predict_1d + module function predict_1d_int(self, input) result(res) + !! Same as `predict_1d` except `integer` + class(network), intent(in out) :: self + integer, intent(in) :: input(:) + real, allocatable :: res(:) + end function predict_1d_int + module function predict_2d(self, input) result(res) !! Return the output of the network given the input 1-d array. class(network), intent(in out) :: self @@ -201,6 +216,7 @@ module integer function get_num_params(self) !! Network instance end function get_num_params + module function get_params(self) result(params) !! Get the network parameters (weights and biases). 
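With the `forward_1d_int` and `predict_1d_int` specifics added to the generics above, integer token indices can be fed to `forward` and `predict` directly when the first layer is an `embedding`. An illustrative sketch (it assumes `embedding` and `linear2d` are re-exported from `nf`, and it ends in `flatten` plus `dense` so that `predict` has a rank-1 output to return):

```fortran
program integer_predict_sketch
  ! Sketch: pass integer token indices straight to predict() when the network
  ! starts with an embedding layer (uses the new predict_1d_int specific).
  use nf, only: network, embedding, linear2d, flatten, dense, softmax
  implicit none

  type(network) :: net
  real, allocatable :: probs(:)
  integer, parameter :: sequence_length = 4, vocab_size = 50, model_dimension = 8

  net = network([ &
    embedding(sequence_length, vocab_size, model_dimension), &
    linear2d(model_dimension), &
    flatten(), &
    dense(3, activation=softmax()) &
  ])

  ! Token indices are integers in [1, vocab_size]; no real-valued encoding needed.
  probs = net % predict([7, 2, 41, 5])
  print '(a, 3f8.4)', 'class probabilities: ', probs
end program integer_predict_sketch
```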
class(network), intent(in) :: self diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index f344c5c5..1752f1f8 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -1,5 +1,6 @@ submodule(nf_network) nf_network_submodule + use nf_conv1d_layer, only: conv1d_layer use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer use nf_dropout_layer, only: dropout_layer @@ -7,12 +8,17 @@ use nf_input1d_layer, only: input1d_layer use nf_input2d_layer, only: input2d_layer use nf_input3d_layer, only: input3d_layer + use nf_locally_connected1d_layer, only: locally_connected1d_layer + use nf_maxpool1d_layer, only: maxpool1d_layer use nf_maxpool2d_layer, only: maxpool2d_layer + use nf_reshape2d_layer, only: reshape2d_layer use nf_reshape_layer, only: reshape3d_layer use nf_linear2d_layer, only: linear2d_layer use nf_self_attention_layer, only: self_attention_layer + use nf_embedding_layer, only: embedding_layer + use nf_layernorm_layer, only: layernorm_layer use nf_layer, only: layer - use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape + use nf_layer_constructors, only: conv1d, conv2d, dense, flatten, input, maxpool1d, maxpool2d, reshape, reshape2d use nf_loss, only: quadratic use nf_optimizers, only: optimizer_base_type, sgd use nf_parallel, only: tile_indices @@ -46,7 +52,7 @@ module function network_from_layers(layers) result(res) error stop 'Error: A network must have at least 2 layers.' ! The first layer must be an input layer - if (.not. layers(1) % name == 'input') & + if (.not. layers(1) % name == 'input' .and. .not. layers(1) % name == 'embedding') & error stop 'Error: First layer in the network must be an input layer.' !TODO Ensure that the layers are in allowed sequence: @@ -73,12 +79,24 @@ module function network_from_layers(layers) result(res) type is(conv2d_layer) res % layers = [res % layers(:n-1), flatten(), res % layers(n:)] n = n + 1 + type is(locally_connected1d_layer) + res % layers = [res % layers(:n-1), flatten(), res % layers(n:)] + n = n + 1 type is(maxpool2d_layer) res % layers = [res % layers(:n-1), flatten(), res % layers(n:)] n = n + 1 type is(reshape3d_layer) res % layers = [res % layers(:n-1), flatten(), res % layers(n:)] n = n + 1 + type is(maxpool1d_layer) + res % layers = [res % layers(:n-1), flatten(), res % layers(n:)] + n = n + 1 + type is(conv1d_layer) + res % layers = [res % layers(:n-1), flatten(), res % layers(n:)] + n = n + 1 + type is(reshape2d_layer) + res % layers = [res % layers(:n-1), flatten(), res % layers(n:)] + n = n + 1 class default n = n + 1 end select @@ -147,7 +165,6 @@ module subroutine backward(self, output, loss) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) type is(conv2d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) - type is(flatten_layer) if (size(self % layers(n) % layer_shape) == 2) then call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient_2d) @@ -156,13 +173,22 @@ module subroutine backward(self, output, loss) end if type is(maxpool2d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) - type is(reshape3d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) type is(linear2d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) type is(self_attention_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type 
is(maxpool1d_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type is(reshape2d_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type is(conv1d_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type is(locally_connected1d_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type is(layernorm_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) end select end if @@ -207,8 +233,9 @@ module subroutine forward_1d(self, input) integer :: n ! Set the input array into the input layer - select type(input_layer => self % layers(1) % p); type is(input1d_layer) - call input_layer % set(input) + select type(input_layer => self % layers(1) % p) + type is(input1d_layer) + call input_layer % set(input) end select do n = 2, size(self % layers) @@ -217,6 +244,21 @@ module subroutine forward_1d(self, input) end subroutine forward_1d + module subroutine forward_1d_int(self, input) + class(network), intent(in out) :: self + integer, intent(in) :: input(:) + integer :: n + + select type(input_layer => self % layers(1) % p) + type is(embedding_layer) + call input_layer % forward(input) + end select + + do n = 2, size(self % layers) + call self % layers(n) % forward(self % layers(n - 1)) + end do + + end subroutine forward_1d_int module subroutine forward_2d(self, input) class(network), intent(in out) :: self @@ -281,6 +323,31 @@ module function predict_1d(self, input) result(res) end function predict_1d + module function predict_1d_int(self, input) result(res) + class(network), intent(in out) :: self + integer, intent(in) :: input(:) + real, allocatable :: res(:) + integer :: n, num_layers + + num_layers = size(self % layers) + + call self % set_training_mode(.false.) + call self % forward(input) + call self % set_training_mode(.true.) 
+ + select type(output_layer => self % layers(num_layers) % p) + type is(dense_layer) + res = output_layer % output + type is(dropout_layer) + res = output_layer % output + type is(flatten_layer) + res = output_layer % output + class default + error stop 'network % output not implemented for ' // & + trim(self % layers(num_layers) % name) // ' layer' + end select + + end function predict_1d_int module function predict_2d(self, input) result(res) class(network), intent(in out) :: self @@ -438,7 +505,6 @@ module function get_num_params(self) end function get_num_params - module function get_params(self) result(params) class(network), intent(in) :: self real, allocatable :: params(:) @@ -458,7 +524,6 @@ module function get_params(self) result(params) end function get_params - module function get_gradients(self) result(gradients) class(network), intent(in) :: self real, allocatable :: gradients(:) @@ -618,6 +683,12 @@ module subroutine update(self, optimizer, batch_size) type is(conv2d_layer) call co_sum(this_layer % dw) call co_sum(this_layer % db) + type is(conv1d_layer) + call co_sum(this_layer % dw) + call co_sum(this_layer % db) + type is(locally_connected1d_layer) + call co_sum(this_layer % dw) + call co_sum(this_layer % db) end select end do #endif @@ -635,6 +706,12 @@ module subroutine update(self, optimizer, batch_size) type is(conv2d_layer) this_layer % dw = 0 this_layer % db = 0 + type is(conv1d_layer) + this_layer % dw = 0 + this_layer % db = 0 + type is(locally_connected1d_layer) + this_layer % dw = 0 + this_layer % db = 0 end select end do diff --git a/src/nf/nf_reshape2d_layer.f90 b/src/nf/nf_reshape2d_layer.f90 new file mode 100644 index 00000000..6b99729b --- /dev/null +++ b/src/nf/nf_reshape2d_layer.f90 @@ -0,0 +1,77 @@ +module nf_reshape2d_layer + + !! This module provides the concrete reshape layer type. + !! It is used internally by the layer type. + !! It is not intended to be used directly by the user. + + use nf_base_layer, only: base_layer + + implicit none + + private + public :: reshape2d_layer + + type, extends(base_layer) :: reshape2d_layer + + !! Concrete implementation of a reshape layer type + !! It implements only rank-1 to rank-2 reshaping. + + integer :: input_shape(1) + integer :: output_shape(2) + real, allocatable :: gradient(:) + real, allocatable :: output(:,:) + + contains + + procedure :: backward + procedure :: forward + procedure :: init + + end type reshape2d_layer + + interface reshape2d_layer + pure module function reshape2d_layer_cons(output_shape) result(res) + !! This function returns the `reshape_layer` instance. + integer, intent(in) :: output_shape(2) + !! The shape of the output + type(reshape2d_layer) :: res + !! reshape_layer instance + end function reshape2d_layer_cons + end interface reshape2d_layer + + interface + + pure module subroutine backward(self, input, gradient) + !! Apply the backward pass for the reshape2d layer. + !! This is just flattening to a rank-1 array. + class(reshape2d_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: input(:) + !! Input from the previous layer + real, intent(in) :: gradient(:,:) + !! Gradient from the next layer + end subroutine backward + + pure module subroutine forward(self, input) + !! Apply the forward pass for the reshape2d layer. + !! This is just a reshape from rank-1 to rank-2 array. + class(reshape2d_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: input(:) + !! 
Input from the previous layer + end subroutine forward + + module subroutine init(self, input_shape) + !! Initialize the layer data structures. + !! + !! This is a deferred procedure from the `base_layer` abstract type. + class(reshape2d_layer), intent(in out) :: self + !! Dense layer instance + integer, intent(in) :: input_shape(:) + !! Shape of the input layer + end subroutine init + + end interface + + end module nf_reshape2d_layer + \ No newline at end of file diff --git a/src/nf/nf_reshape2d_layer_submodule.f90 b/src/nf/nf_reshape2d_layer_submodule.f90 new file mode 100644 index 00000000..487d5cb8 --- /dev/null +++ b/src/nf/nf_reshape2d_layer_submodule.f90 @@ -0,0 +1,50 @@ +submodule(nf_reshape2d_layer) nf_reshape2d_layer_submodule + + use nf_base_layer, only: base_layer + + implicit none + +contains + + pure module function reshape2d_layer_cons(output_shape) result(res) + integer, intent(in) :: output_shape(2) + type(reshape2d_layer) :: res + res % output_shape = output_shape + end function reshape2d_layer_cons + + + pure module subroutine backward(self, input, gradient) + class(reshape2d_layer), intent(in out) :: self + real, intent(in) :: input(:) + real, intent(in) :: gradient(:,:) + ! The `input` dummy argument is not used but nevertheless declared + ! because the abstract type requires it. + self % gradient = pack(gradient, .true.) + end subroutine backward + + + pure module subroutine forward(self, input) + class(reshape2d_layer), intent(in out) :: self + real, intent(in) :: input(:) + self % output = reshape(input, self % output_shape) + end subroutine forward + + + module subroutine init(self, input_shape) + class(reshape2d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + self % input_shape = input_shape + + allocate(self % gradient(input_shape(1))) + self % gradient = 0 + + allocate(self % output( & + self % output_shape(1), & + self % output_shape(2) & + )) + self % output = 0 + + end subroutine init + +end submodule nf_reshape2d_layer_submodule diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 741e9930..ec4e139e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -6,14 +6,21 @@ foreach(execid linear2d_layer parametric_activation dense_layer + conv1d_layer conv2d_layer + locally_connected1d_layer + maxpool1d_layer maxpool2d_layer flatten_layer insert_flatten reshape_layer + reshape2d_layer multihead_attention_layer + embedding_layer + layernorm dense_network get_set_network_params + conv1d_network conv2d_network optimizers loss diff --git a/test/test_conv1d_layer.f90 b/test/test_conv1d_layer.f90 new file mode 100644 index 00000000..81d03c1f --- /dev/null +++ b/test/test_conv1d_layer.f90 @@ -0,0 +1,77 @@ +program test_conv1d_layer + + use iso_fortran_env, only: stderr => error_unit + use nf, only: conv1d, input, layer + use nf_input2d_layer, only: input2d_layer + + implicit none + + type(layer) :: conv1d_layer, input_layer + integer, parameter :: filters = 32, kernel_size=3 + real, allocatable :: sample_input(:,:), output(:,:) + real, parameter :: tolerance = 1e-7 + logical :: ok = .true. + + conv1d_layer = conv1d(filters, kernel_size) + + if (.not. conv1d_layer % name == 'conv1d') then + ok = .false. + write(stderr, '(a)') 'conv1d layer has its name set correctly.. failed' + end if + + if (conv1d_layer % initialized) then + ok = .false. + write(stderr, '(a)') 'conv1d layer should not be marked as initialized yet.. failed' + end if + + if (.not. conv1d_layer % activation == 'relu') then + ok = .false. 
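The `forward` and `backward` implementations above are exact inverses: `reshape` lays the rank-1 input out in column-major order, and `pack(gradient, .true.)` flattens it back in the same order. A tiny standalone sketch of that round trip using only the intrinsics:

```fortran
program reshape2d_roundtrip
  ! Demonstrates the column-major round trip used by the reshape2d layer:
  ! forward  : output   = reshape(input, output_shape)
  ! backward : gradient = pack(gradient_2d, .true.)
  implicit none

  real :: x(6), x_back(6)
  real :: y(2, 3)
  integer :: i

  x = [(real(i), i = 1, 6)]

  y = reshape(x, [2, 3])    ! column-major: y(:,1) = [1, 2], y(:,2) = [3, 4], ...
  x_back = pack(y, .true.)  ! flattens in the same column-major order

  print '(a, 6f5.1)', 'original :', x
  print '(a, 6f5.1)', 'restored :', x_back
end program reshape2d_roundtrip
```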
+    write(stderr, '(a)') 'conv1d layer defaults to relu activation.. failed'
+  end if
+
+  input_layer = input(3, 32)
+  call conv1d_layer % init(input_layer)
+
+  if (.not. conv1d_layer % initialized) then
+    ok = .false.
+    write(stderr, '(a)') 'conv1d layer should now be marked as initialized.. failed'
+  end if
+
+  if (.not. all(conv1d_layer % input_layer_shape == [3, 32])) then
+    ok = .false.
+    write(stderr, '(a)') 'conv1d layer input layer shape should be correct.. failed'
+  end if
+
+  if (.not. all(conv1d_layer % layer_shape == [filters, 30])) then
+    ok = .false.
+    write(stderr, '(a)') 'conv1d layer output layer shape should be correct.. failed'
+  end if
+
+  ! Minimal conv1d layer: 1 channel, length-3 input
+  allocate(sample_input(1, 3))
+  sample_input = 0
+
+  input_layer = input(1, 3)
+  conv1d_layer = conv1d(filters, kernel_size)
+  call conv1d_layer % init(input_layer)
+
+  select type(this_layer => input_layer % p); type is(input2d_layer)
+    call this_layer % set(sample_input)
+  end select
+
+  call conv1d_layer % forward(input_layer)
+  call conv1d_layer % get_output(output)
+
+  if (.not. all(abs(output) < tolerance)) then
+    ok = .false.
+    write(stderr, '(a)') 'conv1d layer with zero input must forward to all zeros.. failed'
+  end if
+
+  if (ok) then
+    print '(a)', 'test_conv1d_layer: All tests passed.'
+  else
+    write(stderr, '(a)') 'test_conv1d_layer: One or more tests failed.'
+    stop 1
+  end if
+
+end program test_conv1d_layer
diff --git a/test/test_conv1d_network.f90 b/test/test_conv1d_network.f90
new file mode 100644
index 00000000..5a353cf9
--- /dev/null
+++ b/test/test_conv1d_network.f90
@@ -0,0 +1,154 @@
+program test_conv1d_network
+
+  use iso_fortran_env, only: stderr => error_unit
+  use nf, only: conv1d, input, network, dense, sgd, maxpool1d
+
+  implicit none
+
+  type(network) :: net
+  real, allocatable :: sample_input(:,:), output(:,:)
+  logical :: ok = .true.
+
+  ! 3-layer convolutional network
+  net = network([ &
+    input(3, 32), &
+    conv1d(filters=16, kernel_size=3), &
+    conv1d(filters=32, kernel_size=3) &
+  ])
+
+  if (.not. size(net % layers) == 3) then
+    write(stderr, '(a)') 'conv1d network should have 3 layers.. failed'
+    ok = .false.
+  end if
+
+  ! Test for output shape
+  allocate(sample_input(3, 32))
+  sample_input = 0
+
+  call net % forward(sample_input)
+  call net % layers(3) % get_output(output)
+
+  if (.not. all(shape(output) == [32, 28])) then
+    write(stderr, '(a)') 'conv1d network output should have correct shape.. failed'
+    ok = .false.
+  end if
+
+  deallocate(sample_input, output)
+
+  training1: block
+
+    type(network) :: cnn
+    real :: y(1)
+    real :: tolerance = 1e-4
+    integer :: n
+    integer, parameter :: num_iterations = 1000
+
+    ! Test training of a minimal constant mapping
+    allocate(sample_input(1, 5))
+    call random_number(sample_input)
+
+    cnn = network([ &
+      input(1, 5), &
+      conv1d(filters=1, kernel_size=3), &
+      conv1d(filters=1, kernel_size=3), &
+      dense(1) &
+    ])
+
+    y = [0.1234567]
+
+    do n = 1, num_iterations
+      call cnn % forward(sample_input)
+      call cnn % backward(y)
+      call cnn % update(optimizer=sgd(learning_rate=1.))
+
+      if (all(abs(cnn % predict(sample_input) - y) < tolerance)) exit
+    end do
+
+    if (.not. n <= num_iterations) then
+      write(stderr, '(a)') &
+        'convolutional network 1 should converge in simple training.. failed'
+      ok = .false.
+    end if
+
+  end block training1
+
+  training2: block
+
+    type(network) :: cnn
+    real :: x(1, 8)
+    real :: y(1)
+    real :: tolerance = 1e-4
+    integer :: n
+    integer, parameter :: num_iterations = 1000
+
+    call random_number(x)
+    y = [0.1234567]
+
+    cnn = network([ &
+      input(1, 8), &
+      conv1d(filters=1, kernel_size=3), &
+      maxpool1d(pool_size=2), &
+      conv1d(filters=1, kernel_size=3), &
+      dense(1) &
+    ])
+
+    do n = 1, num_iterations
+      call cnn % forward(x)
+      call cnn % backward(y)
+      call cnn % update(optimizer=sgd(learning_rate=1.))
+      if (all(abs(cnn % predict(x) - y) < tolerance)) exit
+    end do
+
+    if (.not. n <= num_iterations) then
+      write(stderr, '(a)') &
+        'convolutional network 2 should converge in simple training.. failed'
+      ok = .false.
+    end if
+
+  end block training2
+
+  training3: block
+
+    type(network) :: cnn
+    real :: x(1, 12)
+    real :: y(9)
+    real :: tolerance = 1e-4
+    integer :: n
+    integer, parameter :: num_iterations = 5000
+
+    call random_number(x)
+    y = [0.12345, 0.23456, 0.34567, 0.45678, 0.56789, 0.67890, 0.78901, 0.89012, 0.90123]
+
+    cnn = network([ &
+      input(1, 12), &
+      conv1d(filters=1, kernel_size=3), & ! 1x12 input, 1x10 output
+      maxpool1d(pool_size=2), &           ! 1x10 input, 1x5 output
+      conv1d(filters=1, kernel_size=3), & ! 1x5 input, 1x3 output
+      dense(9) &                          ! 9 outputs
+    ])
+
+    do n = 1, num_iterations
+      call cnn % forward(x)
+      call cnn % backward(y)
+      call cnn % update(optimizer=sgd(learning_rate=1.))
+      if (all(abs(cnn % predict(x) - y) < tolerance)) exit
+    end do
+
+    if (.not. n <= num_iterations) then
+      write(stderr, '(a)') &
+        'convolutional network 3 should converge in simple training.. failed'
+      ok = .false.
+    end if
+
+  end block training3
+
+
+  if (ok) then
+    print '(a)', 'test_conv1d_network: All tests passed.'
+  else
+    write(stderr, '(a)') 'test_conv1d_network: One or more tests failed.'
+    stop 1
+  end if
+
+end program test_conv1d_network
\ No newline at end of file
diff --git a/test/test_conv2d_network.f90 b/test/test_conv2d_network.f90
index 1bdfc677..73c4595a 100644
--- a/test/test_conv2d_network.f90
+++ b/test/test_conv2d_network.f90
@@ -60,6 +60,7 @@ program test_conv2d_network
       call cnn % forward(sample_input)
       call cnn % backward(y)
       call cnn % update(optimizer=sgd(learning_rate=1.))
+      if (all(abs(cnn % predict(sample_input) - y) < tolerance)) exit
     end do
diff --git a/test/test_embedding_layer.f90 b/test/test_embedding_layer.f90
new file mode 100644
index 00000000..99b7fca6
--- /dev/null
+++ b/test/test_embedding_layer.f90
@@ -0,0 +1,133 @@
+program test_embedding_layer
+  use iso_fortran_env, only: stderr => error_unit
+  use nf_embedding_layer, only: embedding_layer
+  use nf_layer, only: layer
+  use nf_layer_constructors, only: embedding_constructor => embedding
+  implicit none
+
+  logical :: ok = .true.
+  integer :: sample_input(3) = [2, 1, 3]
+
+  call test_simple(ok, sample_input)
+  call test_positional_trigonometric(ok, sample_input)
+  call test_positional_absolute(ok, sample_input)
+
+  if (ok) then
+    print '(a)', 'test_embedding_layer: All tests passed.'
+  else
+    write(stderr, '(a)') 'test_embedding_layer: One or more tests failed.'
+ error stop 1 + end if + +contains + subroutine test_simple(ok, sample_input) + logical, intent(in out) :: ok + integer, intent(in) :: sample_input(:) + + real :: sample_gradient(3, 2) = reshape([0.1, 0.2, 0.3, 0.4, 0.6, 0.6], [3, 2]) + real :: output_flat(6) + real :: expected_output_flat(6) = reshape([0.3, 0.1, 0.5, 0.4, 0.2, 0.6], [6]) + real :: dw_flat(8) + real :: expected_dw_flat(8) = reshape([0.2, 0.1, 0.3, 0., 0.6, 0.4, 0.6, 0.], [8]) + type(embedding_layer) :: embedding + + embedding = embedding_layer(vocab_size=4, model_dimension=2) + call embedding % init([3]) + embedding % weights = reshape([0.1, 0.3, 0.5, 0.7, 0.2, 0.4, 0.6, 0.8], [4, 2]) + + call embedding % forward(sample_input) + + output_flat = reshape(embedding % output, [6]) + if (.not. all(output_flat.eq.expected_output_flat)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect values.. failed' + end if + + call embedding % backward(sample_input, sample_gradient) + dw_flat = reshape(embedding % dw, shape(dw_flat)) + if (.not. all(dw_flat.eq.expected_dw_flat)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect dw values.. failed' + end if + end subroutine test_simple + + subroutine test_positional_trigonometric(ok, sample_input) + logical, intent(in out) :: ok + integer, intent(in) :: sample_input(:) + + real :: output_flat(12) + real :: expected_output_flat(12) = reshape([& + 0.3, 0.941471, 1.4092975,& + 1.3, 0.64030236, 0.08385316,& + 0.3, 0.10999984, 0.51999867,& + 1.3, 1.09995, 1.4998& + ], [12]) + type(embedding_layer) :: embedding + + real :: theta + integer :: i, pos + + embedding = embedding_layer(vocab_size=5, model_dimension=4, positional=1) + call embedding % init([3]) + embedding % weights = reshape([& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2& + ], [5, 4]) + + call embedding % forward(sample_input) + + output_flat = reshape(embedding % output, [12]) + if (.not. all(abs(output_flat - expected_output_flat) <= (1e-06 + 1e-05 * abs(expected_output_flat)))) then + ok = .false. + write(stderr, '(a)') 'trigonometric positional encoding returned incorrect values.. failed' + end if + end subroutine test_positional_trigonometric + + subroutine test_positional_absolute(ok, sample_input) + logical, intent(in out) :: ok + integer, intent(in) :: sample_input(:) + + real :: output_flat(12) + real :: expected_output_flat(12) = reshape([& + 0.3, 1.1, 2.5,& + 0.3, 1.1, 2.5,& + 0.3, 1.1, 2.5,& + 0.3, 1.1, 2.5& + ], [12]) + type(embedding_layer) :: embedding + + real :: theta + integer :: i, pos + + embedding = embedding_layer(vocab_size=5, model_dimension=4, positional=2) + call embedding % init([3]) + embedding % weights = reshape([& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2& + ], [5, 4]) + + call embedding % forward(sample_input) + + output_flat = reshape(embedding % output, [12]) + if (.not. all(abs(output_flat - expected_output_flat) <= (1e-06 + 1e-05 * abs(expected_output_flat)))) then + ok = .false. + write(stderr, '(a)') 'absolute positional encoding returned incorrect values.. 
failed' + end if + end subroutine test_positional_absolute + + subroutine test_embedding_constructor(ok, sample_input) + logical, intent(in out) :: ok + integer, intent(in) :: sample_input(:) + + type(layer) :: embedding_constructed + + embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4) + embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4, positional=0) + embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4, positional=1) + embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4, positional=2) + end subroutine test_embedding_constructor +end program test_embedding_layer diff --git a/test/test_layernorm.f90 b/test/test_layernorm.f90 new file mode 100644 index 00000000..6a897575 --- /dev/null +++ b/test/test_layernorm.f90 @@ -0,0 +1,193 @@ +program test_layernorm_instance + use iso_fortran_env, only: stderr => error_unit + use nf_layernorm_layer, only: layernorm_layer + use nf_linear2d_layer, only: linear2d_layer + use nf_layer, only: layer + use nf, only: sgd, layernorm, network, input, flatten, linear2d + implicit none + + logical :: ok = .true. + type(layernorm_layer) :: layernorm_instance + real :: sample_input(3, 4) = reshape([0.0, 10.1, 0.2, 10.3, 0.4, 10.5, 0.6, 10.7, 10.8, 0.9, 0.11, 0.12], [3, 4]) + real :: sample_gradient(3, 4) = reshape([0.1, 3., 2., 0.1, 3., 3., 0.1, 2., 0.1, 3., 0.1, 3.], [3, 4]) + + layernorm_instance = layernorm_layer() + call layernorm_instance % init([3, 4]) + + call test_layernorm_forward(layernorm_instance, sample_input, ok) + call test_layernorm_backward(layernorm_instance, sample_input, sample_gradient, ok) + call test_layernorm_gradients(sample_input, sample_gradient, ok) + call test_layernorm_integration(ok) + + if (ok) then + print '(a)', 'test_layernorm_layer: All tests passed.' + else + write(stderr, '(a)') 'test_layernorm_layer: One or more tests failed.' + error stop 1 + end if + +contains + function allclose(x, y) result(res) + real, intent(in) :: x(:) + real, intent(in) :: y(:) + logical :: res + + res = all(abs(x - y) <= (1e-06 + 1e-05 * abs(y))) + end function allclose + + subroutine test_layernorm_forward(layernorm_instance, input, ok) + type(layernorm_layer), intent(in out) :: layernorm_instance + real, intent(in out) :: input(:, :) + logical, intent(in out) :: ok + real :: output_shape(2) + real :: output_flat(12) + real :: expected_shape(2) = [3, 4] + real :: expected_output_flat(12) = [& + -0.693158746, 0.939844191, -0.992156327, 1.72702277, -0.970368207, 0.971188426,& + -0.552177250, 1.05800152, 1.02837324, -0.481686622, -1.02747762, -1.00740564& + ] + + call layernorm_instance % forward(input) + + output_shape = shape(layernorm_instance % output) + if (.not. all(output_shape.eq.expected_shape)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect shape.. failed' + end if + output_flat = reshape(layernorm_instance % output, shape(output_flat)) + if (.not. allclose(output_flat, expected_output_flat)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect values.. 
failed' + end if + end subroutine test_layernorm_forward + + subroutine test_layernorm_backward(layernorm_instance, input, gradient, ok) + type(layernorm_layer), intent(in out) :: layernorm_instance + real, intent(in out) :: input(:, :) + real, intent(in out) :: gradient(:, :) + logical, intent(in out) :: ok + + real :: gradient_shape(2) + real :: gradient_flat(12) + real :: expected_gradient_shape(2) = [3, 4] + real :: expected_gradient_flat(12) = [& + -0.227230772, 0.103088334, -9.88590196E-02, -2.86390483E-02, 0.283811331, 0.277955681,& + -0.215662330, -0.105019525, -0.269407451, 0.471532196, -0.281880081, 9.03107598E-02& + ] + + real :: d_gamma(4) + real :: expected_d_gamma(4) = [0.765904069, 0.175162792, 2.16362262, -4.57002449] + real :: d_beta(4) + real :: expected_d_beta(4) = [5.1, 6.1, 2.2, 6.1] + + call layernorm_instance % backward(input, gradient) + + gradient_shape = shape(layernorm_instance % gradient) + if (.not. all(gradient_shape.eq.expected_gradient_shape)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect gradient shape.. failed' + end if + gradient_flat = reshape(layernorm_instance % gradient, shape(gradient_flat)) + if (.not. allclose(gradient_flat, expected_gradient_flat)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect gradient values.. failed' + end if + + if (.not. allclose(layernorm_instance % d_gamma, expected_d_gamma)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect d_gamma values.. failed' + end if + if (.not. allclose(layernorm_instance % d_beta, expected_d_beta)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect d_beta values.. failed' + end if + end subroutine test_layernorm_backward + + subroutine test_layernorm_gradients(input, gradient, ok) + real, intent(in out) :: input(:, :) + real, intent(in out) :: gradient(:, :) + logical, intent(in out) :: ok + type(layernorm_layer) :: layernorm_instance + type(sgd) :: optim + + real :: parameters(8) + real :: expected_parameters(8) + real :: updated_output(12) + real :: expected_updated_output(12) = [& + -0.738849819, 0.881645918, -1.03555739,& + 1.66299772, -1.02966857, 0.908487320,& + -0.562230229, 1.01311040, 0.984123051,& + -0.564699769, -1.13543355, -1.11444426& + ] + + layernorm_instance = layernorm_layer() + call layernorm_instance % init([3, 4]) + + call layernorm_instance % forward(input) + call layernorm_instance % backward(input, gradient) + + if (layernorm_instance % get_num_params() /= 8) then + ok = .false. + write(stderr, '(a)') 'incorrect number of parameters.. failed' + end if + + expected_parameters(1: 4) = 1. + expected_parameters(5: 8) = 0. + parameters = layernorm_instance % get_params() + if (.not. all(parameters.eq.expected_parameters)) then + ok = .false. + write(stderr, '(a)') 'incorrect parameters.. failed' + end if + + optim = SGD(learning_rate=0.01) + call optim % minimize(parameters, layernorm_instance % get_gradients()) + call layernorm_instance % set_params(parameters) + + call layernorm_instance % forward(input) + + updated_output = reshape(layernorm_instance % output, [12]) + if (.not. allclose(updated_output, expected_updated_output)) then + ok = .false. + write(stderr, '(a)') 'incorrect output after parameters update.. 
failed'
+    end if
+  end subroutine test_layernorm_gradients
+
+  subroutine test_layernorm_integration(ok)
+    logical, intent(in out) :: ok
+
+    type(network) :: net
+    real :: x(2, 3) = reshape([0.1, 2., 0.3, 4., 0.5, 6.], [2, 3])
+    real :: y(6) = [0.7, 0.2, 0.1, 0.1, 0.01, 0.9]
+    real :: tolerance = 0.1
+    integer :: epoch
+    integer :: epochs = 10000
+
+    net = network([&
+      input(2, 3),&
+      linear2d(3),&
+      layernorm(),&
+      flatten()&
+    ])
+
+    ! Kaiming weights to achieve a semblance of convergence
+    select type(l => net % layers(2) % p)
+      type is(linear2d_layer)
+        call random_number(l % weights)
+        l % weights = l % weights * sqrt(2. / 6.)
+        l % biases = 0.2
+    end select
+
+    do epoch = 1, epochs
+      call net % forward(x)
+      call net % backward(y)
+      call net % update(optimizer=sgd(learning_rate=0.001))
+      if (all(abs(net % predict(x) - y) < tolerance)) exit
+    end do
+
+    if (.not. epoch <= epochs) then
+      write(stderr, '(a)') &
+        'linear2d + layernorm should converge in simple training.. failed'
+      ok = .false.
+    end if
+  end subroutine test_layernorm_integration
+end program test_layernorm_instance
diff --git a/test/test_locally_connected1d_layer.f90 b/test/test_locally_connected1d_layer.f90
new file mode 100644
index 00000000..e8a30cfc
--- /dev/null
+++ b/test/test_locally_connected1d_layer.f90
@@ -0,0 +1,78 @@
+program test_locally_connected1d_layer
+
+  use iso_fortran_env, only: stderr => error_unit
+  use nf, only: locally_connected1d, input, layer
+  use nf_input2d_layer, only: input2d_layer
+
+  implicit none
+
+  type(layer) :: locally_connected_1d_layer, input_layer
+  integer, parameter :: filters = 32, kernel_size=3
+  real, allocatable :: sample_input(:,:), output(:,:)
+  real, parameter :: tolerance = 1e-7
+  logical :: ok = .true.
+
+  locally_connected_1d_layer = locally_connected1d(filters, kernel_size)
+
+  if (.not. locally_connected_1d_layer % name == 'locally_connected1d') then
+    ok = .false.
+    write(stderr, '(a)') 'locally_connected1d layer has its name set correctly.. failed'
+  end if
+
+  if (locally_connected_1d_layer % initialized) then
+    ok = .false.
+    write(stderr, '(a)') 'locally_connected1d layer should not be marked as initialized yet.. failed'
+  end if
+
+  if (.not. locally_connected_1d_layer % activation == 'relu') then
+    ok = .false.
+    write(stderr, '(a)') 'locally_connected1d layer defaults to relu activation.. failed'
+  end if
+
+  input_layer = input(3, 32)
+  call locally_connected_1d_layer % init(input_layer)
+
+  if (.not. locally_connected_1d_layer % initialized) then
+    ok = .false.
+    write(stderr, '(a)') 'locally_connected1d layer should now be marked as initialized.. failed'
+  end if
+
+  if (.not. all(locally_connected_1d_layer % input_layer_shape == [3, 32])) then
+    ok = .false.
+    write(stderr, '(a)') 'locally_connected1d layer input layer shape should be correct.. failed'
+  end if
+
+  if (.not. all(locally_connected_1d_layer % layer_shape == [filters, 30])) then
+    ok = .false.
+    write(stderr, '(a)') 'locally_connected1d layer output layer shape should be correct.. failed'
+  end if
+
+  ! Minimal locally_connected1d layer: 1 channel, length-3 input
+  allocate(sample_input(1, 3))
+  sample_input = 0
+
+  input_layer = input(1, 3)
+  locally_connected_1d_layer = locally_connected1d(filters, kernel_size)
+  call locally_connected_1d_layer % init(input_layer)
+
+  select type(this_layer => input_layer % p); type is(input2d_layer)
+    call this_layer % set(sample_input)
+  end select
+
+  call locally_connected_1d_layer % forward(input_layer)
+  call locally_connected_1d_layer % get_output(output)
+
+  if (.not. all(abs(output) < tolerance)) then
+    ok = .false.
+    write(stderr, '(a)') 'locally_connected1d layer with zero input must forward to all zeros.. failed'
+  end if
+
+  if (ok) then
+    print '(a)', 'test_locally_connected1d_layer: All tests passed.'
+  else
+    write(stderr, '(a)') 'test_locally_connected1d_layer: One or more tests failed.'
+    stop 1
+  end if
+
+end program test_locally_connected1d_layer
diff --git a/test/test_maxpool1d_layer.f90 b/test/test_maxpool1d_layer.f90
new file mode 100644
index 00000000..023a2c33
--- /dev/null
+++ b/test/test_maxpool1d_layer.f90
@@ -0,0 +1,95 @@
+program test_maxpool1d_layer
+
+  use iso_fortran_env, only: stderr => error_unit
+  use nf, only: maxpool1d, input, layer
+  use nf_input2d_layer, only: input2d_layer
+  use nf_maxpool1d_layer, only: maxpool1d_layer
+
+  implicit none
+
+  type(layer) :: maxpool_layer, input_layer
+  integer, parameter :: pool_size = 2, stride = 2
+  integer, parameter :: channels = 3, length = 32
+  integer, parameter :: input_shape(2) = [channels, length]
+  integer, parameter :: output_shape(2) = [channels, length / 2]
+  real, allocatable :: sample_input(:,:), output(:,:), gradient(:,:)
+  integer :: i
+  logical :: ok = .true., gradient_ok = .true.
+
+  maxpool_layer = maxpool1d(pool_size)
+
+  if (.not. maxpool_layer % name == 'maxpool1d') then
+    ok = .false.
+    write(stderr, '(a)') 'maxpool1d layer has its name set correctly.. failed'
+  end if
+
+  if (maxpool_layer % initialized) then
+    ok = .false.
+    write(stderr, '(a)') 'maxpool1d layer should not be marked as initialized yet.. failed'
+  end if
+
+  input_layer = input(channels, length)
+  call maxpool_layer % init(input_layer)
+
+  if (.not. maxpool_layer % initialized) then
+    ok = .false.
+    write(stderr, '(a)') 'maxpool1d layer should now be marked as initialized.. failed'
+  end if
+
+  if (.not. all(maxpool_layer % input_layer_shape == input_shape)) then
+    ok = .false.
+    write(stderr, '(a)') 'maxpool1d layer input layer shape should be correct.. failed'
+  end if
+
+  if (.not. all(maxpool_layer % layer_shape == output_shape)) then
+    ok = .false.
+    write(stderr, '(a)') 'maxpool1d layer output layer shape should be correct.. failed'
+  end if
+
+  ! Allocate and initialize sample input data
+  allocate(sample_input(channels, length))
+  do concurrent(i = 1:length)
+    sample_input(:,i) = i
+  end do
+
+  select type(this_layer => input_layer % p); type is(input2d_layer)
+    call this_layer % set(sample_input)
+  end select
+
+  call maxpool_layer % forward(input_layer)
+  call maxpool_layer % get_output(output)
+
+  do i = 1, length / 2
+    if (.not. all(output(:,i) == stride * i)) then
+      ok = .false.
+      write(stderr, '(a)') 'maxpool1d layer forward pass correctly propagates the max value.. failed'
+    end if
+  end do
+
+  ! Test the backward pass
+  allocate(gradient, source=output)
+  call maxpool_layer % backward(input_layer, gradient)
+
+  select type(this_layer => maxpool_layer % p); type is(maxpool1d_layer)
+    do i = 1, length
+      if (mod(i,2) == 0) then
+        if (.not. 
all(sample_input(:,i) == this_layer % gradient(:,i))) gradient_ok = .false. + else + if (.not. all(this_layer % gradient(:,i) == 0)) gradient_ok = .false. + end if + end do + end select + + if (.not. gradient_ok) then + ok = .false. + write(stderr, '(a)') 'maxpool1d layer backward pass produces the correct dL/dx.. failed' + end if + + if (ok) then + print '(a)', 'test_maxpool1d_layer: All tests passed.' + else + write(stderr, '(a)') 'test_maxpool1d_layer: One or more tests failed.' + stop 1 + end if + +end program test_maxpool1d_layer diff --git a/test/test_reshape2d_layer.f90 b/test/test_reshape2d_layer.f90 new file mode 100644 index 00000000..52817eac --- /dev/null +++ b/test/test_reshape2d_layer.f90 @@ -0,0 +1,55 @@ +program test_reshape2d_layer + + use iso_fortran_env, only: stderr => error_unit + use nf, only: input, network, reshape2d_layer => reshape2d + use nf_datasets, only: download_and_unpack, keras_reshape_url + + implicit none + + type(network) :: net + real, allocatable :: sample_input(:), output(:,:) + integer, parameter :: output_shape(2) = [4,4] + integer, parameter :: input_size = product(output_shape) + character(*), parameter :: keras_reshape_path = 'keras_reshape.h5' + logical :: file_exists + logical :: ok = .true. + + ! Create the network + net = network([ & + input(input_size), & + reshape2d_layer(output_shape) & + ]) + + if (.not. size(net % layers) == 2) then + write(stderr, '(a)') 'the network should have 2 layers.. failed' + ok = .false. + end if + + ! Initialize test data + allocate(sample_input(input_size)) + call random_number(sample_input) + + ! Propagate forward and get the output + call net % forward(sample_input) + call net % layers(2) % get_output(output) + + ! Check shape of the output + if (.not. all(shape(output) == output_shape)) then + write(stderr, '(a)') 'the reshape layer produces expected output shape.. failed' + ok = .false. + end if + + ! Check if reshaped input matches output + if (.not. all(reshape(sample_input, output_shape) == output)) then + write(stderr, '(a)') 'the reshape layer produces expected output values.. failed' + ok = .false. + end if + + if (ok) then + print '(a)', 'test_reshape2d_layer: All tests passed.' + else + write(stderr, '(a)') 'test_reshape2d_layer: One or more tests failed.' + stop 1 + end if + +end program test_reshape2d_layer
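
The new reshape2d layer's forward pass is a plain rank-1 to rank-2 `reshape`, and its backward pass flattens the incoming gradient with `pack(gradient, .true.)`. Because both intrinsics traverse array elements in column-major order, the two operations are exact inverses of each other. The standalone sketch below is not part of the patch; the program name and the 28x28 shape are chosen only for illustration of that round trip.

program reshape2d_roundtrip_demo
  ! Standalone sketch: verify that a rank-1 -> rank-2 reshape followed by
  ! pack(..., .true.) recovers the original rank-1 array, which is the
  ! identity that reshape2d_layer's forward and backward passes rely on.
  implicit none
  integer, parameter :: output_shape(2) = [28, 28]
  real :: x(product(output_shape))              ! rank-1 input to the layer
  real :: y(output_shape(1), output_shape(2))   ! rank-2 result of the forward pass
  real :: g(product(output_shape))              ! flattened gradient from the backward pass

  call random_number(x)
  y = reshape(x, output_shape)   ! what forward() does
  g = pack(y, .true.)            ! what backward() does to the incoming gradient
  print '(a,l1)', 'round trip exact: ', all(g == x)
end program reshape2d_roundtrip_demo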