diff --git a/examples/gaussian_process/plot_compare_gpr_krr.py b/examples/gaussian_process/plot_compare_gpr_krr.py
index 1eb771673b0d6..93af9f9a8aa7a 100644
--- a/examples/gaussian_process/plot_compare_gpr_krr.py
+++ b/examples/gaussian_process/plot_compare_gpr_krr.py
@@ -56,40 +56,43 @@
 
 import matplotlib.pyplot as plt
 
-from sklearn.kernel_ridge import KernelRidge
-from sklearn.model_selection import GridSearchCV
+# from sklearn.kernel_ridge import KernelRidge
+# from sklearn.model_selection import GridSearchCV
 from sklearn.gaussian_process import GaussianProcessRegressor
-from sklearn.gaussian_process.kernels import WhiteKernel, ExpSineSquared
+from sklearn.gaussian_process.kernels import WhiteKernel, ExpSineSquared, RBF
 
 rng = np.random.RandomState(0)
 
 # Generate sample data
 X = 15 * rng.rand(100, 1)
 y = np.sin(X).ravel()
-y += 3 * (0.5 - rng.rand(X.shape[0]))  # add noise
-
-# Fit KernelRidge with parameter selection based on 5-fold cross validation
-param_grid = {"alpha": [1e0, 1e-1, 1e-2, 1e-3],
-              "kernel": [ExpSineSquared(l, p)
-                         for l in np.logspace(-2, 2, 10)
-                         for p in np.logspace(0, 2, 10)]}
-kr = GridSearchCV(KernelRidge(), param_grid=param_grid)
-stime = time.time()
-kr.fit(X, y)
-print("Time for KRR fitting: %.3f" % (time.time() - stime))
-
-gp_kernel = ExpSineSquared(1.0, 5.0, periodicity_bounds=(1e-2, 1e1)) \
+y += 1.5 * (0.5 - rng.rand(X.shape[0]))  # add noise
+
+# # Fit KernelRidge with parameter selection based on 5-fold cross validation
+# param_grid = {"alpha": [1e0, 1e-1, 1e-2, 1e-3],
+#               "kernel": [ExpSineSquared(l, p)
+#                          for l in np.logspace(-2, 2, 10)
+#                          for p in np.logspace(0, 2, 10)]}
+# kr = GridSearchCV(KernelRidge(), param_grid=param_grid)
+# stime = time.time()
+# kr.fit(X, y)
+# print("Time for KRR fitting: %.3f" % (time.time() - stime))
+
+gp_kernel = (
+    ExpSineSquared(1.0, periodicity=5.0, periodicity_bounds=(1e-2, 1e1))
+    * RBF(length_scale=10, length_scale_bounds=(1e-2, 1e2))
     + WhiteKernel(1e-1)
-gpr = GaussianProcessRegressor(kernel=gp_kernel)
+)
+gpr = GaussianProcessRegressor(kernel=gp_kernel, normalize_y=True)
 stime = time.time()
 gpr.fit(X, y)
 print("Time for GPR fitting: %.3f" % (time.time() - stime))
 
-# Predict using kernel ridge
-X_plot = np.linspace(0, 20, 10000)[:, None]
-stime = time.time()
-y_kr = kr.predict(X_plot)
-print("Time for KRR prediction: %.3f" % (time.time() - stime))
+# Input grid for the plots below (kernel ridge prediction is commented out)
+X_plot = np.linspace(0, 40, 1000)[:, None]
+# stime = time.time()
+# y_kr = kr.predict(X_plot)
+# print("Time for KRR prediction: %.3f" % (time.time() - stime))
 
 # Predict using gaussian process regressor
 stime = time.time()
@@ -106,16 +109,16 @@
 lw = 2
 plt.scatter(X, y, c='k', label='data')
 plt.plot(X_plot, np.sin(X_plot), color='navy', lw=lw, label='True')
-plt.plot(X_plot, y_kr, color='turquoise', lw=lw,
-         label='KRR (%s)' % kr.best_params_)
+# plt.plot(X_plot, y_kr, color='turquoise', lw=lw,
+#          label='KRR (%s)' % kr.best_params_)
 plt.plot(X_plot, y_gpr, color='darkorange', lw=lw,
          label='GPR (%s)' % gpr.kernel_)
 plt.fill_between(X_plot[:, 0], y_gpr - y_std, y_gpr + y_std, color='darkorange',
                  alpha=0.2)
 plt.xlabel('data')
 plt.ylabel('target')
-plt.xlim(0, 20)
+plt.xlim(0, 40)
 plt.ylim(-4, 4)
 plt.title('GPR versus Kernel Ridge')
-plt.legend(loc="best",  scatterpoints=1, prop={'size': 8})
+plt.legend(loc="best", scatterpoints=1, prop={'size': 8})
 plt.show()
diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py
index 008c24f294737..4d77255c2471d 100644
--- a/sklearn/gaussian_process/kernels.py
+++ b/sklearn/gaussian_process/kernels.py
@@ -1975,35 +1975,60 @@ def __call__(self, X, Y=None, eval_gradient=False):
             is True.
         """
         X = np.atleast_2d(X)
+        periodic_cst = np.pi / self.periodicity
+
         if Y is None:
-            dists = squareform(pdist(X, metric='euclidean'))
-            arg = np.pi * dists / self.periodicity
-            sin_of_arg = np.sin(arg)
-            K = np.exp(- 2 * (sin_of_arg / self.length_scale) ** 2)
+            # K = exp(-2/l^2 * sum_i( sin^2 (pi/p * (x_i - x_i')) ))
+            dists = squareform(pdist(
+                X,
+                metric=lambda u, v:
+                np.sum(np.sin(periodic_cst * (u - v)) ** 2)
+            ))
+            K = np.exp(-2 * dists / self.length_scale ** 2)
         else:
             if eval_gradient:
                 raise ValueError(
                     "Gradient can only be evaluated when Y is None.")
-            dists = cdist(X, Y, metric='euclidean')
-            K = np.exp(- 2 * (np.sin(np.pi / self.periodicity * dists)
-                              / self.length_scale) ** 2)
+            # K = exp(-2/l^2 * sum_i( sin^2 (pi/p * (x_i - y_i)) ))
+            dists = cdist(
+                X, Y,
+                metric=lambda u, v:
+                np.sum(np.sin(periodic_cst * (u - v)) ** 2)
+            )
+            K = np.exp(-2 * dists / self.length_scale ** 2)
 
         if eval_gradient:
-            cos_of_arg = np.cos(arg)
-            # gradient with respect to length_scale
             if not self.hyperparameter_length_scale.fixed:
-                length_scale_gradient = \
-                    4 / self.length_scale**2 * sin_of_arg**2 * K
-                length_scale_gradient = length_scale_gradient[:, :, np.newaxis]
-            else:  # length_scale is kept fixed
+                # dK/dlog(l) = 4/l^2 * K * sum_i(sin^2(pi/p * (x_i - x_i')))
+                length_scale_gradient = 4 / self.length_scale ** 2
+                length_scale_gradient *= K
+                length_scale_gradient *= dists
+                length_scale_gradient = length_scale_gradient[..., np.newaxis]
+            else:
                 length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0))
-            # gradient with respect to p
+
             if not self.hyperparameter_periodicity.fixed:
-                periodicity_gradient = \
-                    4 * arg / self.length_scale**2 * cos_of_arg \
-                    * sin_of_arg * K
-                periodicity_gradient = periodicity_gradient[:, :, np.newaxis]
-            else:  # p is kept fixed
+                # Gradient w.r.t. log(p), i.e. p * dK/dp, matching the scaling
+                # of the expression this replaces:
+                # dK/dlog(p) = (4 * pi)/(l^2 * p) * K * sum_i(
+                #    sin(pi/p * d_i) * cos(pi/p * d_i) * d_i
+                # ), with d_i = x_i - x_i'
+                periodicity_gradient = (4 * np.pi) / (
+                    self.length_scale ** 2 * self.periodicity
+                )
+                periodicity_gradient *= K
+                periodicity_gradient *= squareform(
+                    pdist(
+                        X,
+                        metric=lambda u, v: np.sum(
+                            np.sin(periodic_cst * (u - v))
+                            * np.cos(periodic_cst * (u - v))
+                            * (u - v)
+                        ),
+                    )
+                )
+                periodicity_gradient = periodicity_gradient[..., np.newaxis]
+            else:
                 periodicity_gradient = np.empty((K.shape[0], K.shape[1], 0))
 
             return K, np.dstack((length_scale_gradient, periodicity_gradient))
diff --git a/sklearn/gaussian_process/tests/test_kernels.py b/sklearn/gaussian_process/tests/test_kernels.py
index b56c0b06b5fc0..82090e9d87f62 100644
--- a/sklearn/gaussian_process/tests/test_kernels.py
+++ b/sklearn/gaussian_process/tests/test_kernels.py
@@ -367,3 +367,41 @@ def test_rational_quadratic_kernel():
     )
     with pytest.raises(AttributeError, match=message):
         kernel(X)
+
+
+def test_xxx():
+    """Fit a GPR with a periodic + white-noise kernel on noisy sine data."""
+    from sklearn.gaussian_process import GaussianProcessRegressor
+
+    rng = np.random.RandomState(0)
+    X = 15 * rng.rand(10, 1)
+    y = np.sin(X).ravel()
+    y += 3 * (0.5 - rng.rand(X.shape[0]))  # add noise
+
+    gp_kernel = (
+        ExpSineSquared(1.0, 5.0, periodicity_bounds=(1e-2, 1e1)) +
+        WhiteKernel(1e-1)
+    )
+    gpr = GaussianProcessRegressor(kernel=gp_kernel, normalize_y=True)
+    assert np.all(np.isfinite(gpr.fit(X, y).predict(X)))
+
+
+def test_yyy():
+    """Check the ExpSineSquared Gram matrix on a 2D grid is positive definite.
+
+    The kernel matrix, with a small diagonal jitter added, must only have
+    strictly positive eigenvalues.
+    """
+    L = 1.0
+
+    # create some train/test data on a grid
+    train_len = 4
+    r = np.linspace(0, L, train_len)
+    train_x, train_y = np.meshgrid(r, r)
+    train_in = np.stack((train_x.flatten(), train_y.flatten()), axis=-1)
+
+    # default hyperparameters; the jitter is added on the diagonal only
+    kernel = ExpSineSquared()
+    K = kernel(train_in) + 1e-4 * np.eye(train_len**2)
+
+    assert np.all(np.linalg.eigvalsh(K) > 0)