diff --git a/_unittests/ut_mlmodel/test_linked_mlpregression.py b/_unittests/ut_mlmodel/test_linked_mlpregression.py
new file mode 100644
index 0000000..e3dd4cc
--- /dev/null
+++ b/_unittests/ut_mlmodel/test_linked_mlpregression.py
@@ -0,0 +1,126 @@
+# -*- coding: utf-8 -*-
+"""
+@brief test log(time=2s)
+"""
+import unittest
+import numpy
+from numpy.random import random
+from sklearn.neural_network import MLPRegressor
+from sklearn.metrics import mean_absolute_error
+from sklearn.exceptions import ConvergenceWarning
+from mlinsights.ext_test_case import ExtTestCase, ignore_warnings
+from mlinsights.mlmodel import LinkedMLPRegressor
+from mlinsights.mlmodel import (
+    run_test_sklearn_pickle,
+    run_test_sklearn_clone,
+    run_test_sklearn_grid_search_cv,
+)
+
+
+class TestLinkedMLPRegression(ExtTestCase):
+    @ignore_warnings(ConvergenceWarning)
+    def test_regression_diff(self):
+        X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]])
+        Y = numpy.array([1.0, 1.1, 1.2, 10, 1.4])
+        clr = MLPRegressor(hidden_layer_sizes=(3,))
+        clr.fit(X, Y)
+        clq = LinkedMLPRegressor(hidden_layer_sizes=(3,))
+        clq.fit(X, Y)
+        self.assertGreater(clr.n_iter_, 10)
+        self.assertGreater(clq.n_iter_, 10)
+        err1 = mean_absolute_error(Y, clr.predict(X))
+        err2 = mean_absolute_error(Y, clq.predict(X))
+        self.assertLesser(err1, 5)
+        self.assertLesser(err2, 5)
+
+    @ignore_warnings(ConvergenceWarning)
+    def test_regression_linked_int(self):
+        X = numpy.array(
+            [[0.1, 0.11], [0.2, 0.21], [0.3, 0.31], [0.4, 0.41], [0.5, 0.51]]
+        )
+        Y = numpy.array([1.0, 1.1, 1.2, 10, 1.4])
+        clr = MLPRegressor(hidden_layer_sizes=(3,))
+        clr.fit(X, Y)
+        clq = LinkedMLPRegressor(hidden_layer_sizes=(3,), linked=2)
+        clq.fit(X, Y)
+        self.assertGreater(clr.n_iter_, 10)
+        self.assertGreater(clq.n_iter_, 10)
+        err1 = mean_absolute_error(Y, clr.predict(X))
+        err2 = mean_absolute_error(Y, clq.predict(X))
+        self.assertLesser(err1, 5)
+        self.assertLesser(err2, 5)
+
+    @ignore_warnings(ConvergenceWarning)
+    def test_regression_linked(self):
+        linked = [
+            ((0, "c", 1, 2), (0, "i", 0)),
+            ((1, "c", 0, 0), (1, "c", 2, 0)),
+            ((0, "c", 1, 1), (0, "c", 0, 2)),
+            ((0, "i", 2), (0, "c", 0, 0)),
+            ((1, "i", 0), (1, "c", 1, 0)),
+            ((0, "i", 1), (0, "c", 0, 1)),
+        ]
+        X = numpy.array(
+            [[0.1, 0.11], [0.2, 0.21], [0.3, 0.31], [0.4, 0.41], [0.5, 0.51]]
+        )
+        Y = numpy.array([1.0, 1.1, 1.2, 10, 1.4])
+        clr = MLPRegressor(hidden_layer_sizes=(3,))
+        clr.fit(X, Y)
+        clq = LinkedMLPRegressor(hidden_layer_sizes=(3,), linked=linked)
+        clq.fit(X, Y)
+        self.assertEqual(clq.linked_, linked)
+        self.assertEqual(clq.coefs_[0][1, 2], clq.intercepts_[0][0])
+        self.assertEqual(clq.coefs_[1][0, 0], clq.coefs_[1][2, 0])
+        self.assertGreater(clr.n_iter_, 10)
+        self.assertGreater(clq.n_iter_, 10)
+        err1 = mean_absolute_error(Y, clr.predict(X))
+        err2 = mean_absolute_error(Y, clq.predict(X))
+        self.assertLesser(err1, 5)
+        self.assertLesser(err2, 5)
+
+    @ignore_warnings(ConvergenceWarning)
+    def test_regression_pickle(self):
+        X = random(100)
+        eps1 = (random(90) - 0.5) * 0.1
+        eps2 = random(10) * 2
+        eps = numpy.hstack([eps1, eps2])
+        X = X.reshape((100, 1))  # pylint: disable=E1101
+        Y = X.ravel() * 3.4 + 5.6 + eps
+        run_test_sklearn_pickle(lambda: MLPRegressor(hidden_layer_sizes=(3,)), X, Y)
+        run_test_sklearn_pickle(
+            lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)), X, Y
+        )
+
+    @ignore_warnings(ConvergenceWarning)
+    def test_regression_clone(self):
+        run_test_sklearn_clone(lambda: LinkedMLPRegressor())
+
+    @ignore_warnings(ConvergenceWarning)
+    def test_regression_grid_search(self):
+        X = random(100)
+        eps1 = (random(90) - 0.5) * 0.1
+        eps2 = random(10) * 2
+        eps = numpy.hstack([eps1, eps2])
+        X = X.reshape((100, 1))  # pylint: disable=E1101
+        Y = X.ravel() * 3.4 + 5.6 + eps
+        self.assertRaise(
+            lambda: run_test_sklearn_grid_search_cv(
+                lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)), X, Y
+            ),
+            ValueError,
+        )
+        res = run_test_sklearn_grid_search_cv(
+            lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)),
+            X,
+            Y,
+            learning_rate_init=[0.001, 0.0001],
+        )
+        self.assertIn("model", res)
+        self.assertIn("score", res)
+        self.assertGreater(res["score"], -1)
+        self.assertLesser(res["score"], 11)
+
+
+if __name__ == "__main__":
+    # TestLinkedMLPRegression().test_regression_linked()
+    unittest.main()
diff --git a/appveyor.yml b/appveyor.yml
new file mode 100644
index 0000000..835bce4
--- /dev/null
+++ b/appveyor.yml
@@ -0,0 +1,34 @@
+image:
+  - Visual Studio 2019
+environment:
+  matrix:
+    - PYTHON: "C:\\Python310-x64"
+      PYTHON_VERSION: "3.10.x"
+      PYTHON_ARCH: "64"
+      SKL: '>=1.0'
+init:
+  - "ECHO %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH%"
+
+install:
+  - "%PYTHON%\\python -m pip install --upgrade pip"
+  # for many packages
+  - "%PYTHON%\\Scripts\\pip install llvmlite numba"
+  # install precompiled versions not available on pypi
+  - "%PYTHON%\\Scripts\\pip install torch torchvision torchaudio"
+  # other dependencies
+  - "%PYTHON%\\Scripts\\pip install -r requirements.txt"
+  - "%PYTHON%\\Scripts\\pip install scikit-learn%SKL%"
+build: off
+
+before_test:
+  - "%PYTHON%\\python -u setup.py build_ext --inplace --verbose"
+
+test_script:
+  - "%PYTHON%\\python -u setup.py unittests"
+
+after_test:
+  - "%PYTHON%\\python -u setup.py bdist_wheel"
+
+artifacts:
+  - path: dist
+    name: mlinsights
diff --git a/mlinsights/mlmodel/__init__.py b/mlinsights/mlmodel/__init__.py
index ca83052..21210e2 100644
--- a/mlinsights/mlmodel/__init__.py
+++ b/mlinsights/mlmodel/__init__.py
@@ -4,6 +4,7 @@
 from .decision_tree_logreg import DecisionTreeLogisticRegression
 from .extended_features import ExtendedFeatures
 from .interval_regressor import IntervalRegressor
+from .linked_mlpregressor import LinkedMLPRegressor
 from .kmeans_constraint import ConstraintKMeans
 from .kmeans_l1 import KMeansL1L2
 from .ml_featurizer import model_featurizer
diff --git a/mlinsights/mlmodel/linked_mlpregressor.py b/mlinsights/mlmodel/linked_mlpregressor.py
new file mode 100644
index 0000000..15476bf
--- /dev/null
+++ b/mlinsights/mlmodel/linked_mlpregressor.py
@@ -0,0 +1,314 @@
+# -*- coding: utf-8 -*-
+# pylint: disable=E1101
+"""
+@file
+@brief Implements a neural network regression with linked coefficients.
+"""
+import random
+from sklearn.neural_network import MLPRegressor
+
+
+class LinkedMLPBase:
+    """
+    Overloads methods from :epkg:`sklearn:neural_networks:MLPRegressor`
+    and inserts the logic to train linked coefficients.
+    """
+
+    def _initialize(self, y, layer_units, dtype):
+        super()._initialize(y, layer_units, dtype)
+        if hasattr(self, "linked_"):
+            return
+        if self.linked is None:
+            self.linked_ = None
+            return
+        if isinstance(self.linked, int):
+
+            # a coefficient key is (layer, "c", row, col) for a weight, (layer, "i", pos) for a bias
+            def _get_random(layer, selected, n_sel):
+                indices = []
+                c = self.coefs_[layer]
+                for i in range(c.shape[0]):
+                    for j in range(c.shape[1]):
+                        key = layer, "c", i, j
+                        if key in selected:
+                            continue
+                        indices.append(key)
+                c = self.intercepts_[layer]
+                for i in range(c.shape[0]):
+                    key = layer, "i", i
+                    if key in selected:
+                        continue
+                    indices.append(key)
+
+                random.shuffle(indices)
+                inds = []
+                pos = 0
+                nis = set()
+                while len(inds) < n_sel and pos < len(indices):
+                    ind = indices[pos]
+                    if ind[2] in nis:
+                        pos += 1
+                        continue
+                    inds.append(pos)
+                    nis.add(ind[2])
+                    pos += 1
+                return tuple(indices[p] for p in inds)
+
+            n_coefs = sum(
+                [c.size for c in self.coefs_] + [c.size for c in self.intercepts_]
+            )
+            linked = []
+            selected = set()
+            unchanged = 0
+            while len(linked) < n_coefs and unchanged < 10:
+                layer = random.randint(0, len(self.coefs_) - 1)
+                inds = _get_random(layer, selected, self.linked)
+                if len(inds) <= 1:
+                    unchanged += 1
+                    continue
+                unchanged = 0
+                for i in inds:
+                    selected.add(i)
+                linked.append(inds)
+            self.linked_ = linked
+            self._fix_links(self.coefs_, self.intercepts_)
+        elif isinstance(self.linked, list):
+            self.linked_ = self.linked
+            self._fix_links(self.coefs_, self.intercepts_)
+        else:
+            raise TypeError(f"Unexpected type for linked {type(self.linked)}.")
+
+    def _fix_links(self, coefs, intercepts):
+        # replaces every coefficient (or gradient) of a linked group by the group average
+        if self.linked_ is None:
+            return
+        for links in self.linked_:
+            if len(links) <= 1:
+                raise RuntimeError(f"Unexpected value for link {links}.")
+            total = 0
+            for key in links:
+                if key[1] == "c":
+                    v = coefs[key[0]][key[2:]]
+                else:
+                    v = intercepts[key[0]][key[2]]
+                total += v
+            total /= len(links)
+            for key in links:
+                if key[1] == "c":
+                    coefs[key[0]][key[2:]] = total
+                else:
+                    intercepts[key[0]][key[2]] = total
+
+    def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
+        batch_loss, coef_grads, intercept_grads = super()._backprop(
+            X, y, activations, deltas, coef_grads, intercept_grads
+        )
+        # averages the gradients of the linked coefficients so they remain equal
+        self._fix_links(coef_grads, intercept_grads)
+        return batch_loss, coef_grads, intercept_grads
+
+
+class LinkedMLPRegressor(LinkedMLPBase, MLPRegressor):
+    """
+    A neural network regressor for which a subset of coefficients
+    shares the same value. In practice, it should make the training
+    more stable. See parameter *linked*.
+
+    :param hidden_layer_sizes: tuple, length = n_layers - 2, default (100,)
+        The ith element represents the number of neurons in the ith
+        hidden layer.
+    :param activation: {'identity', 'logistic', 'tanh', 'relu'}, default 'relu'
+        Activation function for the hidden layer.
+        - 'identity', no-op activation, useful to implement linear bottleneck,
+          returns :math:`f(x) = x`
+        - 'logistic', the logistic sigmoid function,
+          returns :math:`f(x) = 1 / (1 + exp(-x))`.
+        - 'tanh', the hyperbolic tan function,
+          returns :math:`f(x) = tanh(x)`.
+        - 'relu', the rectified linear unit function,
+          returns :math:`f(x) = \\max(0, x)`.
+    :param solver: ``{'lbfgs', 'sgd', 'adam'}``, default 'sgd'
+        The solver for weight optimization.
+        - *'lbfgs'* is an optimizer in the family of quasi-Newton methods.
+        - *'sgd'* refers to stochastic gradient descent.
+        - *'adam'* refers to a stochastic gradient-based optimizer proposed by
+          Kingma, Diederik, and Jimmy Ba
+        Note: the 'adam' solver works pretty well on relatively
+        large datasets (with thousands of training samples or more) in terms of
+        both training time and validation score.
+        For small datasets, however, 'lbfgs' can converge faster and perform
+        better.
+    :param alpha: float, optional, default 0.0001
+        :epkg:`L2` penalty (regularization term) parameter.
+    :param batch_size: int, optional, default 'auto'
+        Size of minibatches for stochastic optimizers.
+        If the solver is 'lbfgs', the classifier will not use minibatch.
+        When set to "auto", `batch_size=min(200, n_samples)`
+    :param learning_rate: {'constant', 'invscaling', 'adaptive'}, default 'constant'
+        Learning rate schedule for weight updates.
+        - 'constant' is a constant learning rate given by
+          'learning_rate_init'.
+        - 'invscaling' gradually decreases the learning rate ``learning_rate_``
+          at each time step 't' using an inverse scaling exponent of 'power_t'.
+          effective_learning_rate = learning_rate_init / pow(t, power_t)
+        - 'adaptive' keeps the learning rate constant to
+          'learning_rate_init' as long as training loss keeps decreasing.
+          Each time two consecutive epochs fail to decrease training loss by at
+          least tol, or fail to increase validation score by at least tol if
+          'early_stopping' is on, the current learning rate is divided by 5.
+        Only used when solver='sgd'.
+    :param learning_rate_init: double, optional, default 0.001
+        The initial learning rate used. It controls the step-size
+        in updating the weights. Only used when solver='sgd' or 'adam'.
+    :param power_t: double, optional, default 0.5
+        The exponent for inverse scaling learning rate.
+        It is used in updating effective learning rate when the learning_rate
+        is set to 'invscaling'. Only used when solver='sgd'.
+    :param max_iter: int, optional, default 200
+        Maximum number of iterations. The solver iterates until convergence
+        (determined by 'tol') or this number of iterations. For stochastic
+        solvers ('sgd', 'adam'), note that this determines the number of epochs
+        (how many times each data point will be used), not the number of
+        gradient steps.
+    :param shuffle: bool, optional, default True
+        Whether to shuffle samples in each iteration. Only used when
+        solver='sgd' or 'adam'.
+    :param random_state: int, RandomState instance or None, optional, default None
+        If int, random_state is the seed used by the random number generator;
+        If RandomState instance, random_state is the random number generator;
+        If None, the random number generator is the RandomState instance used
+        by `np.random`.
+    :param tol: float, optional, default 1e-4
+        Tolerance for the optimization. When the loss or score is not improving
+        by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,
+        unless ``learning_rate`` is set to 'adaptive', convergence is
+        considered to be reached and training stops.
+    :param verbose: bool, optional, default False
+        Whether to print progress messages to stdout.
+    :param warm_start: bool, optional, default False
+        When set to True, reuse the solution of the previous
+        call to fit as initialization, otherwise, just erase the
+        previous solution. See :term:`the Glossary <warm_start>`.
+    :param momentum: float, default 0.9
+        Momentum for gradient descent update. Should be between 0 and 1. Only
+        used when solver='sgd'.
+    :param nesterovs_momentum: boolean, default True
+        Whether to use Nesterov's momentum.
+        Only used when solver='sgd' and momentum > 0.
+    :param early_stopping: bool, default False
+        Whether to use early stopping to terminate training when validation
+        score is not improving. If set to true, it will automatically set
+        aside 10% of training data as validation and terminate training when
+        validation score is not improving by at least ``tol`` for
+        ``n_iter_no_change`` consecutive epochs.
+        Only effective when solver='sgd' or 'adam'
+    :param validation_fraction: float, optional, default 0.1
+        The proportion of training data to set aside as validation set for
+        early stopping. Must be between 0 and 1.
+        Only used if early_stopping is True
+    :param beta_1: float, optional, default 0.9
+        Exponential decay rate for estimates of first moment vector in adam,
+        should be in [0, 1). Only used when solver='adam'
+    :param beta_2: float, optional, default 0.999
+        Exponential decay rate for estimates of second moment vector in adam,
+        should be in [0, 1). Only used when solver='adam'
+    :param epsilon: float, optional, default 1e-8
+        Value for numerical stability in adam. Only used when solver='adam'
+    :param n_iter_no_change: int, optional, default 10
+        Maximum number of epochs to not meet ``tol`` improvement.
+        Only effective when solver='sgd' or 'adam'
+    :param linked: an integer defining the size of the groups of randomly
+        linked coefficients, or an explicit list of groups of coefficient indices
+
+    Fitted attributes:
+
+    * `loss_`: float
+        The current loss computed with the loss function.
+    * `coefs_`: list, length n_layers - 1
+        The ith element in the list represents the weight matrix corresponding
+        to layer i.
+    * `intercepts_`: list, length n_layers - 1
+        The ith element in the list represents the bias vector corresponding to
+        layer i + 1.
+    * `n_iter_`: int,
+        The number of iterations the solver has run.
+    * `n_layers_`: int
+        Number of layers.
+    * `n_outputs_`: int
+        Number of outputs.
+    * `out_activation_`: string
+        Name of the output activation function.
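+
+    The following sketch, adapted from the unit tests, links randomly chosen
+    pairs of coefficients (``linked=2``); the selected pairs depend on the
+    random initialization::
+
+        import numpy
+        from mlinsights.mlmodel import LinkedMLPRegressor
+
+        X = numpy.array([[0.1, 0.11], [0.2, 0.21], [0.3, 0.31], [0.4, 0.41]])
+        Y = numpy.array([1.0, 1.1, 1.2, 1.3])
+        clq = LinkedMLPRegressor(hidden_layer_sizes=(3,), linked=2)
+        clq.fit(X, Y)
+        # pairs of tied coefficients, e.g. ((0, "c", 1, 2), (0, "i", 0))
+        print(clq.linked_)
+        print(clq.predict(X))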
+    """
+
+    def __init__(
+        self,
+        hidden_layer_sizes=(100,),
+        activation="relu",
+        solver="sgd",
+        alpha=0.0001,
+        batch_size="auto",
+        learning_rate="constant",
+        learning_rate_init=0.001,
+        power_t=0.5,
+        max_iter=200,
+        shuffle=True,
+        random_state=None,
+        tol=1e-4,
+        verbose=False,
+        warm_start=False,
+        momentum=0.9,
+        nesterovs_momentum=True,
+        early_stopping=False,
+        validation_fraction=0.1,
+        beta_1=0.9,
+        beta_2=0.999,
+        epsilon=1e-8,
+        n_iter_no_change=10,
+        max_fun=15000,
+        linked=None,
+    ):
+        """
+        See :epkg:`sklearn:neural_networks:MLPRegressor`
+        """
+        sup = super(LinkedMLPRegressor, self)  # pylint: disable=R1725
+        sup.__init__(
+            hidden_layer_sizes=hidden_layer_sizes,
+            activation=activation,
+            solver=solver,
+            alpha=alpha,
+            batch_size=batch_size,
+            learning_rate=learning_rate,
+            learning_rate_init=learning_rate_init,
+            power_t=power_t,
+            max_iter=max_iter,
+            shuffle=shuffle,
+            random_state=random_state,
+            tol=tol,
+            verbose=verbose,
+            warm_start=warm_start,
+            momentum=momentum,
+            nesterovs_momentum=nesterovs_momentum,
+            early_stopping=early_stopping,
+            validation_fraction=validation_fraction,
+            beta_1=beta_1,
+            beta_2=beta_2,
+            epsilon=epsilon,
+            n_iter_no_change=n_iter_no_change,
+            max_fun=max_fun,
+        )
+        self.linked = linked
diff --git a/mlinsights/mlmodel/quantile_mlpregressor.py b/mlinsights/mlmodel/quantile_mlpregressor.py
index 630f649..edf3a8b 100644
--- a/mlinsights/mlmodel/quantile_mlpregressor.py
+++ b/mlinsights/mlmodel/quantile_mlpregressor.py
@@ -186,10 +186,13 @@ def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
         # due to the modification of the loss function.
         deltas[last] = self._modify_loss_derivatives(deltas[last])
 
+        # recent version of scikit-learn
         # Compute gradient for the last layer
         temp = self._compute_loss_grad(
             last, n_samples, activations, deltas, coef_grads, intercept_grads
         )
+
+        inplace_derivative = DERIVATIVES[self.activation]
         if temp is None:
             # recent version of scikit-learn
             # Compute gradient for the last layer
@@ -197,7 +200,6 @@
                 last, n_samples, activations, deltas, coef_grads, intercept_grads
             )
 
-            inplace_derivative = DERIVATIVES[self.activation]
             # Iterate over the hidden layers
             for i in range(self.n_layers_ - 2, 0, -1):
                 deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T)
@@ -207,18 +209,12 @@
                     i - 1, n_samples, activations, deltas, coef_grads, intercept_grads
                 )
         else:
-            coef_grads, intercept_grads = temp
-
             # Iterate over the hidden layers
             for i in range(self.n_layers_ - 2, 0, -1):
                 deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T)
-                inplace_derivative = DERIVATIVES[self.activation]
                 inplace_derivative(activations[i], deltas[i - 1])
-                (
-                    coef_grads,
-                    intercept_grads,
-                ) = self._compute_loss_grad(
+                self._compute_loss_grad(
                     i - 1, n_samples, activations, deltas, coef_grads, intercept_grads
                 )