Adding first round of tests

unit8co · dennisbader · Jun 17, 2024 · Dec 19, 2023 · Jan 4, 2024 · Jan 4, 2024
commit 48126e646a65eabb3c231ca021c39323feebe3c3
@@ -1487,7 +1487,7 @@ def test_multiple_ts(self, mode):
             ),
         ],
     )
-    def test_correct_generated_weights(self, config):
+    def test_correct_generated_weights_equal(self, config):
         model, training_size = config
         train_y = self.sine_univariate1[:training_size]
         _, _, weights = model._create_lagged_data(
@@ -1503,6 +1503,64 @@ def test_correct_generated_weights(self, config):
         assert len(weights) == weights_size
         assert (weights == [1] * weights_size).all()
 
+    @pytest.mark.parametrize(
+        "config",
+        [
+            (RegressionModel(lags=4), 10),
+            (RegressionModel(lags=8, model=LinearRegression()), 20),
+            (RegressionModel(lags=16, model=RandomForestRegressor()), 50),
+            (
+                RegressionModel(lags=2, model=HistGradientBoostingRegressor()),
+                100,
+            ),
+        ],
+    )
+    def test_correct_generated_weights_linear(self, config):
+        model, training_size = config
+        weights_size = training_size - len(model.lags["target"])
+
+        expected_weights = np.linspace(0, 1, weights_size + 1)[1:]
+
+        train_y = self.sine_univariate1[:training_size]
+        _, _, weights = model._create_lagged_data(
+            target_series=train_y,
+            past_covariates=None,
+            future_covariates=None,
+            max_samples_per_ts=None,
+            sample_weight="linear_decay",
+        )
+
+        assert len(weights) == weights_size
+        assert (weights == expected_weights).all()
+
+    @pytest.mark.parametrize(
+        "config",
+        [
+            (RegressionModel(lags=4), 10, 10),
+            (RegressionModel(lags=8, model=LinearRegression()), 20, 10),
+            (RegressionModel(lags=16, model=RandomForestRegressor()), 50, 10),
+            (RegressionModel(lags=2, model=HistGradientBoostingRegressor()), 100, 10),
+        ],
+    )
+    def test_correct_generated_weights_exponential(self, config):
+        model, training_size, decay_rate = config
+        weights_size = training_size - len(model.lags["target"])
+
+        time_steps = np.linspace(0, 1, weights_size)
+        expected_weights = np.exp(-decay_rate * (1 - time_steps))
+
+        train_y = self.sine_univariate1[:training_size]
+        _, _, weights = model._create_lagged_data(
+            target_series=train_y,
+            past_covariates=None,
+            future_covariates=None,
+            max_samples_per_ts=None,
+            sample_weight="exponential_decay",
+        )
+
+        assert len(weights) == weights_size
+        np.testing.assert_array_almost_equal(weights, expected_weights)
+
     @pytest.mark.parametrize("mode", [True, False])
     def test_only_future_covariates(self, mode):
         model = RegressionModel(lags_future_covariates=[-2], multi_models=mode)

@@ -19,7 +19,7 @@
 from darts.utils.timeseries_generation import (
     constant_timeseries,
     exponential_timeseries,
-    linear_timeseries,
+    non_zero_linear_timeseries,
 )
 from darts.utils.utils import get_single_series, series2seq
 
@@ -336,13 +336,13 @@ def create_lagged_data(
                     1, start=times_i[0], end=times_i[-1], freq=times_i.freq
                 ).values()
             elif sample_weight == "linear_decay":
-                weights = linear_timeseries(
+                weights = non_zero_linear_timeseries(
                     start=times_i[0], end=times_i[-1], freq=times_i.freq
                 ).values()
             elif sample_weight == "exponential_decay":
                 weights = exponential_timeseries(
                     start=times_i[0], end=times_i[-1], freq=times_i.freq
-                ).values()
+                ).values()[::-1]
             elif isinstance(sample_weight, TimeSeries):
                 weights = sample_weight.values()
             else:

diff --git a/darts/utils/timeseries_generation.py b/darts/utils/timeseries_generation.py
@@ -516,6 +516,62 @@ def autoregressive_timeseries(
     )
 
 
+def non_zero_linear_timeseries(
+    start_value: float = 0,
+    end_value: float = 1,
+    start: Optional[Union[pd.Timestamp, int]] = pd.Timestamp("2000-01-01"),
+    end: Optional[Union[pd.Timestamp, int]] = None,
+    length: Optional[int] = None,
+    freq: Union[str, int] = None,
+    column_name: Optional[str] = "linear",
+    dtype: np.dtype = np.float64,
+) -> TimeSeries:
+    """
+    Creates a univariate TimeSeries that changes linearly and takes on the value `end_value` at its last
+    entry. The value `start_value` itself is excluded: the first entry lies one step past `start_value`.
+    This means that the difference between two adjacent entries will be equal to
+    (`end_value` - `start_value`) / `length`, where `length` is the number of entries.
+
+    Parameters
+    ----------
+    start_value
+        The value one step before the first entry; it is not itself part of the TimeSeries.
+    end_value
+        The value of the last entry in the TimeSeries.
+    start
+        The start of the returned TimeSeries' index. If a pandas Timestamp is passed, the TimeSeries will have a pandas
+        DatetimeIndex. If an integer is passed, the TimeSeries will have a pandas RangeIndex index. Works only with
+        either `length` or `end`.
+    end
+        Optionally, the end of the returned index. Works only with either `start` or `length`. If `start` is
+        set, `end` must be of same type as `start`. Else, it can be either a pandas Timestamp or an integer.
+    length
+        Optionally, the length of the returned index. Works only with either `start` or `end`.
+    freq
+        The time difference between two adjacent entries in the returned index. In case `start` is a timestamp,
+        a DateOffset alias is expected; see
+        `docs <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`_.
+        By default, "D" (daily) is used.
+        If `start` is an integer, `freq` will be interpreted as the step size in the underlying RangeIndex.
+        The freq is optional for generating an integer index (if not specified, 1 is used).
+    column_name
+        Optionally, the name of the value column for the returned TimeSeries
+    dtype
+        The desired NumPy dtype (np.float32 or np.float64) for the resulting series
+
+    Returns
+    -------
+    TimeSeries
+        A linear TimeSeries created as indicated above.
+    """
+
+    index = generate_index(start=start, end=end, freq=freq, length=length)
+    values = np.linspace(start_value, end_value, len(index) + 1, dtype=dtype)[1:]
+    return TimeSeries.from_times_and_values(
+        index, values, freq=freq, columns=pd.Index([column_name])
+    )
+
+
 def _extend_time_index_until(
     time_index: Union[pd.DatetimeIndex, pd.RangeIndex],
     until: Optional[Union[int, str, pd.Timestamp]],