Trend Detection #

`statista.time_series.trend` #

Trend detection mixin for TimeSeries.

`Trend` #

Bases: _TimeSeriesStub

Trend detection methods for TimeSeries.

Source code in src\statista\time_series\trend.py

class Trend(_TimeSeriesStub):
    """Trend detection methods for TimeSeries.

    Mixin that adds the Mann-Kendall test, Sen's slope, detrending and
    Innovative Trend Analysis. The methods rely on the host object exposing
    ``columns``, ``values``, ``index``, column access via ``self[col]`` and the
    plotting helpers ``_get_ax_fig`` / ``_adjust_axes_labels``.
    """

    def mann_kendall(
        self,
        alpha: float = DEFAULT_ALPHA,
        method: str = "original",
        lag: int = None,
        column: str = None,
    ) -> DataFrame:
        """Mann-Kendall trend test.

        Tests the null hypothesis of no monotonic trend. Supports multiple variants
        to handle autocorrelated data. NaN values are dropped from each column
        before testing.

        Args:
            alpha: Significance level. Default 0.05.
            method: Test variant.
                - "original": Standard MK (assumes serial independence).
                - "hamed_rao": Variance correction for autocorrelation (Hamed & Rao, 1998).
                  **Recommended for environmental data** where autocorrelation inflates significance.
                - "yue_wang": Alternative autocorrelation correction (Yue & Wang, 2004).
                - "pre_whitening": Remove lag-1 autocorrelation before testing.
                - "trend_free_pre_whitening": Remove trend, pre-whiten, re-add trend, then test.
            lag: Maximum lag for autocorrelation correction (Hamed-Rao / Yue-Wang).
                If None, uses n//2 - 1.
            column: Column to test. If None, tests all columns.

        Returns:
            pandas.DataFrame: One row per column (indexed by column name) with: trend,
                h, p_value, z, tau, s, var_s, slope, intercept.

        Examples:
            Detect a strong increasing trend in a linear signal with noise:

            >>> import numpy as np
            >>> from statista.time_series import TimeSeries
            >>> np.random.seed(42)
            >>> data = np.arange(50, dtype=float) + np.random.randn(50) * 3
            >>> ts = TimeSeries(data)
            >>> result = ts.mann_kendall()
            >>> result.loc["Series1", "trend"]
            'increasing'
            >>> round(float(result.loc["Series1", "z"]), 4)
            9.1177
            >>> round(float(result.loc["Series1", "tau"]), 4)
            0.8906

            Verify no trend in pure random noise:

            >>> np.random.seed(42)
            >>> ts_noise = TimeSeries(np.random.randn(50))
            >>> result_noise = ts_noise.mann_kendall()
            >>> result_noise.loc["Series1", "trend"]
            'no trend'
            >>> round(float(result_noise.loc["Series1", "p_value"]), 4)
            0.1701

            Use the Hamed-Rao autocorrelation correction:

            >>> np.random.seed(42)
            >>> data_ac = np.arange(50, dtype=float) + np.random.randn(50) * 3
            >>> ts_ac = TimeSeries(data_ac)
            >>> result_ac = ts_ac.mann_kendall(method="hamed_rao")
            >>> result_ac.loc["Series1", "trend"]
            'increasing'
            >>> round(float(result_ac.loc["Series1", "z"]), 4)
            9.1177

        References:
            Mann, H.B. (1945). Nonparametric tests against trend. Econometrica, 13(3), 245-259.

            Hamed, K.H. and Rao, A.R. (1998). A modified Mann-Kendall trend test for autocorrelated
            data. Journal of Hydrology, 204(1-4), 182-196.

            Yue, S. and Wang, C. (2004). The Mann-Kendall test modified by effective sample size to
            detect trend in serially correlated hydrological series. Water Resources Management, 18, 201-218.
        """
        cols = [column] if column is not None else list(self.columns)
        rows = []

        for col in cols:
            # NaNs are removed per column, so columns may be tested on different lengths.
            data = self[col].dropna().values
            result = _mann_kendall_single(data, alpha=alpha, method=method, lag=lag)
            rows.append({"column": col, **result})

        return DataFrame(rows).set_index("column")

    def sens_slope(
        self,
        alpha: float = DEFAULT_ALPHA,
        column: str = None,
    ) -> DataFrame:
        """Sen's slope estimator — robust non-parametric trend magnitude.

        Computes the median of all pairwise slopes. More robust to outliers than OLS.
        Always pair with ``mann_kendall()`` — Sen's slope gives the magnitude, MK gives
        the significance.

        Uses ``scipy.stats.theilslopes`` internally, regressing the NaN-free values
        of each column against their positional index ``0..n-1``.

        Args:
            alpha: Significance level for confidence interval. Default 0.05.
            column: Column to analyze. If None, analyzes all columns.

        Returns:
            pandas.DataFrame: One row per column (indexed by column name) with: slope,
                intercept, slope_lower_ci, slope_upper_ci.

        Examples:
            Estimate the slope of a linear signal (true slope = 2.0):

            >>> import numpy as np
            >>> from statista.time_series import TimeSeries
            >>> np.random.seed(42)
            >>> data = np.arange(50, dtype=float) * 2 + np.random.randn(50) * 3
            >>> ts = TimeSeries(data)
            >>> result = ts.sens_slope()
            >>> round(float(result.loc["Series1", "slope"]), 4)
            1.9629
            >>> round(float(result.loc["Series1", "intercept"]), 4)
            -0.6347

            Check confidence interval bounds on the slope:

            >>> round(float(result.loc["Series1", "slope_lower_ci"]), 4)
            1.9046
            >>> round(float(result.loc["Series1", "slope_upper_ci"]), 4)
            2.0217

            Weaker trend with more noise (true slope = 0.5):

            >>> np.random.seed(42)
            >>> data2 = np.arange(30, dtype=float) * 0.5 + np.random.randn(30) * 2
            >>> ts2 = TimeSeries(data2)
            >>> result2 = ts2.sens_slope()
            >>> round(float(result2.loc["Series1", "slope"]), 4)
            0.4304
            >>> round(float(result2.loc["Series1", "intercept"]), 4)
            0.0259

        References:
            Sen, P.K. (1968). Estimates of the regression coefficient based on Kendall's tau.
            JASA, 63(324), 1379-1389.
        """
        cols = [column] if column is not None else list(self.columns)
        rows = []

        for col in cols:
            data = self[col].dropna().values
            # The regressor is the position in the NaN-free series, not the index values.
            x = np.arange(len(data))
            slope, intercept, low_slope, high_slope = theilslopes(data, x, alpha=alpha)
            rows.append(
                {
                    "column": col,
                    "slope": float(slope),
                    "intercept": float(intercept),
                    "slope_lower_ci": float(low_slope),
                    "slope_upper_ci": float(high_slope),
                }
            )

        return DataFrame(rows).set_index("column")

    def detrend(self, method: str = "linear", order: int = 1) -> Any:
        """Remove trend from the time series.

        Every column is detrended independently against its positional index.
        Unlike ``mann_kendall``/``sens_slope``, NaN values are not dropped here;
        the column values are passed to the fitting routine as they are.

        Args:
            method: Detrending method.
                - "linear": Remove linear trend via scipy.signal.detrend.
                - "constant": Subtract the mean.
                - "polynomial": Remove polynomial trend of given order.
                - "sens": Remove trend using Sen's slope (robust to outliers).
            order: Polynomial order (only used when method="polynomial"). Default 1.

        Returns:
            TimeSeries: New TimeSeries with the trend removed. Same index as original.

        Raises:
            ValueError: If ``method`` is not one of the supported names. The check
                runs before any data is touched, so it also fires for an object
                without columns.

        Examples:
            Remove a linear trend (result has zero mean):

            >>> import numpy as np
            >>> from statista.time_series import TimeSeries
            >>> np.random.seed(42)
            >>> data = np.arange(100, dtype=float) + np.random.randn(100) * 5
            >>> ts = TimeSeries(data)
            >>> detrended = ts.detrend(method="linear")
            >>> round(float(detrended.values.mean()), 4)
            0.0
            >>> round(float(detrended.values.std()), 4)
            4.5136

            Remove trend using robust Sen's slope estimator:

            >>> np.random.seed(42)
            >>> data2 = np.arange(50, dtype=float) * 2 + np.random.randn(50) * 3
            >>> ts2 = TimeSeries(data2)
            >>> detrended2 = ts2.detrend(method="sens")
            >>> round(float(detrended2.values.mean()), 4)
            0.8666

            Subtract the mean (constant detrending):

            >>> np.random.seed(42)
            >>> ts3 = TimeSeries(np.random.randn(50) + 10)
            >>> detrended3 = ts3.detrend(method="constant")
            >>> round(float(detrended3.values.mean()), 4)
            0.0
        """
        # Fail fast: reject an unknown method once, before allocating or looping.
        if method not in ("linear", "constant", "polynomial", "sens"):
            raise ValueError(
                f"Unknown method '{method}'. Choose from 'linear', 'constant', 'polynomial', 'sens'."
            )

        # Function-scope import kept from the original implementation
        # (presumably to avoid a circular import — confirm).
        from statista.time_series import TimeSeries

        result_data = np.empty_like(self.values, dtype=float)

        for i, col in enumerate(self.columns):
            data = self[col].values.astype(float)

            if method in ("linear", "constant"):
                # scipy's ``type`` argument uses the same names as ``method``.
                result_data[:, i] = scipy_detrend(data, type=method)
            elif method == "polynomial":
                x = np.arange(len(data), dtype=float)
                coeffs = np.polyfit(x, data, order)
                result_data[:, i] = data - np.polyval(coeffs, x)
            else:  # "sens"
                x = np.arange(len(data), dtype=float)
                slope, intercept, _, _ = theilslopes(data, x)
                result_data[:, i] = data - (intercept + slope * x)

        return TimeSeries(result_data, index=self.index, columns=list(self.columns))

    def innovative_trend_analysis(
        self,
        column: str = None,
        **kwargs: Any,
    ) -> tuple[DataFrame, tuple[Figure, Axes]]:
        """Innovative Trend Analysis (ITA) — Sen (2012) method.

        Splits the series chronologically into two equal halves, sorts each half
        in ascending order, and plots the sorted first half (x-axis) against the
        sorted second half (y-axis). Points above the 1:1 line indicate an
        increasing trend, points below indicate a decreasing trend.

        A minimum of 20 observations is required so that each half has at least 10 points
        for a meaningful distribution comparison. This threshold reflects common practice
        for ITA; it is not prescribed by Sen (2012) but is widely used in the literature
        (e.g., Serinaldi et al., 2020).

        If the number of non-NaN observations is odd, the last (most recent)
        observation is dropped and a ``UserWarning`` is emitted so that both
        halves have the same length.

        The figure is displayed via ``plt.show()`` before the method returns.

        Args:
            column: Column to analyze. If None, uses first column.
            **kwargs: Passed to ``_get_ax_fig`` and, without ``fig``/``ax``, to
                ``_adjust_axes_labels`` (title, xlabel, ylabel, etc.).

        Returns:
            tuple: (results_df, (fig, ax)).
                results_df is indexed by column name and has one column,
                trend_indicator: the mean of (sorted second half - sorted first
                half). Positive = increasing, negative = decreasing.

        Raises:
            ValueError: If the series has fewer than 20 observations after dropping NaN.

        Examples:
            >>> import numpy as np  # doctest: +SKIP
            >>> from statista.time_series import TimeSeries  # doctest: +SKIP
            >>> ts = TimeSeries(np.arange(100, dtype=float))  # doctest: +SKIP
            >>> result_df, (fig, ax) = ts.innovative_trend_analysis()  # doctest: +SKIP

        References:
            Sen, Z. (2012). Innovative Trend Analysis Methodology. Journal of Hydrologic
            Engineering, 17(9), 1042-1046.

            Serinaldi, F., Chebana, F., Kilsby, C.G. (2020). Dissecting innovative trend
            analysis. Stochastic Environmental Research and Risk Assessment, 34, 733-754.
        """
        if column is None:
            column = self.columns[0]

        # Keep the observations in their original (chronological) order here.
        # Sorting the whole series before splitting would force every
        # first-half value to be <= its second-half partner, so the indicator
        # could never be negative.
        data = self[column].dropna().values
        n = len(data)

        # Validate minimum sample size
        if n < 20:
            raise ValueError(
                f"Innovative trend analysis requires at least 20 observations, got {n}"
            )

        # Handle odd-length series: drop the most recent observation.
        if n % 2 != 0:
            data = data[:-1]
            n -= 1
            warnings.warn(
                f"Column '{column}' has odd length. Last observation dropped for analysis.",
                UserWarning,
                stacklevel=2,
            )

        mid = n // 2

        # Sen (2012): each half is sorted separately, then paired rank by rank.
        first_half = np.sort(data[:mid])
        second_half = np.sort(data[mid:])

        fig, ax = self._get_ax_fig(**kwargs)
        # fig/ax were consumed above and must not reach the label helper.
        kwargs.pop("fig", None)
        kwargs.pop("ax", None)

        ax.scatter(
            first_half,
            second_half,
            alpha=0.5,
            s=15,
            color="steelblue",
            edgecolor="white",
            linewidth=0.3,
        )

        # 1:1 line
        min_val = min(first_half.min(), second_half.min())
        max_val = max(first_half.max(), second_half.max())
        ax.plot(
            [min_val, max_val], [min_val, max_val], "k-", linewidth=1, label="1:1 line"
        )

        # +/- 10% envelope (10% of the plotted data range, as a vertical offset)
        range_val = max_val - min_val
        offset = 0.10 * range_val
        ax.plot(
            [min_val, max_val],
            [min_val + offset, max_val + offset],
            "r--",
            linewidth=0.7,
            label="+10%",
        )
        ax.plot(
            [min_val, max_val],
            [min_val - offset, max_val - offset],
            "b--",
            linewidth=0.7,
            label="-10%",
        )

        # Trend indicator: mean deviation from 1:1 line
        trend_indicator = float(np.mean(second_half - first_half))

        ax.annotate(
            f"Trend indicator: {trend_indicator:.3f}",
            xy=(0.05, 0.95),
            xycoords="axes fraction",
            fontsize=10,
            va="top",
        )

        # Caller-supplied labels win; otherwise fall back to descriptive defaults.
        if "title" not in kwargs:
            kwargs["title"] = f"Innovative Trend Analysis — {column}"
        if "xlabel" not in kwargs:
            kwargs["xlabel"] = "First half (sorted)"
        if "ylabel" not in kwargs:
            kwargs["ylabel"] = "Second half (sorted)"

        ax = self._adjust_axes_labels(ax, **kwargs)
        plt.show()

        result_df = DataFrame(
            [
                {
                    "column": column,
                    "trend_indicator": trend_indicator,
                }
            ]
        ).set_index("column")

        return result_df, (fig, ax)

`mann_kendall(alpha=DEFAULT_ALPHA, method='original', lag=None, column=None)` #

Mann-Kendall trend test.

Tests the null hypothesis of no monotonic trend. Supports multiple variants to handle autocorrelated data.

Parameters:

Name	Type	Description	Default
`alpha`	`float`	Significance level. Default 0.05.	`DEFAULT_ALPHA`
`method`	`str`	Test variant. - "original": Standard MK (assumes serial independence). - "hamed_rao": Variance correction for autocorrelation (Hamed & Rao, 1998). Recommended for environmental data where autocorrelation inflates significance. - "yue_wang": Alternative autocorrelation correction (Yue & Wang, 2004). - "pre_whitening": Remove lag-1 autocorrelation before testing. - "trend_free_pre_whitening": Remove trend, pre-whiten, re-add trend, then test.	`'original'`
`lag`	`int`	Maximum lag for autocorrelation correction (Hamed-Rao / Yue-Wang). If None, uses n//2 - 1.	`None`
`column`	`str`	Column to test. If None, tests all columns.	`None`

Returns:

Type	Description
`DataFrame`	pandas.DataFrame: One row per column with: trend, h, p_value, z, tau, s, var_s, slope, intercept.

Examples:

Detect a strong increasing trend in a linear signal with noise:

>>> import numpy as np
>>> from statista.time_series import TimeSeries
>>> np.random.seed(42)
>>> data = np.arange(50, dtype=float) + np.random.randn(50) * 3
>>> ts = TimeSeries(data)
>>> result = ts.mann_kendall()
>>> result.loc["Series1", "trend"]
'increasing'
>>> round(float(result.loc["Series1", "z"]), 4)
9.1177
>>> round(float(result.loc["Series1", "tau"]), 4)
0.8906

Verify no trend in pure random noise:

>>> np.random.seed(42)
>>> ts_noise = TimeSeries(np.random.randn(50))
>>> result_noise = ts_noise.mann_kendall()
>>> result_noise.loc["Series1", "trend"]
'no trend'
>>> round(float(result_noise.loc["Series1", "p_value"]), 4)
0.1701

Use the Hamed-Rao autocorrelation correction:

>>> np.random.seed(42)
>>> data_ac = np.arange(50, dtype=float) + np.random.randn(50) * 3
>>> ts_ac = TimeSeries(data_ac)
>>> result_ac = ts_ac.mann_kendall(method="hamed_rao")
>>> result_ac.loc["Series1", "trend"]
'increasing'
>>> round(float(result_ac.loc["Series1", "z"]), 4)
9.1177

References

Mann, H.B. (1945). Nonparametric tests against trend. Econometrica, 13(3), 245-259.

Hamed, K.H. and Rao, A.R. (1998). A modified Mann-Kendall trend test for autocorrelated data. Journal of Hydrology, 204(1-4), 182-196.

Yue, S. and Wang, C. (2004). The Mann-Kendall test modified by effective sample size to detect trend in serially correlated hydrological series. Water Resources Management, 18, 201-218.

Source code in src\statista\time_series\trend.py

def mann_kendall(
    self,
    alpha: float = DEFAULT_ALPHA,
    method: str = "original",
    lag: int = None,
    column: str = None,
) -> DataFrame:
    """Run the Mann-Kendall trend test on one or all columns.

    The test's null hypothesis is that the series has no monotonic trend.
    Several variants are available for autocorrelated data. NaN values are
    dropped from each column before it is tested.

    Args:
        alpha: Significance level. Default 0.05.
        method: Test variant.
            - "original": Standard MK (assumes serial independence).
            - "hamed_rao": Variance correction for autocorrelation (Hamed & Rao, 1998).
              **Recommended for environmental data** where autocorrelation inflates significance.
            - "yue_wang": Alternative autocorrelation correction (Yue & Wang, 2004).
            - "pre_whitening": Remove lag-1 autocorrelation before testing.
            - "trend_free_pre_whitening": Remove trend, pre-whiten, re-add trend, then test.
        lag: Maximum lag for autocorrelation correction (Hamed-Rao / Yue-Wang).
            If None, uses n//2 - 1.
        column: Column to test. If None, tests all columns.

    Returns:
        pandas.DataFrame: One row per column (indexed by column name) with: trend,
            h, p_value, z, tau, s, var_s, slope, intercept.

    Examples:
        Detect a strong increasing trend in a linear signal with noise:

        >>> import numpy as np
        >>> from statista.time_series import TimeSeries
        >>> np.random.seed(42)
        >>> data = np.arange(50, dtype=float) + np.random.randn(50) * 3
        >>> ts = TimeSeries(data)
        >>> result = ts.mann_kendall()
        >>> result.loc["Series1", "trend"]
        'increasing'
        >>> round(float(result.loc["Series1", "z"]), 4)
        9.1177
        >>> round(float(result.loc["Series1", "tau"]), 4)
        0.8906

        Verify no trend in pure random noise:

        >>> np.random.seed(42)
        >>> ts_noise = TimeSeries(np.random.randn(50))
        >>> result_noise = ts_noise.mann_kendall()
        >>> result_noise.loc["Series1", "trend"]
        'no trend'
        >>> round(float(result_noise.loc["Series1", "p_value"]), 4)
        0.1701

        Use the Hamed-Rao autocorrelation correction:

        >>> np.random.seed(42)
        >>> data_ac = np.arange(50, dtype=float) + np.random.randn(50) * 3
        >>> ts_ac = TimeSeries(data_ac)
        >>> result_ac = ts_ac.mann_kendall(method="hamed_rao")
        >>> result_ac.loc["Series1", "trend"]
        'increasing'
        >>> round(float(result_ac.loc["Series1", "z"]), 4)
        9.1177

    References:
        Mann, H.B. (1945). Nonparametric tests against trend. Econometrica, 13(3), 245-259.

        Hamed, K.H. and Rao, A.R. (1998). A modified Mann-Kendall trend test for autocorrelated
        data. Journal of Hydrology, 204(1-4), 182-196.

        Yue, S. and Wang, C. (2004). The Mann-Kendall test modified by effective sample size to
        detect trend in serially correlated hydrological series. Water Resources Management, 18, 201-218.
    """
    targets = list(self.columns) if column is None else [column]

    def _test_one(name):
        # Each column is cleaned of NaNs on its own before being tested.
        series = self[name].dropna().values
        outcome = _mann_kendall_single(series, alpha=alpha, method=method, lag=lag)
        return {"column": name, **outcome}

    records = [_test_one(name) for name in targets]
    return DataFrame(records).set_index("column")

`sens_slope(alpha=DEFAULT_ALPHA, column=None)` #

Sen's slope estimator — robust non-parametric trend magnitude.

Computes the median of all pairwise slopes. More robust to outliers than OLS. Always pair with mann_kendall() — Sen's slope gives the magnitude, MK gives the significance.

Uses scipy.stats.theilslopes internally.

Parameters:

Name	Type	Description	Default
`alpha`	`float`	Significance level for confidence interval. Default 0.05.	`DEFAULT_ALPHA`
`column`	`str`	Column to analyze. If None, analyzes all columns.	`None`

Returns:

Type	Description
`DataFrame`	pandas.DataFrame: One row per column with: slope, intercept, slope_lower_ci, slope_upper_ci.

Examples:

Estimate the slope of a linear signal (true slope = 2.0):

>>> import numpy as np
>>> from statista.time_series import TimeSeries
>>> np.random.seed(42)
>>> data = np.arange(50, dtype=float) * 2 + np.random.randn(50) * 3
>>> ts = TimeSeries(data)
>>> result = ts.sens_slope()
>>> round(float(result.loc["Series1", "slope"]), 4)
1.9629
>>> round(float(result.loc["Series1", "intercept"]), 4)
-0.6347

Check confidence interval bounds on the slope:

>>> round(float(result.loc["Series1", "slope_lower_ci"]), 4)
1.9046
>>> round(float(result.loc["Series1", "slope_upper_ci"]), 4)
2.0217

Weaker trend with more noise (true slope = 0.5):

>>> np.random.seed(42)
>>> data2 = np.arange(30, dtype=float) * 0.5 + np.random.randn(30) * 2
>>> ts2 = TimeSeries(data2)
>>> result2 = ts2.sens_slope()
>>> round(float(result2.loc["Series1", "slope"]), 4)
0.4304
>>> round(float(result2.loc["Series1", "intercept"]), 4)
0.0259

References

Sen, P.K. (1968). Estimates of the regression coefficient based on Kendall's tau. JASA, 63(324), 1379-1389.

Source code in src\statista\time_series\trend.py

def sens_slope(
    self,
    alpha: float = DEFAULT_ALPHA,
    column: str = None,
) -> DataFrame:
    """Estimate trend magnitude with Sen's slope (robust, non-parametric).

    The estimate is the median of all pairwise slopes, which makes it less
    sensitive to outliers than OLS. Pair it with ``mann_kendall()``: Sen's
    slope gives the magnitude, MK gives the significance.

    The computation is delegated to ``scipy.stats.theilslopes``, regressing the
    NaN-free values of each column against their positional index ``0..n-1``.

    Args:
        alpha: Significance level for confidence interval. Default 0.05.
        column: Column to analyze. If None, analyzes all columns.

    Returns:
        pandas.DataFrame: One row per column (indexed by column name) with: slope,
            intercept, slope_lower_ci, slope_upper_ci.

    Examples:
        Estimate the slope of a linear signal (true slope = 2.0):

        >>> import numpy as np
        >>> from statista.time_series import TimeSeries
        >>> np.random.seed(42)
        >>> data = np.arange(50, dtype=float) * 2 + np.random.randn(50) * 3
        >>> ts = TimeSeries(data)
        >>> result = ts.sens_slope()
        >>> round(float(result.loc["Series1", "slope"]), 4)
        1.9629
        >>> round(float(result.loc["Series1", "intercept"]), 4)
        -0.6347

        Check confidence interval bounds on the slope:

        >>> round(float(result.loc["Series1", "slope_lower_ci"]), 4)
        1.9046
        >>> round(float(result.loc["Series1", "slope_upper_ci"]), 4)
        2.0217

        Weaker trend with more noise (true slope = 0.5):

        >>> np.random.seed(42)
        >>> data2 = np.arange(30, dtype=float) * 0.5 + np.random.randn(30) * 2
        >>> ts2 = TimeSeries(data2)
        >>> result2 = ts2.sens_slope()
        >>> round(float(result2.loc["Series1", "slope"]), 4)
        0.4304
        >>> round(float(result2.loc["Series1", "intercept"]), 4)
        0.0259

    References:
        Sen, P.K. (1968). Estimates of the regression coefficient based on Kendall's tau.
        JASA, 63(324), 1379-1389.
    """
    # Output field names, in the order theilslopes returns its four values.
    field_names = ("slope", "intercept", "slope_lower_ci", "slope_upper_ci")
    targets = list(self.columns) if column is None else [column]

    records = []
    for name in targets:
        observed = self[name].dropna().values
        positions = np.arange(len(observed))
        slope, intercept, low_slope, high_slope = theilslopes(
            observed, positions, alpha=alpha
        )
        estimates = (slope, intercept, low_slope, high_slope)

        record = {"column": name}
        for field, value in zip(field_names, estimates):
            record[field] = float(value)
        records.append(record)

    return DataFrame(records).set_index("column")

`detrend(method='linear', order=1)` #

Remove trend from the time series.

Parameters:

Name	Type	Description	Default
`method`	`str`	Detrending method. - "linear": Remove linear trend via scipy.signal.detrend. - "constant": Subtract the mean. - "polynomial": Remove polynomial trend of given order. - "sens": Remove trend using Sen's slope (robust to outliers).	`'linear'`
`order`	`int`	Polynomial order (only used when method="polynomial"). Default 1.	`1`

Returns:

Name	Type	Description
`TimeSeries`	`Any`	New TimeSeries with the trend removed. Same index as original.

Examples:

Remove a linear trend (result has zero mean):

>>> import numpy as np
>>> from statista.time_series import TimeSeries
>>> np.random.seed(42)
>>> data = np.arange(100, dtype=float) + np.random.randn(100) * 5
>>> ts = TimeSeries(data)
>>> detrended = ts.detrend(method="linear")
>>> round(float(detrended.values.mean()), 4)
0.0
>>> round(float(detrended.values.std()), 4)
4.5136

Remove trend using robust Sen's slope estimator:

>>> np.random.seed(42)
>>> data2 = np.arange(50, dtype=float) * 2 + np.random.randn(50) * 3
>>> ts2 = TimeSeries(data2)
>>> detrended2 = ts2.detrend(method="sens")
>>> round(float(detrended2.values.mean()), 4)
0.8666

Subtract the mean (constant detrending):

>>> np.random.seed(42)
>>> ts3 = TimeSeries(np.random.randn(50) + 10)
>>> detrended3 = ts3.detrend(method="constant")
>>> round(float(detrended3.values.mean()), 4)
0.0

Source code in src\statista\time_series\trend.py

def detrend(self, method: str = "linear", order: int = 1) -> Any:
    """Remove trend from the time series.

    Every column is detrended independently against its positional index.
    Unlike ``mann_kendall``/``sens_slope``, NaN values are not dropped here;
    the column values are passed to the fitting routine as they are.

    Args:
        method: Detrending method.
            - "linear": Remove linear trend via scipy.signal.detrend.
            - "constant": Subtract the mean.
            - "polynomial": Remove polynomial trend of given order.
            - "sens": Remove trend using Sen's slope (robust to outliers).
        order: Polynomial order (only used when method="polynomial"). Default 1.

    Returns:
        TimeSeries: New TimeSeries with the trend removed. Same index as original.

    Raises:
        ValueError: If ``method`` is not one of the supported names. The check
            runs before any data is touched, so it also fires for an object
            without columns.

    Examples:
        Remove a linear trend (result has zero mean):

        >>> import numpy as np
        >>> from statista.time_series import TimeSeries
        >>> np.random.seed(42)
        >>> data = np.arange(100, dtype=float) + np.random.randn(100) * 5
        >>> ts = TimeSeries(data)
        >>> detrended = ts.detrend(method="linear")
        >>> round(float(detrended.values.mean()), 4)
        0.0
        >>> round(float(detrended.values.std()), 4)
        4.5136

        Remove trend using robust Sen's slope estimator:

        >>> np.random.seed(42)
        >>> data2 = np.arange(50, dtype=float) * 2 + np.random.randn(50) * 3
        >>> ts2 = TimeSeries(data2)
        >>> detrended2 = ts2.detrend(method="sens")
        >>> round(float(detrended2.values.mean()), 4)
        0.8666

        Subtract the mean (constant detrending):

        >>> np.random.seed(42)
        >>> ts3 = TimeSeries(np.random.randn(50) + 10)
        >>> detrended3 = ts3.detrend(method="constant")
        >>> round(float(detrended3.values.mean()), 4)
        0.0
    """
    # Fail fast: reject an unknown method once, before allocating or looping.
    if method not in ("linear", "constant", "polynomial", "sens"):
        raise ValueError(
            f"Unknown method '{method}'. Choose from 'linear', 'constant', 'polynomial', 'sens'."
        )

    # Function-scope import kept from the original implementation
    # (presumably to avoid a circular import — confirm).
    from statista.time_series import TimeSeries

    result_data = np.empty_like(self.values, dtype=float)

    for i, col in enumerate(self.columns):
        data = self[col].values.astype(float)

        if method in ("linear", "constant"):
            # scipy's ``type`` argument uses the same names as ``method``.
            result_data[:, i] = scipy_detrend(data, type=method)
        elif method == "polynomial":
            x = np.arange(len(data), dtype=float)
            coeffs = np.polyfit(x, data, order)
            result_data[:, i] = data - np.polyval(coeffs, x)
        else:  # "sens"
            x = np.arange(len(data), dtype=float)
            slope, intercept, _, _ = theilslopes(data, x)
            result_data[:, i] = data - (intercept + slope * x)

    return TimeSeries(result_data, index=self.index, columns=list(self.columns))

`innovative_trend_analysis(column=None, **kwargs)` #

Innovative Trend Analysis (ITA) — Sen (2012) method.

Splits the sorted data into two halves and plots the first half (x-axis) against the second half (y-axis). Points above the 1:1 line indicate an increasing trend, points below indicate a decreasing trend.

A minimum of 20 observations is required so that each half has at least 10 points for a meaningful distribution comparison. This threshold reflects common practice for ITA; it is not prescribed by Sen (2012) but is widely used in the literature (e.g., Serinaldi et al., 2020).

Parameters:

Name	Type	Description	Default
`column`	`str`	Column to analyze. If None, uses first column.	`None`
`**kwargs`	`Any`	Passed to `_adjust_axes_labels` (title, xlabel, ylabel, etc.).	`{}`

Returns:

Name	Type	Description
`tuple`	`tuple[DataFrame, tuple[Figure, Axes]]`	(results_df, (fig, ax)). results_df has columns: column, trend_indicator (positive = increasing).

Raises:

Type	Description
`ValueError`	If the series has fewer than 20 observations after dropping NaN.

Examples:

>>> import numpy as np
>>> from statista.time_series import TimeSeries
>>> ts = TimeSeries(np.arange(100, dtype=float))
>>> result_df, (fig, ax) = ts.innovative_trend_analysis()

References

Sen, Z. (2012). Innovative Trend Analysis Methodology. Journal of Hydrologic Engineering, 17(9), 1042-1046.

Serinaldi, F., Chebana, F., Kilsby, C.G. (2020). Dissecting innovative trend analysis. Stochastic Environmental Research and Risk Assessment, 34, 733-754.

Source code in src\statista\time_series\trend.py

def innovative_trend_analysis(
    self,
    column: str = None,
    **kwargs: Any,
) -> tuple[DataFrame, tuple[Figure, Axes]]:
    """Innovative Trend Analysis (ITA) — Sen (2012) method.

    Splits the series chronologically into two equal halves, sorts each half
    in ascending order, and plots the sorted first half (x-axis) against the
    sorted second half (y-axis). Points above the 1:1 line indicate an
    increasing trend, points below indicate a decreasing trend.

    A minimum of 20 observations is required so that each half has at least 10 points
    for a meaningful distribution comparison. This threshold reflects common practice
    for ITA; it is not prescribed by Sen (2012) but is widely used in the literature
    (e.g., Serinaldi et al., 2020).

    If the number of non-NaN observations is odd, the last (most recent)
    observation is dropped and a ``UserWarning`` is emitted so that both
    halves have the same length.

    The figure is displayed via ``plt.show()`` before the method returns.

    Args:
        column: Column to analyze. If None, uses first column.
        **kwargs: Passed to ``_get_ax_fig`` and, without ``fig``/``ax``, to
            ``_adjust_axes_labels`` (title, xlabel, ylabel, etc.).

    Returns:
        tuple: (results_df, (fig, ax)).
            results_df is indexed by column name and has one column,
            trend_indicator: the mean of (sorted second half - sorted first
            half). Positive = increasing, negative = decreasing.

    Raises:
        ValueError: If the series has fewer than 20 observations after dropping NaN.

    Examples:
        >>> import numpy as np  # doctest: +SKIP
        >>> from statista.time_series import TimeSeries  # doctest: +SKIP
        >>> ts = TimeSeries(np.arange(100, dtype=float))  # doctest: +SKIP
        >>> result_df, (fig, ax) = ts.innovative_trend_analysis()  # doctest: +SKIP

    References:
        Sen, Z. (2012). Innovative Trend Analysis Methodology. Journal of Hydrologic
        Engineering, 17(9), 1042-1046.

        Serinaldi, F., Chebana, F., Kilsby, C.G. (2020). Dissecting innovative trend
        analysis. Stochastic Environmental Research and Risk Assessment, 34, 733-754.
    """
    if column is None:
        column = self.columns[0]

    # Keep the observations in their original (chronological) order here.
    # Sorting the whole series before splitting would force every first-half
    # value to be <= its second-half partner, so the indicator could never be
    # negative.
    data = self[column].dropna().values
    n = len(data)

    # Validate minimum sample size
    if n < 20:
        raise ValueError(
            f"Innovative trend analysis requires at least 20 observations, got {n}"
        )

    # Handle odd-length series: drop the most recent observation.
    if n % 2 != 0:
        data = data[:-1]
        n -= 1
        warnings.warn(
            f"Column '{column}' has odd length. Last observation dropped for analysis.",
            UserWarning,
            stacklevel=2,
        )

    mid = n // 2

    # Sen (2012): each half is sorted separately, then paired rank by rank.
    first_half = np.sort(data[:mid])
    second_half = np.sort(data[mid:])

    fig, ax = self._get_ax_fig(**kwargs)
    # fig/ax were consumed above and must not reach the label helper.
    kwargs.pop("fig", None)
    kwargs.pop("ax", None)

    ax.scatter(
        first_half,
        second_half,
        alpha=0.5,
        s=15,
        color="steelblue",
        edgecolor="white",
        linewidth=0.3,
    )

    # 1:1 line
    min_val = min(first_half.min(), second_half.min())
    max_val = max(first_half.max(), second_half.max())
    ax.plot(
        [min_val, max_val], [min_val, max_val], "k-", linewidth=1, label="1:1 line"
    )

    # +/- 10% envelope (10% of the plotted data range, as a vertical offset)
    range_val = max_val - min_val
    offset = 0.10 * range_val
    ax.plot(
        [min_val, max_val],
        [min_val + offset, max_val + offset],
        "r--",
        linewidth=0.7,
        label="+10%",
    )
    ax.plot(
        [min_val, max_val],
        [min_val - offset, max_val - offset],
        "b--",
        linewidth=0.7,
        label="-10%",
    )

    # Trend indicator: mean deviation from 1:1 line
    trend_indicator = float(np.mean(second_half - first_half))

    ax.annotate(
        f"Trend indicator: {trend_indicator:.3f}",
        xy=(0.05, 0.95),
        xycoords="axes fraction",
        fontsize=10,
        va="top",
    )

    # Caller-supplied labels win; otherwise fall back to descriptive defaults.
    if "title" not in kwargs:
        kwargs["title"] = f"Innovative Trend Analysis — {column}"
    if "xlabel" not in kwargs:
        kwargs["xlabel"] = "First half (sorted)"
    if "ylabel" not in kwargs:
        kwargs["ylabel"] = "Second half (sorted)"

    ax = self._adjust_axes_labels(ax, **kwargs)
    plt.show()

    result_df = DataFrame(
        [
            {
                "column": column,
                "trend_indicator": trend_indicator,
            }
        ]
    ).set_index("column")

    return result_df, (fig, ax)

Trend Detection#

statista.time_series.trend #

Trend #

mann_kendall(alpha=DEFAULT_ALPHA, method='original', lag=None, column=None) #

sens_slope(alpha=DEFAULT_ALPHA, column=None) #

detrend(method='linear', order=1) #

innovative_trend_analysis(column=None, **kwargs) #

Trend Detection #

`statista.time_series.trend` #

`Trend` #

`mann_kendall(alpha=DEFAULT_ALPHA, method='original', lag=None, column=None)` #

`sens_slope(alpha=DEFAULT_ALPHA, column=None)` #

`detrend(method='linear', order=1)` #

`innovative_trend_analysis(column=None, **kwargs)` #