Skip to content

Stationarity Testing#

statista.time_series.stationarity #

Stationarity testing mixin for TimeSeries.

Stationarity #

Bases: _TimeSeriesStub

Stationarity tests for TimeSeries.

Source code in src/statista/time_series/stationarity.py
class Stationarity(_TimeSeriesStub):
    """Stationarity tests for TimeSeries.

    Mixin providing the Augmented Dickey-Fuller test, the KPSS test and a
    combined diagnosis. Every method returns a ``DataFrame`` indexed by the
    tested column name.
    """

    def adf_test(
        self,
        regression: str = "c",
        max_lag: int = None,
        column: str = None,
    ) -> DataFrame:
        """Augmented Dickey-Fuller unit root test.

        Tests the null hypothesis that a unit root is present (series is non-stationary).
        Rejecting the null (p-value < alpha) indicates the series is stationary.

        Implemented from scratch using OLS regression and MacKinnon (1994) approximate
        p-values via ``scipy.stats.distributions``.

        Missing values are dropped from each column before it is tested.

        Args:
            regression: Deterministic terms to include.

                - "c": constant only (default). Tests level stationarity.
                - "ct": constant + linear trend. Tests trend stationarity.
                - "n": no constant, no trend.
            max_lag: Maximum number of lagged differences to include. If None,
                uses ``int(12 * (n / 100) ** 0.25)`` (Schwert, 1989).
            column: Column to test. If None, tests all columns.

        Returns:
            pandas.DataFrame: One row per column with: statistic, p_value, used_lag,
                n_obs, crit_1%, crit_5%, crit_10%, conclusion.

        Examples:
            >>> import numpy as np
            >>> from statista.time_series import TimeSeries

            Stationary white noise rejects the null (p < 0.05):

            >>> np.random.seed(42)
            >>> ts = TimeSeries(np.random.randn(200))
            >>> result = ts.adf_test()
            >>> round(float(result.loc["Series1", "statistic"]), 4)
            -3.309
            >>> round(float(result.loc["Series1", "p_value"]), 4)
            0.0187
            >>> result.loc["Series1", "conclusion"]
            'Stationary'

            Non-stationary random walk fails to reject:

            >>> np.random.seed(10)
            >>> rw = np.cumsum(np.random.randn(200))
            >>> ts_rw = TimeSeries(rw)
            >>> result_rw = ts_rw.adf_test()
            >>> round(float(result_rw.loc["Series1", "p_value"]), 4)
            0.2937
            >>> result_rw.loc["Series1", "conclusion"]
            'Non-stationary'

            Real hydrological data:

            >>> data = np.loadtxt("examples/data/time_series1.txt")
            >>> ts = TimeSeries(data)
            >>> result = ts.adf_test()
            >>> round(float(result.loc["Series1", "statistic"]), 4)
            -2.0713

        References:
            Dickey, D.A. and Fuller, W.A. (1979). Distribution of the estimators for
            autoregressive time series with a unit root. JASA, 74(366), 427-431.

            MacKinnon, J.G. (1994). Approximate asymptotic distribution functions for
            unit-root and cointegration tests. JBES, 12(2), 167-176.
        """
        cols = [column] if column is not None else list(self.columns)
        rows = [
            {
                "column": col,
                **_adf_test_single(
                    self[col].dropna().values,
                    regression=regression,
                    max_lag=max_lag,
                ),
            }
            for col in cols
        ]
        return DataFrame(rows).set_index("column")

    def kpss_test(
        self,
        regression: str = "c",
        n_lags: int = None,
        column: str = None,
    ) -> DataFrame:
        """KPSS stationarity test.

        Tests the null hypothesis that the series IS stationary. Rejecting the null
        (p-value < alpha) indicates non-stationarity. **This is the opposite of ADF.**

        Implemented from scratch following Kwiatkowski et al. (1992).

        Missing values are dropped from each column before it is tested.

        Args:
            regression: Type of stationarity to test.

                - "c": level stationarity (default). Null: stationary around a constant.
                - "ct": trend stationarity. Null: stationary around a linear trend.
            n_lags: Lag truncation for the Newey-West estimator. If None,
                uses ``int(np.sqrt(12 * n / 100))`` (Hobijn et al., 1998).
            column: Column to test. If None, tests all columns.

        Returns:
            pandas.DataFrame: One row per column with: statistic, p_value, lags,
                crit_10%, crit_5%, crit_2.5%, crit_1%, conclusion.

        Examples:
            >>> import numpy as np
            >>> from statista.time_series import TimeSeries

            Stationary white noise does not reject the null (p > 0.05):

            >>> np.random.seed(42)
            >>> ts = TimeSeries(np.random.randn(200))
            >>> result = ts.kpss_test()
            >>> round(float(result.loc["Series1", "statistic"]), 4)
            0.1974
            >>> round(float(result.loc["Series1", "p_value"]), 1)
            0.1
            >>> result.loc["Series1", "conclusion"]
            'Stationary'

            Non-stationary random walk rejects the null:

            >>> np.random.seed(10)
            >>> rw = np.cumsum(np.random.randn(200))
            >>> ts_rw = TimeSeries(rw)
            >>> result_rw = ts_rw.kpss_test()
            >>> round(float(result_rw.loc["Series1", "statistic"]), 4)
            2.8977
            >>> result_rw.loc["Series1", "conclusion"]
            'Non-stationary'

            Real hydrological data:

            >>> data = np.loadtxt("examples/data/time_series1.txt")
            >>> ts = TimeSeries(data)
            >>> result = ts.kpss_test()
            >>> round(float(result.loc["Series1", "statistic"]), 4)
            0.1003

        References:
            Kwiatkowski, D., Phillips, P.C.B., Schmidt, P. and Shin, Y. (1992).
            Testing the null hypothesis of stationarity against the alternative of
            a unit root. Journal of Econometrics, 54(1-3), 159-178.
        """
        cols = [column] if column is not None else list(self.columns)
        rows = [
            {
                "column": col,
                **_kpss_test_single(
                    self[col].dropna().values,
                    regression=regression,
                    n_lags=n_lags,
                ),
            }
            for col in cols
        ]
        return DataFrame(rows).set_index("column")

    def stationarity_summary(self, alpha: float = DEFAULT_ALPHA) -> DataFrame:
        """Combined ADF + KPSS stationarity diagnosis.

        Runs both ADF and KPSS tests (with their default arguments, on every
        column) and produces an interpretation:

        +---------------+----------------+-------------------------------------------+
        | ADF rejects?  | KPSS rejects?  | Diagnosis                                 |
        +===============+================+===========================================+
        | Yes           | No             | Stationary                                |
        +---------------+----------------+-------------------------------------------+
        | No            | Yes            | Non-stationary (unit root)                |
        +---------------+----------------+-------------------------------------------+
        | Yes           | Yes            | Trend-stationary                          |
        +---------------+----------------+-------------------------------------------+
        | No            | No             | Inconclusive                              |
        +---------------+----------------+-------------------------------------------+

        A test "rejects" when its p-value is strictly below ``alpha``.

        Constant series (all non-missing values identical) are treated as a
        special case and diagnosed as ``"Stationary (constant)"``, since they
        are trivially stationary by definition (constant mean, zero variance,
        constant autocorrelation). The statistics and p-values reported for
        such a column are still whatever the two tests returned.

        Args:
            alpha: Significance level for both tests. Default 0.05.

        Returns:
            pandas.DataFrame: One row per column with: adf_stat, adf_pvalue,
                kpss_stat, kpss_pvalue, diagnosis.

        Examples:
            >>> import numpy as np
            >>> from statista.time_series import TimeSeries

            Stationary white noise (ADF rejects, KPSS does not):

            >>> np.random.seed(42)
            >>> ts = TimeSeries(np.random.randn(200))
            >>> result = ts.stationarity_summary()
            >>> result.loc["Series1", "diagnosis"]
            'Stationary'
            >>> round(float(result.loc["Series1", "adf_stat"]), 4)
            -3.309
            >>> round(float(result.loc["Series1", "kpss_stat"]), 4)
            0.1974

            Non-stationary random walk (ADF fails to reject, KPSS rejects):

            >>> np.random.seed(10)
            >>> rw = np.cumsum(np.random.randn(200))
            >>> ts_rw = TimeSeries(rw)
            >>> result_rw = ts_rw.stationarity_summary()
            >>> result_rw.loc["Series1", "diagnosis"]
            'Non-stationary (unit root)'
        """
        adf_df = self.adf_test()
        kpss_df = self.kpss_test()

        # (adf_rejects, kpss_rejects) -> diagnosis label.
        # NOTE(review): the usual ADF/KPSS reading calls the (True, True) case
        # "difference-stationary"; the label is kept as-is because callers may
        # match on the string - confirm the intended wording.
        verdicts = {
            (True, False): "Stationary",
            (False, True): "Non-stationary (unit root)",
            (True, True): "Trend-stationary",
            (False, False): "Inconclusive",
        }

        rows = []
        for col in self.columns:
            adf_pvalue = float(adf_df.loc[col, "p_value"])
            kpss_pvalue = float(kpss_df.loc[col, "p_value"])

            # A constant series is trivially stationary: bypass the p-value
            # logic, which would otherwise report "Inconclusive" because neither
            # test rejects its null for degenerate input.
            # Constancy is checked by exact element equality rather than
            # ``np.std(...) == 0``: for floats the rounded mean can differ from
            # the repeated value (e.g. [0.1, 0.1, 0.1]), giving a tiny non-zero
            # std that would miss this branch.
            col_data = self[col].dropna().values
            if len(col_data) > 0 and bool(np.all(col_data == col_data[0])):
                diagnosis = "Stationary (constant)"
            else:
                diagnosis = verdicts[(adf_pvalue < alpha, kpss_pvalue < alpha)]

            rows.append(
                {
                    "column": col,
                    "adf_stat": float(adf_df.loc[col, "statistic"]),
                    "adf_pvalue": adf_pvalue,
                    "kpss_stat": float(kpss_df.loc[col, "statistic"]),
                    "kpss_pvalue": kpss_pvalue,
                    "diagnosis": diagnosis,
                }
            )

        return DataFrame(rows).set_index("column")
adf_test(regression='c', max_lag=None, column=None) #

Augmented Dickey-Fuller unit root test.

Tests the null hypothesis that a unit root is present (series is non-stationary). Rejecting the null (p-value < alpha) indicates the series is stationary.

Implemented from scratch using OLS regression and MacKinnon (1994) approximate p-values via scipy.stats.distributions.

Parameters:

Name Type Description Default
regression str

Deterministic terms to include:

- "c": constant only (default). Tests level stationarity.
- "ct": constant + linear trend. Tests trend stationarity.
- "n": no constant, no trend.

'c'
max_lag int

Maximum number of lagged differences to include. If None, uses int(12 * (n / 100) ** 0.25) (Schwert, 1989).

None
column str

Column to test. If None, tests all columns.

None

Returns:

Type Description
DataFrame

pandas.DataFrame: One row per column with: statistic, p_value, used_lag, n_obs, crit_1%, crit_5%, crit_10%, conclusion.

Examples:

>>> import numpy as np
>>> from statista.time_series import TimeSeries

Stationary white noise rejects the null (p < 0.05):

>>> np.random.seed(42)
>>> ts = TimeSeries(np.random.randn(200))
>>> result = ts.adf_test()
>>> round(float(result.loc["Series1", "statistic"]), 4)
-3.309
>>> round(float(result.loc["Series1", "p_value"]), 4)
0.0187
>>> result.loc["Series1", "conclusion"]
'Stationary'

Non-stationary random walk fails to reject:

>>> np.random.seed(10)
>>> rw = np.cumsum(np.random.randn(200))
>>> ts_rw = TimeSeries(rw)
>>> result_rw = ts_rw.adf_test()
>>> round(float(result_rw.loc["Series1", "p_value"]), 4)
0.2937
>>> result_rw.loc["Series1", "conclusion"]
'Non-stationary'

Real hydrological data:

>>> data = np.loadtxt("examples/data/time_series1.txt")
>>> ts = TimeSeries(data)
>>> result = ts.adf_test()
>>> round(float(result.loc["Series1", "statistic"]), 4)
-2.0713
References

Dickey, D.A. and Fuller, W.A. (1979). Distribution of the estimators for autoregressive time series with a unit root. JASA, 74(366), 427-431.

MacKinnon, J.G. (1994). Approximate asymptotic distribution functions for unit-root and cointegration tests. JBES, 12(2), 167-176.

Source code in src/statista/time_series/stationarity.py
def adf_test(
    self,
    regression: str = "c",
    max_lag: int = None,
    column: str = None,
) -> DataFrame:
    """Run the Augmented Dickey-Fuller unit root test on one or all columns.

    The null hypothesis is that a unit root is present, i.e. the series is
    non-stationary. A p-value below the chosen alpha rejects the null and
    points to a stationary series.

    Implemented from scratch using OLS regression and MacKinnon (1994)
    approximate p-values via ``scipy.stats.distributions``. Missing values
    are dropped from each column before it is tested.

    Args:
        regression: Deterministic terms to include.

            - "c": constant only (default). Tests level stationarity.
            - "ct": constant + linear trend. Tests trend stationarity.
            - "n": no constant, no trend.
        max_lag: Maximum number of lagged differences to include. If None,
            uses ``int(12 * (n / 100) ** 0.25)`` (Schwert, 1989).
        column: Column to test. If None, tests all columns.

    Returns:
        pandas.DataFrame: Indexed by column name, one row per tested column,
            with: statistic, p_value, used_lag, n_obs, crit_1%, crit_5%,
            crit_10%, conclusion.

    Examples:
        >>> import numpy as np
        >>> from statista.time_series import TimeSeries

        Stationary white noise rejects the null (p < 0.05):

        >>> np.random.seed(42)
        >>> ts = TimeSeries(np.random.randn(200))
        >>> result = ts.adf_test()
        >>> round(float(result.loc["Series1", "statistic"]), 4)
        -3.309
        >>> round(float(result.loc["Series1", "p_value"]), 4)
        0.0187
        >>> result.loc["Series1", "conclusion"]
        'Stationary'

        Non-stationary random walk fails to reject:

        >>> np.random.seed(10)
        >>> rw = np.cumsum(np.random.randn(200))
        >>> ts_rw = TimeSeries(rw)
        >>> result_rw = ts_rw.adf_test()
        >>> round(float(result_rw.loc["Series1", "p_value"]), 4)
        0.2937
        >>> result_rw.loc["Series1", "conclusion"]
        'Non-stationary'

        Real hydrological data:

        >>> data = np.loadtxt("examples/data/time_series1.txt")
        >>> ts = TimeSeries(data)
        >>> result = ts.adf_test()
        >>> round(float(result.loc["Series1", "statistic"]), 4)
        -2.0713

    References:
        Dickey, D.A. and Fuller, W.A. (1979). Distribution of the estimators for
        autoregressive time series with a unit root. JASA, 74(366), 427-431.

        MacKinnon, J.G. (1994). Approximate asymptotic distribution functions for
        unit-root and cointegration tests. JBES, 12(2), 167-176.
    """
    # A single requested column is wrapped in a list so both cases share one path.
    targets = list(self.columns) if column is None else [column]

    records = [
        {
            "column": name,
            **_adf_test_single(
                self[name].dropna().values,
                regression=regression,
                max_lag=max_lag,
            ),
        }
        for name in targets
    ]
    return DataFrame(records).set_index("column")
kpss_test(regression='c', n_lags=None, column=None) #

KPSS stationarity test.

Tests the null hypothesis that the series IS stationary. Rejecting the null (p-value < alpha) indicates non-stationarity. This is the opposite of ADF.

Implemented from scratch following Kwiatkowski et al. (1992).

Parameters:

Name Type Description Default
regression str

Type of stationarity to test:

- "c": level stationarity (default). Null: stationary around a constant.
- "ct": trend stationarity. Null: stationary around a linear trend.

'c'
n_lags int

Lag truncation for the Newey-West estimator. If None, uses int(np.sqrt(12 * n / 100)) (Hobijn et al., 1998).

None
column str

Column to test. If None, tests all columns.

None

Returns:

Type Description
DataFrame

pandas.DataFrame: One row per column with: statistic, p_value, lags, crit_10%, crit_5%, crit_2.5%, crit_1%, conclusion.

Examples:

>>> import numpy as np
>>> from statista.time_series import TimeSeries

Stationary white noise does not reject the null (p > 0.05):

>>> np.random.seed(42)
>>> ts = TimeSeries(np.random.randn(200))
>>> result = ts.kpss_test()
>>> round(float(result.loc["Series1", "statistic"]), 4)
0.1974
>>> round(float(result.loc["Series1", "p_value"]), 1)
0.1
>>> result.loc["Series1", "conclusion"]
'Stationary'

Non-stationary random walk rejects the null:

>>> np.random.seed(10)
>>> rw = np.cumsum(np.random.randn(200))
>>> ts_rw = TimeSeries(rw)
>>> result_rw = ts_rw.kpss_test()
>>> round(float(result_rw.loc["Series1", "statistic"]), 4)
2.8977
>>> result_rw.loc["Series1", "conclusion"]
'Non-stationary'

Real hydrological data:

>>> data = np.loadtxt("examples/data/time_series1.txt")
>>> ts = TimeSeries(data)
>>> result = ts.kpss_test()
>>> round(float(result.loc["Series1", "statistic"]), 4)
0.1003
References

Kwiatkowski, D., Phillips, P.C.B., Schmidt, P. and Shin, Y. (1992). Testing the null hypothesis of stationarity against the alternative of a unit root. Journal of Econometrics, 54(1-3), 159-178.

Source code in src/statista/time_series/stationarity.py
def kpss_test(
    self,
    regression: str = "c",
    n_lags: int = None,
    column: str = None,
) -> DataFrame:
    """Run the KPSS stationarity test on one or all columns.

    The null hypothesis is that the series IS stationary, so a p-value below
    the chosen alpha indicates non-stationarity. **This is the opposite of
    ADF.**

    Implemented from scratch following Kwiatkowski et al. (1992). Missing
    values are dropped from each column before it is tested.

    Args:
        regression: Type of stationarity to test.

            - "c": level stationarity (default). Null: stationary around a constant.
            - "ct": trend stationarity. Null: stationary around a linear trend.
        n_lags: Lag truncation for the Newey-West estimator. If None,
            uses ``int(np.sqrt(12 * n / 100))`` (Hobijn et al., 1998).
        column: Column to test. If None, tests all columns.

    Returns:
        pandas.DataFrame: Indexed by column name, one row per tested column,
            with: statistic, p_value, lags, crit_10%, crit_5%, crit_2.5%,
            crit_1%, conclusion.

    Examples:
        >>> import numpy as np
        >>> from statista.time_series import TimeSeries

        Stationary white noise does not reject the null (p > 0.05):

        >>> np.random.seed(42)
        >>> ts = TimeSeries(np.random.randn(200))
        >>> result = ts.kpss_test()
        >>> round(float(result.loc["Series1", "statistic"]), 4)
        0.1974
        >>> round(float(result.loc["Series1", "p_value"]), 1)
        0.1
        >>> result.loc["Series1", "conclusion"]
        'Stationary'

        Non-stationary random walk rejects the null:

        >>> np.random.seed(10)
        >>> rw = np.cumsum(np.random.randn(200))
        >>> ts_rw = TimeSeries(rw)
        >>> result_rw = ts_rw.kpss_test()
        >>> round(float(result_rw.loc["Series1", "statistic"]), 4)
        2.8977
        >>> result_rw.loc["Series1", "conclusion"]
        'Non-stationary'

        Real hydrological data:

        >>> data = np.loadtxt("examples/data/time_series1.txt")
        >>> ts = TimeSeries(data)
        >>> result = ts.kpss_test()
        >>> round(float(result.loc["Series1", "statistic"]), 4)
        0.1003

    References:
        Kwiatkowski, D., Phillips, P.C.B., Schmidt, P. and Shin, Y. (1992).
        Testing the null hypothesis of stationarity against the alternative of
        a unit root. Journal of Econometrics, 54(1-3), 159-178.
    """
    if column is None:
        targets = list(self.columns)
    else:
        targets = [column]

    records = []
    for name in targets:
        series_values = self[name].dropna().values
        outcome = _kpss_test_single(
            series_values, regression=regression, n_lags=n_lags
        )
        # Start with the column label so it leads the record, then merge in
        # the per-series test output.
        record = {"column": name}
        record.update(outcome)
        records.append(record)

    return DataFrame(records).set_index("column")
stationarity_summary(alpha=DEFAULT_ALPHA) #

Combined ADF + KPSS stationarity diagnosis.

Runs both ADF and KPSS tests and produces an interpretation:

| ADF rejects? | KPSS rejects? | Diagnosis                  |
|--------------|---------------|----------------------------|
| Yes          | No            | Stationary                 |
| No           | Yes           | Non-stationary (unit root) |
| Yes          | Yes           | Trend-stationary           |
| No           | No            | Inconclusive               |

Constant series (std=0) are treated as a special case and diagnosed as "Stationary (constant)", since they are trivially stationary by definition (constant mean, zero variance, constant autocorrelation).

Parameters:

Name Type Description Default
alpha float

Significance level for both tests. Default 0.05.

DEFAULT_ALPHA

Returns:

Type Description
DataFrame

pandas.DataFrame: One row per column with: adf_stat, adf_pvalue, kpss_stat, kpss_pvalue, diagnosis.

Examples:

>>> import numpy as np
>>> from statista.time_series import TimeSeries

Stationary white noise (ADF rejects, KPSS does not):

>>> np.random.seed(42)
>>> ts = TimeSeries(np.random.randn(200))
>>> result = ts.stationarity_summary()
>>> result.loc["Series1", "diagnosis"]
'Stationary'
>>> round(float(result.loc["Series1", "adf_stat"]), 4)
-3.309
>>> round(float(result.loc["Series1", "kpss_stat"]), 4)
0.1974

Non-stationary random walk (ADF fails to reject, KPSS rejects):

>>> np.random.seed(10)
>>> rw = np.cumsum(np.random.randn(200))
>>> ts_rw = TimeSeries(rw)
>>> result_rw = ts_rw.stationarity_summary()
>>> result_rw.loc["Series1", "diagnosis"]
'Non-stationary (unit root)'
Source code in src/statista/time_series/stationarity.py
def stationarity_summary(self, alpha: float = DEFAULT_ALPHA) -> DataFrame:
    """Combined ADF + KPSS stationarity diagnosis.

    Runs both ADF and KPSS tests (with their default arguments, on every
    column) and produces an interpretation:

    +---------------+----------------+-------------------------------------------+
    | ADF rejects?  | KPSS rejects?  | Diagnosis                                 |
    +===============+================+===========================================+
    | Yes           | No             | Stationary                                |
    +---------------+----------------+-------------------------------------------+
    | No            | Yes            | Non-stationary (unit root)                |
    +---------------+----------------+-------------------------------------------+
    | Yes           | Yes            | Trend-stationary                          |
    +---------------+----------------+-------------------------------------------+
    | No            | No             | Inconclusive                              |
    +---------------+----------------+-------------------------------------------+

    A test "rejects" when its p-value is strictly below ``alpha``.

    Constant series (all non-missing values identical) are treated as a
    special case and diagnosed as ``"Stationary (constant)"``, since they
    are trivially stationary by definition (constant mean, zero variance,
    constant autocorrelation). The statistics and p-values reported for such
    a column are still whatever the two tests returned.

    Args:
        alpha: Significance level for both tests. Default 0.05.

    Returns:
        pandas.DataFrame: One row per column with: adf_stat, adf_pvalue,
            kpss_stat, kpss_pvalue, diagnosis.

    Examples:
        >>> import numpy as np
        >>> from statista.time_series import TimeSeries

        Stationary white noise (ADF rejects, KPSS does not):

        >>> np.random.seed(42)
        >>> ts = TimeSeries(np.random.randn(200))
        >>> result = ts.stationarity_summary()
        >>> result.loc["Series1", "diagnosis"]
        'Stationary'
        >>> round(float(result.loc["Series1", "adf_stat"]), 4)
        -3.309
        >>> round(float(result.loc["Series1", "kpss_stat"]), 4)
        0.1974

        Non-stationary random walk (ADF fails to reject, KPSS rejects):

        >>> np.random.seed(10)
        >>> rw = np.cumsum(np.random.randn(200))
        >>> ts_rw = TimeSeries(rw)
        >>> result_rw = ts_rw.stationarity_summary()
        >>> result_rw.loc["Series1", "diagnosis"]
        'Non-stationary (unit root)'
    """
    adf_df = self.adf_test()
    kpss_df = self.kpss_test()

    # (adf_rejects, kpss_rejects) -> diagnosis label.
    # NOTE(review): the usual ADF/KPSS reading calls the (True, True) case
    # "difference-stationary"; the label is kept as-is because callers may
    # match on the string - confirm the intended wording.
    verdicts = {
        (True, False): "Stationary",
        (False, True): "Non-stationary (unit root)",
        (True, True): "Trend-stationary",
        (False, False): "Inconclusive",
    }

    rows = []
    for col in self.columns:
        adf_pvalue = float(adf_df.loc[col, "p_value"])
        kpss_pvalue = float(kpss_df.loc[col, "p_value"])

        # A constant series is trivially stationary: bypass the p-value
        # logic, which would otherwise report "Inconclusive" because neither
        # test rejects its null for degenerate input.
        # Constancy is checked by exact element equality rather than
        # ``np.std(...) == 0``: for floats the rounded mean can differ from
        # the repeated value (e.g. [0.1, 0.1, 0.1]), giving a tiny non-zero
        # std that would miss this branch.
        col_data = self[col].dropna().values
        if len(col_data) > 0 and bool(np.all(col_data == col_data[0])):
            diagnosis = "Stationary (constant)"
        else:
            diagnosis = verdicts[(adf_pvalue < alpha, kpss_pvalue < alpha)]

        rows.append(
            {
                "column": col,
                "adf_stat": float(adf_df.loc[col, "statistic"]),
                "adf_pvalue": adf_pvalue,
                "kpss_stat": float(kpss_df.loc[col, "statistic"]),
                "kpss_pvalue": kpss_pvalue,
                "diagnosis": diagnosis,
            }
        )

    return DataFrame(rows).set_index("column")