This example has been auto-generated from the examples/ folder at GitHub repository.

Autoregressive Models

# Activate local environment, see `Project.toml`
import Pkg; Pkg.activate(".."); Pkg.instantiate();

In this example we are going to perform an automated Variational Bayesian Inference for a latent autoregressive model that can be represented as following:

\[\begin{aligned} p(\gamma) &= \mathrm{Gamma}(\gamma|a, b),\\ p(\mathbf{\theta}) &= \mathcal{N}(\mathbf{\theta}|\mathbf{\mu}, \Sigma),\\ p(x_t|\mathbf{x}_{t-1:t-k}) &= \mathcal{N}(x_t|\mathbf{\theta}^{T}\mathbf{x}_{t-1:t-k}, \gamma^{-1}),\\ p(y_t|x_t) &= \mathcal{N}(y_t|x_t, \tau^{-1}), \end{aligned}\]

where $x_t$ is a current state of our system, $\mathbf{x}_{t-1:t-k}$ is a sequence of $k$ previous states, $k$ is an order of autoregression process, $\mathbf{\theta}$ is a vector of transition coefficients, $\gamma$ is a precision of state transition process, $y_k$ is a noisy observation of $x_k$ with precision $\tau$.

For a more rigorous introduction to Bayesian inference in Autoregressive models we refer to Albert Podusenko, Message Passing-Based Inference for Time-Varying Autoregressive Models.

We start with importing all needed packages:

using RxInfer, Distributions, LinearAlgebra, Random, Plots, BenchmarkTools, Parameters

Let's generate some synthetic dataset, we use a predefined sets of coeffcients for $k$ = 1, 3 and 5 respectively:

# The following coefficients correspond to stable poles
coefs_ar_1 = [-0.27002517200218096]
coefs_ar_2 = [0.4511170798064709, -0.05740081602446657]
coefs_ar_5 = [0.10699399235785655, -0.5237303489793305, 0.3068897071844715, -0.17232255282458891, 0.13323964347539288];

function generate_ar_data(rng, n, θ, γ, τ)
    order        = length(θ)
    states       = Vector{Vector{Float64}}(undef, n + 3order)
    observations = Vector{Float64}(undef, n + 3order)
    
    γ_std = sqrt(inv(γ))
    τ_std = sqrt(inv(τ))
    
    states[1] = randn(rng, order)
    
    for i in 2:(n + 3order)
        states[i]       = vcat(rand(rng, Normal(dot(θ, states[i - 1]), γ_std)), states[i-1][1:end-1])
        observations[i] = rand(rng, Normal(states[i][1], τ_std))
    end
    
    return states[1+3order:end], observations[1+3order:end]
end

generate_ar_data (generic function with 1 method)

# Seed for reproducibility
seed = 123
rng  = MersenneTwister(seed)

# Number of observations in synthetic dataset
n = 500

# AR process parameters
real_γ = 1.0
real_τ = 0.5
real_θ = coefs_ar_5

states, observations = generate_ar_data(rng, n, real_θ, real_γ, real_τ);

Let's plot our synthetic dataset:

plot(first.(states), label = "Hidden states")
scatter!(observations, label = "Observations")

Next step is to specify probabilistic model, inference constraints and run inference procedure with RxInfer. We will specify two different models for Multivariate AR with order $k$ > 1 and for Univariate AR (reduces to simple State-Space-Model) with order $k$ = 1.

@model function lar_model(T, y, order, c, τ)
    
    
    # Prior for first state
    if T === Multivariate
        γ  ~ Gamma(α = 1.0, β = 1.0)
        θ  ~ MvNormal(μ = zeros(order), Λ = diageye(order))
        x0 ~ MvNormal(μ = zeros(order), Λ = diageye(order))
    else
        γ  ~ Gamma(α = 1.0, β = 1.0)
        θ  ~ Normal(μ = 0.0, γ = 1.0)
        x0 ~ Normal(μ = 0.0, γ = 1.0)
    end
    
    x_prev = x0
    
    for i in eachindex(y)
        
        x[i] ~ AR(x_prev, θ, γ) 
        
        if T === Multivariate
            y[i] ~ Normal(μ = dot(c, x[i]), γ = τ)
        else
            y[i] ~ Normal(μ = c * x[i], γ = τ)
        end
        
        x_prev = x[i]
    end

end

@constraints function ar_constraints() 
    q(x0, x, θ, γ) = q(x0, x)q(θ)q(γ)
end

ar_constraints (generic function with 1 method)

@meta function ar_meta(artype, order, stype)
    AR() -> ARMeta(artype, order, stype)
end

ar_meta (generic function with 1 method)

@initialization function ar_init(morder)
    q(γ) = GammaShapeRate(1.0, 1.0)
    q(θ) = MvNormalMeanPrecision(zeros(morder), diageye(morder))
end

ar_init (generic function with 1 method)

morder  = 5
martype = Multivariate
mc      = ReactiveMP.ar_unit(martype, morder)
mconstraints = ar_constraints()
mmeta        = ar_meta(martype, morder, ARsafe())

moptions = (limit_stack_depth = 100, )

mmodel          = lar_model(T=martype, order=morder, c=mc, τ=real_τ)
mdata           = (y = observations, )
minitialization = ar_init(morder)
mreturnvars     = (x = KeepLast(), γ = KeepEach(), θ = KeepEach())

# First execution is slow due to Julia's initial compilation 
# Subsequent runs will be faster (benchmarks are below)
mresult = infer(
    model = mmodel, 
    data  = mdata,
    constraints   = mconstraints,
    meta          = mmeta,
    options       = moptions,
    initialization = minitialization,
    returnvars    = mreturnvars,
    free_energy   = true,
    iterations    = 50, 
    showprogress  = false
);

@unpack x, γ, θ = mresult.posteriors

Dict{Symbol, Vector} with 3 entries:
  :γ => GammaShapeRate{Float64}[GammaShapeRate{Float64}(a=251.0, b=47.5918)
, Ga…
  :θ => MvNormalWeightedMeanPrecision{Float64, Vector{Float64}, Matrix{Floa
t64}…
  :x => MvNormalWeightedMeanPrecision{Float64, Vector{Float64}, Matrix{Floa
t64}…

We will use different initial marginals depending on type of our AR process

p1 = plot(first.(states), label="Hidden state")
p1 = scatter!(p1, observations, label="Observations")
p1 = plot!(p1, first.(mean.(x)), ribbon = first.(std.(x)), label="Inferred states", legend = :bottomright)

p2 = plot(mean.(γ), ribbon = std.(γ), label = "Inferred transition precision", legend = :topright)
p2 = plot!([ real_γ ], seriestype = :hline, label = "Real transition precision")

p3 = plot(mresult.free_energy, label = "Bethe Free Energy")

plot(p1, p2, p3, layout = @layout([ a; b c ]))

Let's also plot a subrange of our results:

subrange = div(n,5):(div(n, 5) + div(n, 5))

plot(subrange, first.(states)[subrange], label="Hidden state")
scatter!(subrange, observations[subrange], label="Observations")
plot!(subrange, first.(mean.(x))[subrange], ribbon = sqrt.(first.(var.(x)))[subrange], label="Inferred states", legend = :bottomright)

It is also interesting to see where our AR coefficients converge to:

let
    pθ = plot()

    θms = mean.(θ)
    θvs = var.(θ)
    
    l = length(θms)

    edim(e) = (a) -> map(r -> r[e], a)

    for i in 1:length(first(θms))
        pθ = plot!(pθ, θms |> edim(i), ribbon = θvs |> edim(i) .|> sqrt, label = "Estimated θ[$i]")
    end
    
    for i in 1:length(real_θ)
        pθ = plot!(pθ, [ real_θ[i] ], seriestype = :hline, label = "Real θ[$i]")
    end
    
    plot(pθ, legend = :outertopright, size = (800, 300))
end

println("$(length(real_θ))-order AR inference Bethe Free Energy: ", last(mresult.free_energy))

5-order AR inference Bethe Free Energy: 1024.077566129808

We can also run a 1-order AR inference on 5-order AR data:

uorder  = 1
uartype = Univariate
uc      = ReactiveMP.ar_unit(uartype, uorder)
uconstraints = ar_constraints()
umeta        = ar_meta(uartype, uorder, ARsafe())

uoptions = (limit_stack_depth = 100, )
umodel         = lar_model(T=uartype, order=uorder, c=uc, τ=real_τ)
udata          = (y = observations, )
initialization = @initialization begin
    q(γ) = GammaShapeRate(1.0, 1.0)
    q(θ) = NormalMeanPrecision(0.0, 1.0)
end
ureturnvars    = (x = KeepLast(), γ = KeepEach(), θ = KeepEach())

uresult = infer(
    model = umodel, 
    data  = udata,
    meta  = umeta,
    constraints    = uconstraints,
    initialization = initialization,
    returnvars     = ureturnvars,
    free_energy    = true,
    iterations     = 50, 
    showprogress   = false
);

println("1-order AR inference Bethe Free Energy: ", last(uresult.free_energy))

1-order AR inference Bethe Free Energy: 1025.8792949319945

if uresult.free_energy[end] > mresult.free_energy[end]
    println("We can see that, according to final Bethe Free Energy value, in this example 5-order AR process can describe data better than 1-order AR.")
else
    error("AR-1 performs better than AR-5...") 
end

We can see that, according to final Bethe Free Energy value, in this exampl
e 5-order AR process can describe data better than 1-order AR.

Autoregressive Moving Average Model

Bayesian ARMA model can be effectively implemeted in RxInfer.jl. For theoretical details on Varitional Inference for ARMA model, we refer the reader to the following paper. The Bayesian ARMA model can be written as follows:

\[\begin{aligned} e_t \sim \mathcal{N}(0, \gamma^{-1}) \quad \theta &\sim \mathcal{MN}(\mathbf{0}, \mathbf{I}) \quad \eta \sim \mathcal{MN}(\mathbf{0}, \mathbf{I}) \\ \mathbf{h}_0 &\sim \mathcal{MN}\left(\begin{bmatrix} e_{-1} \\ e_{-2} \end{bmatrix}, \mathbf{I}\right) \\ \mathbf{h}_t &= \mathbf{S}\mathbf{h}_{t-1} + \mathbf{c} e_{t-1} \\ \mathbf{x}_t &= \boldsymbol{\theta}^\top\mathbf{x}_{t-1} + \boldsymbol{\eta}^\top\mathbf{h}_{t} + e_t \end{aligned}\]

where shift matrix $\mathbf{S}$ is

\[\begin{aligned} \mathbf{S} = \begin{pmatrix} 0 & 0 \\ 1 & 0 \end{pmatrix} \end{aligned}\]

and unit vector $\mathbf{c}$:

\[\begin{aligned} \mathbf{c}=[1, 0] \end{aligned}\]

when MA order is $2$

In this way, $\mathbf{h}_t$ containing errors $e_t$ can be viewed as hidden state.

In short, the Bayesian ARMA model has two intractabilities: (1) induced by the multiplication of two Gaussian RVs, i.e., $\boldsymbol{\eta}^\top\mathbf{h}_{t}$, (2) induced by errors $e_t$ that prevents analytical update of precision parameter $\gamma$ (this can be easily seen when constructing the Factor Graph, i.e. there is a loop). Both problems can be easily resolved in RxInfer.jl, by creating a hybrid inference algorithm based on Loopy Variational Message Passing.

# Load packages
using RxInfer, LinearAlgebra, CSV, DataFrames, Plots

# Define shift function
function shift(dim)
    S = Matrix{Float64}(I, dim, dim)
    for i in dim:-1:2
           S[i,:] = S[i-1, :]
    end
    S[1, :] = zeros(dim)
    return S
end

shift (generic function with 1 method)

@model function ARMA(x, x_prev, h_prior, γ_prior, τ_prior, η_prior, θ_prior, c, b, S)
    
    # priors
    γ  ~ γ_prior
    η  ~ η_prior
    θ  ~ θ_prior
    τ  ~ τ_prior
    
    # initial
    h_0 ~ h_prior
    z[1] ~ AR(h_0, η, τ)
    e[1] ~ Normal(mean = 0.0, precision = γ)

    x[1] ~ dot(b, z[1]) + dot(θ, x_prev[1]) + e[1]
    
    h_prev = h_0
    for t in 1:length(x)-1
        
        e[t+1] ~ Normal(mean = 0.0, precision = γ)
        h[t]   ~ S*h_prev + b*e[t]
        z[t+1] ~ AR(h[t], η, τ)
        x[t+1] ~ dot(z[t+1], b) + dot(θ, x_prev[t]) + e[t+1]
        h_prev = h[t]
    end
end

To validate our model and inference, we will use American Airlines stock data downloaded from Kaggle

x_df = CSV.read("../data/arma/aal_stock.csv", DataFrame)

1259×7 DataFrame
  Row │ date        open     high     low      close    volume    Name
      │ Date        Float64  Float64  Float64  Float64  Int64     String3
──────┼───────────────────────────────────────────────────────────────────
    1 │ 2013-02-08    15.07    15.12   14.63     14.75   8407500  AAL
    2 │ 2013-02-11    14.89    15.01   14.26     14.46   8882000  AAL
    3 │ 2013-02-12    14.45    14.51   14.1      14.27   8126000  AAL
    4 │ 2013-02-13    14.3     14.94   14.25     14.66  10259500  AAL
    5 │ 2013-02-14    14.94    14.96   13.16     13.99  31879900  AAL
    6 │ 2013-02-15    13.93    14.61   13.93     14.5   15628000  AAL
    7 │ 2013-02-19    14.33    14.56   14.08     14.26  11354400  AAL
    8 │ 2013-02-20    14.17    14.26   13.15     13.33  14725200  AAL
  ⋮   │     ⋮          ⋮        ⋮        ⋮        ⋮        ⋮         ⋮
 1253 │ 2018-01-30    52.45    53.05   52.36     52.59   4741808  AAL
 1254 │ 2018-01-31    53.08    54.71   53.0      54.32   5962937  AAL
 1255 │ 2018-02-01    54.0     54.64   53.59     53.88   3623078  AAL
 1256 │ 2018-02-02    53.49    53.99   52.03     52.1    5109361  AAL
 1257 │ 2018-02-05    51.99    52.39   49.75     49.76   6878284  AAL
 1258 │ 2018-02-06    49.32    51.5    48.79     51.18   6782480  AAL
 1259 │ 2018-02-07    50.91    51.98   50.89     51.4    4845831  AAL
                                                         1244 rows omitted

# we will use "close" column
x_data = filter(!ismissing, x_df[:, 5]);

# Plot data
plot(x_data, xlabel="day", ylabel="price", label=false)

p_order = 10 # AR
q_order = 4 # MA

# Training set
train_size = 1000
x_prev_train = [Float64.(x_data[i+p_order-1:-1:i]) for i in 1:length(x_data)-p_order][1:train_size]
x_train = Float64.(x_data[p_order+1:end])[1:train_size];

# Test set
x_prev_test = [Float64.(x_data[i+p_order-1:-1:i]) for i in 1:length(x_data)-p_order][train_size+1:end]
x_test = Float64.(x_data[p_order+1:end])[train_size+1:end];

Inference

# Constraints are needed for performing VMP
arma_constraints = @constraints begin
    q(z, h_0, h, η, τ, γ,e) = q(h_0)q(z, h)q(η)q(τ)q(γ)q(e)
end;

# This cell defines prior knowledge for model parameters
h_prior = MvNormalMeanPrecision(zeros(q_order), diageye(q_order))
γ_prior = GammaShapeRate(1e4, 1.0)
τ_prior = GammaShapeRate(1e2, 1.0)
η_prior = MvNormalMeanPrecision(zeros(q_order), diageye(q_order))
θ_prior = MvNormalMeanPrecision(zeros(p_order), diageye(p_order));

# Model's graph has structural loops, hence, it requires pre-initialisation
arma_initialization = @initialization begin
    q(h_0) = h_prior
    μ(h_0) = h_prior
    q(h) = h_prior
    μ(h) = h_prior
    q(γ) = γ_prior
    q(τ) = τ_prior
    q(η) = η_prior
    q(θ) = θ_prior

end
arma_meta       = ar_meta(Multivariate, q_order, ARsafe());

c = zeros(p_order); c[1] = 1.0; # AR
b = zeros(q_order); b[1] = 1.0; # MA
S = shift(q_order); # MA


result = infer(
    model = ARMA(x_prev=x_prev_train, h_prior=h_prior, γ_prior=γ_prior, τ_prior=τ_prior, η_prior=η_prior, θ_prior=θ_prior, c=c, b=b, S=S), 
    data  = (x = x_train, ),
    initialization = arma_initialization,
    constraints   = arma_constraints,
    meta          = arma_meta,
    iterations    = 10,
    options       = (limit_stack_depth = 400, ),
);

plot(mean.(result.posteriors[:e][end]), ribbon = var.(result.posteriors[:e][end]), label = "eₜ")

# extract posteriors
h_posterior = result.posteriors[:h][end][end]
γ_posterior = result.posteriors[:γ][end]
τ_posterior = result.posteriors[:τ][end]
η_posterior = result.posteriors[:η][end]
θ_posterior = result.posteriors[:θ][end];

Prediction

Here we are going to use our inference results in order to predict the dataset itself

# The prediction function is aimed at approximating the predictive posterior distribution
# It triggers the rules in the generative order (in future, RxInfer.jl will provide this function out of the box)
function prediction(x_prev, h_posterior, γ_posterior, τ_posterior, η_posterior, θ_posterior, p, q)
    h_out = MvNormalMeanPrecision(mean(h_posterior), precision(h_posterior))
    ar_out = @call_rule AR(:y, Marginalisation) (m_x=h_out, q_θ=η_posterior, q_γ=τ_posterior, meta=ARMeta(Multivariate, p, ARsafe()))
    c = zeros(p); c[1] = 1.0
    b = zeros(q); b[1] = 1.0
    ar_dot_out = @call_rule typeof(dot)(:out, Marginalisation) (m_in1=PointMass(b), m_in2=ar_out)
    θ_out = MvNormalMeanPrecision(mean(θ_posterior), precision(θ_posterior))
    ma_dot_out = @call_rule typeof(dot)(:out, Marginalisation) (m_in1=PointMass(x_prev), m_in2=θ_out)
    e_out = @call_rule NormalMeanPrecision(:out, Marginalisation) (q_μ=PointMass(0.0), q_τ=mean(γ_posterior))
    ar_ma = @call_rule typeof(+)(:out, Marginalisation) (m_in1=ar_dot_out, m_in2=ma_dot_out)  
    @call_rule typeof(+)(:out, Marginalisation) (m_in1=ar_ma, m_in2=e_out)  
end

prediction (generic function with 1 method)

predictions = []
for x_prev in x_prev_test
    push!(predictions, prediction(x_prev, h_posterior, γ_posterior, τ_posterior, η_posterior, θ_posterior, p_order, q_order))
    # after every new prediction we can actually "retrain" the model to use the power of Bayesian approach
    # we will skip this part at this notebook
end

plot(x_test, label="test data", legend=:topleft)
plot!(mean.(predictions)[1:end], ribbon=std.(predictions)[1:end], label="predicted", xlabel="day", ylabel="price")