Skip to content

Convolutional VAE for MNIST

Convolutional variational autoencoder (CVAE) implementation in Lux using MNIST. This is based on the CVAE implementation in MLX.

julia
using Lux,
    Reactant,
    MLDatasets,
    Random,
    Statistics,
    Enzyme,
    MLUtils,
    DataAugmentation,
    ConcreteStructs,
    OneHotArrays,
    ImageShow,
    Images,
    Printf,
    Optimisers

const xdev = reactant_device(; force=true)
const cdev = cpu_device()

const IN_VSCODE = isdefined(Main, :VSCodeServer)
false

Model Definition

First we will define the encoder. It maps the input to a normal distribution in latent space and samples a latent vector from that distribution.

julia
function cvae_encoder(
    rng=Random.default_rng();
    num_latent_dims::Int,
    image_shape::Dims{3},
    max_num_filters::Int,
)
    # Three stride-2 convolutions shrink each spatial dimension by a factor of
    # 8, so the flattened feature length is (H ÷ 8) * (W ÷ 8) * max_num_filters.
    flattened_dim = prod(image_shape[1:2] .÷ 8) * max_num_filters
    return @compact(;
        embed=Chain(
            Chain(
                Conv((3, 3), image_shape[3] => max_num_filters ÷ 4; stride=2, pad=1),
                BatchNorm(max_num_filters ÷ 4, leakyrelu),
            ),
            Chain(
                Conv((3, 3), max_num_filters ÷ 4 => max_num_filters ÷ 2; stride=2, pad=1),
                BatchNorm(max_num_filters ÷ 2, leakyrelu),
            ),
            Chain(
                Conv((3, 3), max_num_filters ÷ 2 => max_num_filters; stride=2, pad=1),
                BatchNorm(max_num_filters, leakyrelu),
            ),
            FlattenLayer(),
        ),
        proj_mu=Dense(flattened_dim, num_latent_dims; init_bias=zeros32),
        proj_log_var=Dense(flattened_dim, num_latent_dims; init_bias=zeros32),
        rng
    ) do x
        y = embed(x)

        # Predict the latent Gaussian's mean and log-variance
        μ = proj_mu(y)
        logσ² = proj_log_var(y)

        # Clamp the log-variance for numerical stability before exponentiating
        T = eltype(logσ²)
        logσ² = clamp.(logσ², -T(20.0f0), T(10.0f0))
        σ = exp.(logσ² .* T(0.5))

        # Generate a tensor of random values from a normal distribution
        ϵ = randn_like(Lux.replicate(rng), σ)

        # Reparameterization trick to backpropagate through sampling
        z = ϵ .* σ .+ μ

        @return z, μ, logσ²
    end
end

Similarly we define the decoder.

julia
function cvae_decoder(; num_latent_dims::Int, image_shape::Dims{3}, max_num_filters::Int)
    # Mirror of the encoder: the linear layer maps the latent vector back to
    # a (H ÷ 8) × (W ÷ 8) × max_num_filters feature volume.
    flattened_dim = prod(image_shape[1:2] .÷ 8) * max_num_filters
    return @compact(;
        linear=Dense(num_latent_dims, flattened_dim),
        upchain=Chain(
            Chain(
                Upsample(2),
                Conv((3, 3), max_num_filters => max_num_filters ÷ 2; stride=1, pad=1),
                BatchNorm(max_num_filters ÷ 2, leakyrelu),
            ),
            Chain(
                Upsample(2),
                Conv((3, 3), max_num_filters ÷ 2 => max_num_filters ÷ 4; stride=1, pad=1),
                BatchNorm(max_num_filters ÷ 4, leakyrelu),
            ),
            Chain(
                Upsample(2),
                # sigmoid keeps the reconstructed pixels in [0, 1]
                Conv(
                    (3, 3), max_num_filters ÷ 4 => image_shape[3], sigmoid; stride=1, pad=1
                ),
            ),
        ),
        max_num_filters
    ) do x
        y = linear(x)
        # Reshape the flat features back to a spatial volume; three Upsample(2)
        # stages then restore the original image resolution.
        img = reshape(y, image_shape[1] ÷ 8, image_shape[2] ÷ 8, max_num_filters, :)
        @return upchain(img)
    end
end

# Container layer holding the encoder/decoder pair. Lux traverses the
# (:encoder, :decoder) fields to collect parameters and states.
@concrete struct CVAE <: AbstractLuxContainerLayer{(:encoder, :decoder)}
    encoder <: AbstractLuxLayer
    decoder <: AbstractLuxLayer
end

function CVAE(
    rng=Random.default_rng();
    num_latent_dims::Int,
    image_shape::Dims{3},
    max_num_filters::Int,
)
    # Build both halves with mirrored hyperparameters; only the encoder needs
    # the RNG (for the reparameterization sampling).
    enc = cvae_encoder(rng; num_latent_dims, image_shape, max_num_filters)
    dec = cvae_decoder(; num_latent_dims, image_shape, max_num_filters)
    return CVAE(enc, dec)
end

function (cvae::CVAE)(x, ps, st)
    # Full forward pass: encode to a sampled latent, then decode it back.
    (z, μ, logσ²), new_enc_st = cvae.encoder(x, ps.encoder, st.encoder)
    x_rec, new_dec_st = cvae.decoder(z, ps.decoder, st.decoder)
    return (x_rec, μ, logσ²), (; encoder=new_enc_st, decoder=new_dec_st)
end

function encode(cvae::CVAE, x, ps, st)
    # Only the sampled latent z is needed here; μ and logσ² are discarded.
    (z, _, _), enc_st = cvae.encoder(x, ps.encoder, st.encoder)
    return z, (; encoder=enc_st, st.decoder)
end

function decode(cvae::CVAE, z, ps, st)
    # Map a latent vector back to image space, passing the encoder state through.
    imgs, dec_st = cvae.decoder(z, ps.decoder, st.decoder)
    return imgs, (; decoder=dec_st, st.encoder)
end

Loading MNIST

julia
# Lazy dataset wrapper: pairs an MLDatasets dataset with a DataAugmentation
# transform that is applied on the fly when indexing.
@concrete struct TensorDataset
    dataset              # underlying dataset (e.g. MNIST)
    transform            # DataAugmentation pipeline applied per item
    total_samples::Int   # number of samples exposed via Base.length
end

# Report the configured sample count (may be a subset of the raw dataset).
Base.length(ds::TensorDataset) = ds.total_samples

function Base.getindex(ds::TensorDataset, idxs::Union{Vector{<:Integer},AbstractRange})
    # Convert the selected raw samples to color images, one per slice
    img = Image.(eachslice(convert2image(ds.dataset, idxs); dims=3))
    # For each image: apply the transform, extract the item data, and strip
    # any array wrapper; stack the results into one batched array.
    return stack(parent ∘ itemdata ∘ Base.Fix1(apply, ds.transform), img)
end

function loadmnist(batchsize, image_size::Dims{2})
    # Load MNIST; on CI only a 5000-sample subset is used to keep runs fast
    train_dataset = MNIST(; split=:train)
    N = parse(Bool, get(ENV, "CI", "false")) ? 5000 : length(train_dataset)

    # Aspect-preserving resize to `image_size`, then conversion to a tensor
    train_transform = ScaleKeepAspect(image_size) |> ImageToTensor()
    trainset = TensorDataset(train_dataset, train_transform, N)
    # partial=false drops the last incomplete batch so all batches share a shape
    trainloader = DataLoader(trainset; batchsize, shuffle=true, partial=false)

    return trainloader
end

Helper Functions

Generate an Image Grid from a list of images

julia
function create_image_grid(imgs::AbstractArray, grid_rows::Int, grid_cols::Int)
    # Convert the batched array (H, W, C, N) into a vector of 2D color images,
    # then delegate to the Vector method to assemble the grid.
    n = grid_rows * grid_cols
    slices = map(eachslice(imgs[:, :, :, 1:n]; dims=4)) do slice
        colored = if size(slice, 3) == 1
            colorview(Gray, view(slice, :, :, 1))
        else
            colorview(RGB, permutedims(slice, (3, 1, 2)))
        end
        # Transpose so the image displays with the conventional orientation
        return colored'
    end
    return create_image_grid(slices, grid_rows, grid_cols)
end

function create_image_grid(images::Vector, grid_rows::Int, grid_cols::Int)
    # The grid must be filled exactly — no missing or extra tiles
    total_images = grid_rows * grid_cols
    @assert length(images) == total_images

    # All images are assumed to share the dimensions of the first one
    img_height, img_width = size(images[1])

    # Allocate one canvas covering the whole grid
    grid_canvas = similar(images[1], img_height * grid_rows, img_width * grid_cols)

    # Copy each image into its slot, walking the grid row-major
    for (idx, img) in enumerate(images)
        r, c = divrem(idx - 1, grid_cols)
        rows = (r * img_height + 1):((r + 1) * img_height)
        cols = (c * img_width + 1):((c + 1) * img_width)
        grid_canvas[rows, cols] .= img
    end

    return grid_canvas
end

function loss_function(model, ps, st, X)
    # Forward pass yields the reconstruction and latent distribution stats
    (y, μ, logσ²), st = model(X, ps, st)
    # Sum-of-squares reconstruction term
    reconstruction_loss = MSELoss(; agg=sum)(y, X)
    # KL divergence between N(μ, σ²) and the standard normal prior
    kldiv_loss = -sum(1 .+ logσ² .- abs2.(μ) .- exp.(logσ²)) / 2
    return reconstruction_loss + kldiv_loss,
    st,
    (; y, μ, logσ², reconstruction_loss, kldiv_loss)
end

function generate_images(
    model, ps, st; num_samples::Int=128, num_latent_dims::Int, decode_compiled=nothing
)
    # Sample standard-normal latents on the same device as the model state
    dev = get_device((ps, st))
    z = dev(randn(Float32, num_latent_dims, num_samples))
    if decode_compiled === nothing
        imgs, _ = decode(model, z, ps, Lux.testmode(st))
    else
        # Compiled path runs on the accelerator; copy results back to host
        imgs, _ = decode_compiled(model, z, ps, Lux.testmode(st))
        imgs = cpu_device()(imgs)
    end
    return create_image_grid(imgs, 8, num_samples ÷ 8)
end

function reconstruct_images(model, ps, st, X)
    # Run the full autoencoder in inference mode; keep only the reconstruction
    (x_rec, _, _), _ = model(X, ps, Lux.testmode(st))
    x_rec = cpu_device()(x_rec)
    return create_image_grid(x_rec, 8, size(X, ndims(X)) ÷ 8)
end
reconstruct_images (generic function with 1 method)

Training the Model

julia
function main(;
    batchsize=128,
    image_size=(64, 64),
    num_latent_dims=8,
    max_num_filters=64,
    seed=0,
    epochs=50,
    weight_decay=1.0e-5,
    learning_rate=1.0e-3,
    num_samples=batchsize,
)
    # Seeded RNG so parameter init and latent sampling are reproducible
    rng = Xoshiro()
    Random.seed!(rng, seed)

    cvae = CVAE(rng; num_latent_dims, image_shape=(image_size..., 1), max_num_filters)
    ps, st = xdev(Lux.setup(rng, cvae))

    # Pre-compile the decoder (used for sampling) with high-precision
    # matmul/convolution so generated images are not degraded by TF32.
    z = xdev(randn(Float32, num_latent_dims, num_samples))
    decode_compiled = Reactant.with_config(;
        dot_general_precision=PrecisionConfig.HIGH,
        convolution_precision=PrecisionConfig.HIGH,
    ) do
        @compile decode(cvae, z, ps, Lux.testmode(st))
    end
    # Pre-compile the full model (used for reconstruction) the same way
    x = xdev(randn(Float32, image_size..., 1, batchsize))
    cvae_compiled = Reactant.with_config(;
        dot_general_precision=PrecisionConfig.HIGH,
        convolution_precision=PrecisionConfig.HIGH,
    ) do
        @compile cvae(x, ps, Lux.testmode(st))
    end

    train_dataloader = xdev(loadmnist(batchsize, image_size))

    opt = AdamW(; eta=learning_rate, lambda=weight_decay)

    train_state = Training.TrainState(cvae, ps, st, opt)

    @printf "Total Trainable Parameters: %0.4f M\n" (Lux.parameterlength(ps) / 1.0e6)

    empty_row, model_img_full = nothing, nothing

    for epoch in 1:epochs
        loss_total = 0.0f0
        total_samples = 0

        start_time = time()
        for (i, X) in enumerate(train_dataloader)
            # One optimizer step; gradients are not materialized on the host
            (_, loss, _, train_state) = Training.single_train_step!(
                AutoEnzyme(), loss_function, X, train_state; return_gradients=Val(false)
            )

            loss_total += loss
            total_samples += size(X, ndims(X))

            if i % 250 == 0 || i == length(train_dataloader)
                throughput = total_samples / (time() - start_time)
                @printf "Epoch %d, Iter %d, Loss: %.7f, Throughput: %.6f im/s\n" epoch i loss throughput
            end
        end
        total_time = time() - start_time

        train_loss = loss_total / length(train_dataloader)
        throughput = total_samples / total_time
        @printf "Epoch %d, Train Loss: %.7f, Time: %.4fs, Throughput: %.6f im/s\n" epoch train_loss total_time throughput

        # Visualize reconstructions and fresh samples (every epoch inside
        # VSCode, otherwise only after the final epoch)
        if IN_VSCODE || epoch == epochs
            recon_images = reconstruct_images(
                cvae_compiled,
                train_state.parameters,
                train_state.states,
                first(train_dataloader),
            )
            gen_images = generate_images(
                cvae,
                train_state.parameters,
                train_state.states;
                num_samples,
                num_latent_dims,
                decode_compiled,
            )
            # Blank separator row between reconstructions and generations
            if empty_row === nothing
                empty_row = similar(gen_images, image_size[1], size(gen_images, 2))
                fill!(empty_row, 0)
            end
            model_img_full = vcat(recon_images, empty_row, gen_images)
            IN_VSCODE && display(model_img_full)
        end
    end

    return model_img_full
end

img = main()
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
I0000 00:00:1761834569.102805 1972512 service.cc:158] XLA service 0x40e1f980 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1761834569.102852 1972512 service.cc:166]   StreamExecutor device (0): NVIDIA A100-PCIE-40GB MIG 1g.5gb, Compute Capability 8.0
I0000 00:00:1761834569.103830 1972512 se_gpu_pjrt_client.cc:770] Using BFC allocator.
I0000 00:00:1761834569.103895 1972512 gpu_helpers.cc:136] XLA backend allocating 3825205248 bytes on device 0 for BFCAllocator.
I0000 00:00:1761834569.103957 1972512 gpu_helpers.cc:177] XLA backend will use up to 1275068416 bytes on device 0 for CollectiveBFCAllocator.
I0000 00:00:1761834569.115300 1972512 cuda_dnn.cc:463] Loaded cuDNN version 91400
Total Trainable Parameters: 0.1493 M
┌ Warning: `training` is set to `Val{true}()` but is not being used within an autodiff call (gradient, jacobian, etc...). This will be slow. If you are using a `Lux.jl` model, set it to inference (test) mode using `LuxCore.testmode`. Reliance on this behavior is discouraged, and is not guaranteed by Semantic Versioning, and might be removed without a deprecation cycle. It is recommended to fix this issue in your code.
└ @ LuxLib.Utils /var/lib/buildkite-agent/builds/gpuci-13/julialang/lux-dot-jl/lib/LuxLib/src/utils.jl:334
Epoch 1, Iter 39, Loss: 25197.1503906, Throughput: 54.710863 im/s
Epoch 1, Train Loss: 39852.8242188, Time: 91.7098s, Throughput: 54.432580 im/s
Epoch 2, Iter 39, Loss: 18876.9746094, Throughput: 1770.750264 im/s
Epoch 2, Train Loss: 20353.8964844, Time: 2.8195s, Throughput: 1770.525361 im/s
Epoch 3, Iter 39, Loss: 15212.1064453, Throughput: 1900.029290 im/s
Epoch 3, Train Loss: 16778.9589844, Time: 2.6276s, Throughput: 1899.854300 im/s
Epoch 4, Iter 39, Loss: 14866.9003906, Throughput: 1808.266565 im/s
Epoch 4, Train Loss: 15279.5371094, Time: 2.7610s, Throughput: 1808.070127 im/s
Epoch 5, Iter 39, Loss: 13263.1718750, Throughput: 1894.399287 im/s
Epoch 5, Train Loss: 14384.4003906, Time: 2.6353s, Throughput: 1894.266290 im/s
Epoch 6, Iter 39, Loss: 13207.9082031, Throughput: 1938.797455 im/s
Epoch 6, Train Loss: 13614.0419922, Time: 2.5750s, Throughput: 1938.634817 im/s
Epoch 7, Iter 39, Loss: 13614.1396484, Throughput: 2038.004353 im/s
Epoch 7, Train Loss: 13130.2099609, Time: 2.4497s, Throughput: 2037.824646 im/s
Epoch 8, Iter 39, Loss: 12339.8105469, Throughput: 1962.492466 im/s
Epoch 8, Train Loss: 12670.5927734, Time: 2.5441s, Throughput: 1962.225418 im/s
Epoch 9, Iter 39, Loss: 12791.9316406, Throughput: 1902.470259 im/s
Epoch 9, Train Loss: 12321.4599609, Time: 2.6242s, Throughput: 1902.265785 im/s
Epoch 10, Iter 39, Loss: 11722.2382812, Throughput: 1811.739514 im/s
Epoch 10, Train Loss: 12116.5439453, Time: 2.7556s, Throughput: 1811.597493 im/s
Epoch 11, Iter 39, Loss: 12004.1425781, Throughput: 1839.426012 im/s
Epoch 11, Train Loss: 11842.2343750, Time: 2.7142s, Throughput: 1839.235511 im/s
Epoch 12, Iter 39, Loss: 11931.0722656, Throughput: 1793.491167 im/s
Epoch 12, Train Loss: 11641.5996094, Time: 2.7838s, Throughput: 1793.255997 im/s
Epoch 13, Iter 39, Loss: 12368.5312500, Throughput: 1851.009457 im/s
Epoch 13, Train Loss: 11381.0683594, Time: 2.6971s, Throughput: 1850.872175 im/s
Epoch 14, Iter 39, Loss: 11562.6367188, Throughput: 1839.949409 im/s
Epoch 14, Train Loss: 11248.7568359, Time: 2.7133s, Throughput: 1839.796141 im/s
Epoch 15, Iter 39, Loss: 10514.7978516, Throughput: 1872.980408 im/s
Epoch 15, Train Loss: 11143.8535156, Time: 2.6655s, Throughput: 1872.799812 im/s
Epoch 16, Iter 39, Loss: 11745.0615234, Throughput: 1855.078062 im/s
Epoch 16, Train Loss: 11025.8398438, Time: 2.6916s, Throughput: 1854.686481 im/s
Epoch 17, Iter 39, Loss: 11311.2431641, Throughput: 1860.424201 im/s
Epoch 17, Train Loss: 10993.0722656, Time: 2.6835s, Throughput: 1860.272462 im/s
Epoch 18, Iter 39, Loss: 10927.7148438, Throughput: 1857.185711 im/s
Epoch 18, Train Loss: 10859.9560547, Time: 2.6883s, Throughput: 1856.965986 im/s
Epoch 19, Iter 39, Loss: 10451.0800781, Throughput: 1826.210673 im/s
Epoch 19, Train Loss: 10811.2773438, Time: 2.7339s, Throughput: 1825.971621 im/s
Epoch 20, Iter 39, Loss: 10386.2050781, Throughput: 1833.671384 im/s
Epoch 20, Train Loss: 10634.4775391, Time: 2.7226s, Throughput: 1833.510491 im/s
Epoch 21, Iter 39, Loss: 10334.7363281, Throughput: 1875.836207 im/s
Epoch 21, Train Loss: 10544.3339844, Time: 2.6614s, Throughput: 1875.700932 im/s
Epoch 22, Iter 39, Loss: 10405.4189453, Throughput: 1848.636813 im/s
Epoch 22, Train Loss: 10572.1787109, Time: 2.7006s, Throughput: 1848.473283 im/s
Epoch 23, Iter 39, Loss: 10515.1132812, Throughput: 1886.527888 im/s
Epoch 23, Train Loss: 10484.7373047, Time: 2.6464s, Throughput: 1886.326826 im/s
Epoch 24, Iter 39, Loss: 10722.0595703, Throughput: 1890.281139 im/s
Epoch 24, Train Loss: 10414.4335938, Time: 2.6411s, Throughput: 1890.126539 im/s
Epoch 25, Iter 39, Loss: 10396.4570312, Throughput: 1873.527267 im/s
Epoch 25, Train Loss: 10257.4482422, Time: 2.6648s, Throughput: 1873.318407 im/s
Epoch 26, Iter 39, Loss: 10374.4003906, Throughput: 1887.058538 im/s
Epoch 26, Train Loss: 10255.7119141, Time: 2.6456s, Throughput: 1886.902253 im/s
Epoch 27, Iter 39, Loss: 10256.1699219, Throughput: 1946.904736 im/s
Epoch 27, Train Loss: 10146.0351562, Time: 2.5643s, Throughput: 1946.736934 im/s
Epoch 28, Iter 39, Loss: 9766.9453125, Throughput: 1971.652117 im/s
Epoch 28, Train Loss: 10174.7519531, Time: 2.5321s, Throughput: 1971.464429 im/s
Epoch 29, Iter 39, Loss: 10157.7363281, Throughput: 2016.669049 im/s
Epoch 29, Train Loss: 10114.7734375, Time: 2.4756s, Throughput: 2016.446671 im/s
Epoch 30, Iter 39, Loss: 9905.4218750, Throughput: 2041.481551 im/s
Epoch 30, Train Loss: 10080.8027344, Time: 2.4456s, Throughput: 2041.215260 im/s
Epoch 31, Iter 39, Loss: 9732.1738281, Throughput: 2021.823678 im/s
Epoch 31, Train Loss: 10007.7304688, Time: 2.4694s, Throughput: 2021.511744 im/s
Epoch 32, Iter 39, Loss: 9549.0283203, Throughput: 2001.821277 im/s
Epoch 32, Train Loss: 9949.5654297, Time: 2.4940s, Throughput: 2001.598908 im/s
Epoch 33, Iter 39, Loss: 10060.7675781, Throughput: 1914.628669 im/s
Epoch 33, Train Loss: 9860.9707031, Time: 2.6076s, Throughput: 1914.431900 im/s
Epoch 34, Iter 39, Loss: 10692.0566406, Throughput: 1878.152530 im/s
Epoch 34, Train Loss: 9874.6357422, Time: 2.6582s, Throughput: 1877.969587 im/s
Epoch 35, Iter 39, Loss: 9821.5312500, Throughput: 1896.683359 im/s
Epoch 35, Train Loss: 9885.1767578, Time: 2.6323s, Throughput: 1896.459857 im/s
Epoch 36, Iter 39, Loss: 9193.0566406, Throughput: 1884.433541 im/s
Epoch 36, Train Loss: 9775.0976562, Time: 2.6493s, Throughput: 1884.244964 im/s
Epoch 37, Iter 39, Loss: 9392.2617188, Throughput: 1834.354612 im/s
Epoch 37, Train Loss: 9727.9980469, Time: 2.7216s, Throughput: 1834.189582 im/s
Epoch 38, Iter 39, Loss: 10201.0429688, Throughput: 1893.288586 im/s
Epoch 38, Train Loss: 9701.8837891, Time: 2.6369s, Throughput: 1893.146503 im/s
Epoch 39, Iter 39, Loss: 9828.3300781, Throughput: 1961.807520 im/s
Epoch 39, Train Loss: 9676.5488281, Time: 2.5448s, Throughput: 1961.642653 im/s
Epoch 40, Iter 39, Loss: 9877.8818359, Throughput: 1972.304010 im/s
Epoch 40, Train Loss: 9672.9697266, Time: 2.5323s, Throughput: 1971.321135 im/s
Epoch 41, Iter 39, Loss: 9904.9765625, Throughput: 1863.086959 im/s
Epoch 41, Train Loss: 9648.8632812, Time: 2.6797s, Throughput: 1862.906276 im/s
Epoch 42, Iter 39, Loss: 10335.6054688, Throughput: 1874.735915 im/s
Epoch 42, Train Loss: 9612.5439453, Time: 2.6632s, Throughput: 1874.425426 im/s
Epoch 43, Iter 39, Loss: 9962.9726562, Throughput: 1746.133371 im/s
Epoch 43, Train Loss: 9573.4238281, Time: 2.8592s, Throughput: 1745.924577 im/s
Epoch 44, Iter 39, Loss: 9540.8281250, Throughput: 1825.847422 im/s
Epoch 44, Train Loss: 9563.1806641, Time: 2.7343s, Throughput: 1825.707798 im/s
Epoch 45, Iter 39, Loss: 9357.1894531, Throughput: 1853.433316 im/s
Epoch 45, Train Loss: 9546.3720703, Time: 2.6936s, Throughput: 1853.264671 im/s
Epoch 46, Iter 39, Loss: 8637.3974609, Throughput: 1850.404032 im/s
Epoch 46, Train Loss: 9438.0546875, Time: 2.6985s, Throughput: 1849.896737 im/s
Epoch 47, Iter 39, Loss: 9746.9902344, Throughput: 1882.947148 im/s
Epoch 47, Train Loss: 9390.0810547, Time: 2.6516s, Throughput: 1882.669483 im/s
Epoch 48, Iter 39, Loss: 9583.6503906, Throughput: 1835.091585 im/s
Epoch 48, Train Loss: 9377.5000000, Time: 2.7206s, Throughput: 1834.916291 im/s
Epoch 49, Iter 39, Loss: 9356.6992188, Throughput: 1830.993349 im/s
Epoch 49, Train Loss: 9320.1875000, Time: 2.7266s, Throughput: 1830.832925 im/s
Epoch 50, Iter 39, Loss: 8861.4980469, Throughput: 1809.086972 im/s
Epoch 50, Train Loss: 9383.0751953, Time: 2.7598s, Throughput: 1808.848944 im/s

Appendix

julia
using InteractiveUtils
InteractiveUtils.versioninfo()

if @isdefined(MLDataDevices)
    if @isdefined(CUDA) && MLDataDevices.functional(CUDADevice)
        println()
        CUDA.versioninfo()
    end

    if @isdefined(AMDGPU) && MLDataDevices.functional(AMDGPUDevice)
        println()
        AMDGPU.versioninfo()
    end
end
Julia Version 1.11.7
Commit f2b3dbda30a (2025-09-08 12:10 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 48 × AMD EPYC 7402 24-Core Processor
  WORD_SIZE: 64
  LLVM: libLLVM-16.0.6 (ORCJIT, znver2)
Threads: 48 default, 0 interactive, 24 GC (on 2 virtual cores)
Environment:
  JULIA_CPU_THREADS = 2
  JULIA_DEPOT_PATH = /root/.cache/julia-buildkite-plugin/depots/01872db4-8c79-43af-ab7d-12abac4f24f6
  LD_LIBRARY_PATH = /usr/local/nvidia/lib:/usr/local/nvidia/lib64
  JULIA_PKG_SERVER = 
  JULIA_NUM_THREADS = 48
  JULIA_CUDA_HARD_MEMORY_LIMIT = 100%
  JULIA_PKG_PRECOMPILE_AUTO = 0
  JULIA_DEBUG = Literate

This page was generated using Literate.jl.