Boston with Flux

Download the notebook, the raw script, or the annotated script for this tutorial (right-click on the link and save). Main author: Ayush Shridhar (ayush-1506).

Getting started

import MLJFlux
import MLJ
import DataFrames
import Statistics
import Flux
using Random
using PyPlot

Random.seed!(11)
MersenneTwister(UInt32[0x0000000b]) @ 1002

Next we load the Boston dataset. Our aim will be to implement a neural network regressor that predicts the price of a house given a number of features.

features, targets = MLJ.@load_boston
features = DataFrames.DataFrame(features)
@show size(features)
@show targets[1:3]
first(features, 3) |> MLJ.pretty
size(features) = (506, 12)
targets[1:3] = [24.0, 21.6, 34.7]
┌────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┐
│ Crim       │ Zn         │ Indus      │ NOx        │ Rm         │ Age        │ Dis        │ Rad        │ Tax        │ PTRatio    │ Black      │ LStat      │
│ Float64    │ Float64    │ Float64    │ Float64    │ Float64    │ Float64    │ Float64    │ Float64    │ Float64    │ Float64    │ Float64    │ Float64    │
│ Continuous │ Continuous │ Continuous │ Continuous │ Continuous │ Continuous │ Continuous │ Continuous │ Continuous │ Continuous │ Continuous │ Continuous │
├────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤
│ 0.00632    │ 18.0       │ 2.31       │ 0.538      │ 6.575      │ 65.2       │ 4.09       │ 1.0        │ 296.0      │ 15.3       │ 396.9      │ 4.98       │
│ 0.02731    │ 0.0        │ 7.07       │ 0.469      │ 6.421      │ 78.9       │ 4.9671     │ 2.0        │ 242.0      │ 17.8       │ 396.9      │ 9.14       │
│ 0.02729    │ 0.0        │ 7.07       │ 0.469      │ 7.185      │ 61.1       │ 4.9671     │ 2.0        │ 242.0      │ 17.8       │ 392.83     │ 4.03       │
└────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┘

The next obvious step: partitioning the data into train and test sets.

train, test = MLJ.partition(MLJ.eachindex(targets), 0.70, rng=52)
([358, 422, 334, 476, 1, 441, 12, 115, 240, 104, …], [225, 189, 245, 418, 295, 135, 463, 487, 37, 207, …])

Let us now implement a neural network regressor using Flux.jl. MLJFlux.jl provides an MLJ interface to the Flux.jl deep learning framework. The package provides four essential models: NeuralNetworkRegressor, MultitargetNeuralNetworkRegressor, NeuralNetworkClassifier and ImageClassifier.

At the heart of these models is a neural network, specified using the builder parameter. Creating a builder object consists of two steps.

Step 1: Create a new struct that subtypes MLJFlux.Builder, an abstract type used for dispatch. Suppose we define a new struct called MyNetworkBuilder. It can contain any attribute required to build the network later, in Step 2. Let's use a dense neural network with two hidden layers.

mutable struct MyNetworkBuilder <: MLJFlux.Builder
    n1::Int # number of neurons in the first hidden layer
    n2::Int # number of neurons in the second hidden layer
end

Step 2: Build the neural network from this object by extending the MLJFlux.build function. It takes three arguments: the MyNetworkBuilder instance, the input dimension and the output dimension.

function MLJFlux.build(model::MyNetworkBuilder, input_dims, output_dims)
    layer1 = Flux.Dense(input_dims, model.n1)  # input features to first hidden layer
    layer2 = Flux.Dense(model.n1, model.n2)    # first hidden layer to second hidden layer
    layer3 = Flux.Dense(model.n2, output_dims) # second hidden layer to output
    return Flux.Chain(layer1, layer2, layer3)
end
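Note that Flux.Dense defaults to the identity activation, so the chain above is purely linear. If you want non-linearities, pass an activation function as a third argument to Flux.Dense. A minimal sketch using relu activations (MyReluBuilder is a hypothetical name, not used in the rest of this tutorial):

mutable struct MyReluBuilder <: MLJFlux.Builder
    n1::Int
    n2::Int
end

function MLJFlux.build(model::MyReluBuilder, input_dims, output_dims)
    return Flux.Chain(
        Flux.Dense(input_dims, model.n1, Flux.relu), # hidden layer 1 with relu
        Flux.Dense(model.n1, model.n2, Flux.relu),   # hidden layer 2 with relu
        Flux.Dense(model.n2, output_dims))           # linear output layer
end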

With these definitions ready, let us create an instance of our builder:

myregressor = MyNetworkBuilder(20, 10)
MyNetworkBuilder @788

Since Boston is a regression problem, we'll be using NeuralNetworkRegressor here. One thing to remember is that a NeuralNetworkRegressor object works seamlessly like any other MLJ model: you can wrap it in an MLJ machine and do anything you'd do otherwise.

Let's start by defining our NeuralNetworkRegressor object, which takes myregressor as its builder.

nnregressor = MLJFlux.NeuralNetworkRegressor(builder=myregressor, epochs=10)
NeuralNetworkRegressor(
    builder = MyNetworkBuilder(
            n1 = 20,
            n2 = 10),
    optimiser = Flux.Optimise.ADAM(0.001, (0.9, 0.999), IdDict{Any,Any}()),
    loss = Flux.mse,
    epochs = 10,
    batch_size = 1,
    lambda = 0.0,
    alpha = 0.0,
    optimiser_changes_trigger_retraining = false) @546

Other parameters that NeuralNetworkRegressor takes can be found here: https://github.com/alan-turing-institute/MLJFlux.jl#model-hyperparameters
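For instance, a variant with a smaller learning rate, a larger batch size and some regularization could be constructed as follows (these particular values are illustrative only, and not used below):

MLJFlux.NeuralNetworkRegressor(builder=myregressor,
                               optimiser=Flux.Optimise.ADAM(0.0005), # smaller learning rate
                               epochs=20,
                               batch_size=5,
                               lambda=0.1) # strength of the regularization penalty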

nnregressor now acts like any other MLJ model. Let's try wrapping it in an MLJ machine and calling fit! and predict.

mach = MLJ.machine(nnregressor, features, targets)
Machine{NeuralNetworkRegressor{MyNetworkBuilder,…}} @593 trained 0 times.
  args: 
    1:	Source @496 ⏎ `Table{AbstractArray{Continuous,1}}`
    2:	Source @992 ⏎ `AbstractArray{Continuous,1}`

Let's fit this on the train set:

MLJ.fit!(mach, rows=train, verbosity=3)
Machine{NeuralNetworkRegressor{MyNetworkBuilder,…}} @593 trained 1 time.
  args: 
    1:	Source @496 ⏎ `Table{AbstractArray{Continuous,1}}`
    2:	Source @992 ⏎ `AbstractArray{Continuous,1}`

As we can see, the training loss decreases at each epoch, showing that the neural network is gradually learning from the training set.

preds = MLJ.predict(mach, features[test, :])

print(preds[1:5])
Float32[29.322287, 26.417507, 24.125174, -2.5468833, 20.77854]
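To get a quick sense of how good these predictions are, we can compare them with the true targets on the test set using one of MLJ's built-in measures, such as the root mean squared error (a sanity check we add here; the exact value will depend on the random seed):

MLJ.rms(preds, targets[test])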

Now let's retrain our model. One thing to remember is that retraining may or may not re-initialize our neural network's parameters. For example, changing the number of epochs to 15 will not cause the model to train for 15 epochs from scratch, but only for 5 additional epochs.

nnregressor.epochs = 15

MLJ.fit!(mach, rows=train, verbosity=3)
Machine{NeuralNetworkRegressor{MyNetworkBuilder,…}} @593 trained 2 times.
  args: 
    1:	Source @496 ⏎ `Table{AbstractArray{Continuous,1}}`
    2:	Source @992 ⏎ `AbstractArray{Continuous,1}`

You can always specify that you want to retrain the model from scratch using the force=true parameter (see the documentation for fit! for more).
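For example, the following would re-initialize the network and train it for the full 15 epochs:

MLJ.fit!(mach, rows=train, force=true)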

However, changing parameters such as batch_size will necessarily cause re-training from scratch.

nnregressor.batch_size = 2
MLJ.fit!(mach, rows=train, verbosity=3)
Machine{NeuralNetworkRegressor{MyNetworkBuilder,…}} @593 trained 3 times.
  args: 
    1:	Source @496 ⏎ `Table{AbstractArray{Continuous,1}}`
    2:	Source @992 ⏎ `AbstractArray{Continuous,1}`

Another bit to remember here is that changing the optimiser doesn't trigger retraining by default. However, the optimiser_changes_trigger_retraining flag in NeuralNetworkRegressor can be toggled to accommodate this. This allows one to modify the learning rate, for example, after an initial burn-in period.
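As an illustrative sketch (not run here): with the flag left at its default value of false, lowering the learning rate and refitting continues training from the current weights rather than restarting:

nnregressor.optimiser = Flux.Optimise.ADAM(0.0001) # smaller learning rate for fine-tuning
MLJ.fit!(mach, rows=train) # continues from current weights; no re-initialization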

Inspecting out-of-sample loss as a function of epochs

r = MLJ.range(nnregressor, :epochs, lower=1, upper=30, scale=:log10)
curve = MLJ.learning_curve(nnregressor, features, targets,
                       range=r,
                       resampling=MLJ.Holdout(fraction_train=0.7),
                       measure=MLJ.l2)

figure(figsize=(8,6))

plot(curve.parameter_values, curve.measurements)

yscale("log")
xlabel(curve.parameter_name)
ylabel("l2")
[Figure BostonFlux1: out-of-sample l2 loss versus number of epochs.]

Tuning

As mentioned above, nnregressor can act like any other MLJ model. Let's try to tune the batch_size parameter.

bs = MLJ.range(nnregressor, :batch_size, lower=1, upper=5)

tm = MLJ.TunedModel(model=nnregressor, ranges=[bs, ], measure=MLJ.l2)
DeterministicTunedModel(
    model = NeuralNetworkRegressor(
            builder = MyNetworkBuilder @788,
            optimiser = Flux.Optimise.ADAM(0.001, (0.9, 0.999), IdDict{Any,Any}()),
            loss = Flux.mse,
            epochs = 15,
            batch_size = 2,
            lambda = 0.0,
            alpha = 0.0,
            optimiser_changes_trigger_retraining = false),
    tuning = Grid(
            goal = nothing,
            resolution = 10,
            shuffle = true,
            rng = Random._GLOBAL_RNG()),
    resampling = Holdout(
            fraction_train = 0.7,
            shuffle = false,
            rng = Random._GLOBAL_RNG()),
    measure = l2(),
    weights = nothing,
    operation = MLJModelInterface.predict,
    range = MLJBase.NumericRange{Int64,MLJBase.Bounded,Symbol}[NumericRange{Int64,…} @426],
    train_best = true,
    repeats = 1,
    n = nothing,
    acceleration = CPU1{Nothing}(nothing),
    acceleration_resampling = CPU1{Nothing}(nothing),
    check_measure = true) @281

For more on tuning, refer to the model-tuning tutorial.
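Nothing stops us from tuning several hyperparameters at once. A hypothetical sketch, combining the batch size range above with a range over epochs (ep and tm2 are illustrative names, not used below):

ep = MLJ.range(nnregressor, :epochs, lower=5, upper=15)
tm2 = MLJ.TunedModel(model=nnregressor, ranges=[bs, ep], measure=MLJ.l2)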

m = MLJ.machine(tm, features, targets)

MLJ.fit!(m)
Machine{DeterministicTunedModel{Grid,…}} @972 trained 1 time.
  args: 
    1:	Source @386 ⏎ `Table{AbstractArray{Continuous,1}}`
    2:	Source @367 ⏎ `AbstractArray{Continuous,1}`

This evaluated the model at each value of our range. The best value is:

MLJ.fitted_params(m).best_model.batch_size
2
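Since train_best=true by default, the machine m has been retrained using the best model found, so we can use it directly for prediction (tuned_preds is just an illustrative name):

tuned_preds = MLJ.predict(m, features[test, :])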