Common MLJ Workflows
Data ingestion
import RDatasets
channing = RDatasets.dataset("boot", "channing")
first(channing, 4)
 Row │ Sex  │ Entry │ Exit  │ Time  │ Cens
     │ Cat… │ Int32 │ Int32 │ Int32 │ Int32
─────┼──────┼───────┼───────┼───────┼──────
   1 │ Male │   782 │   909 │   127 │    1
   2 │ Male │  1020 │  1128 │   108 │    1
   3 │ Male │   856 │   969 │   113 │    1
   4 │ Male │   915 │   957 │    42 │    1
Inspecting metadata, including column scientific types:
schema(channing)
┌─────────┬────────────────────────────────┬───────────────┐
│ _.names │ _.types │ _.scitypes │
├─────────┼────────────────────────────────┼───────────────┤
│ Sex │ CategoricalValue{String,UInt8} │ Multiclass{2} │
│ Entry │ Int32 │ Count │
│ Exit │ Int32 │ Count │
│ Time │ Int32 │ Count │
│ Cens │ Int32 │ Count │
└─────────┴────────────────────────────────┴───────────────┘
_.nrows = 462
Unpacking data and correcting for wrong scitypes:
y, X = unpack(channing,
==(:Exit), # y is the :Exit column
!=(:Time); # X is the rest, except :Time
:Exit=>Continuous,
:Entry=>Continuous,
:Cens=>Multiclass)
first(X, 4)
 Row │ Sex  │ Entry   │ Cens
     │ Cat… │ Float64 │ Cat…
─────┼──────┼─────────┼─────
   1 │ Male │   782.0 │ 1
   2 │ Male │  1020.0 │ 1
   3 │ Male │   856.0 │ 1
   4 │ Male │   915.0 │ 1
Note: Before Julia 1.2, replace !=(:Time) with col -> col != :Time.
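For example, the equivalent call written with anonymous functions (a sketch of the same unpack call):
y, X = unpack(channing,
              col -> col == :Exit,   # equivalent to ==(:Exit)
              col -> col != :Time;   # equivalent to !=(:Time)
              :Exit=>Continuous,
              :Entry=>Continuous,
              :Cens=>Multiclass)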
y[1:4]
4-element Array{Float64,1}:
909.0
1128.0
969.0
957.0
Loading a built-in supervised dataset:
X, y = @load_iris;
selectrows(X, 1:4) # selectrows works for any Tables.jl table
(sepal_length = [5.1, 4.9, 4.7, 4.6],
sepal_width = [3.5, 3.0, 3.2, 3.1],
petal_length = [1.4, 1.4, 1.3, 1.5],
petal_width = [0.2, 0.2, 0.2, 0.2],)
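As the comment notes, selectrows is not restricted to named tuples; a minimal sketch, assuming the DataFrames package is available:
import DataFrames
selectrows(DataFrames.DataFrame(X), 1:4)   # same rows, extracted from a DataFrame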
y[1:4]
4-element CategoricalArray{String,1,UInt32}:
"setosa"
"setosa"
"setosa"
"setosa"
Model search
Reference: Model Search
Searching for a supervised model:
X, y = @load_boston
models(matching(X, y))
57-element Array{NamedTuple{(:name, :package_name, :is_supervised, :docstring, :hyperparameter_ranges, :hyperparameter_types, :hyperparameters, :implemented_methods, :is_pure_julia, :is_wrapper, :load_path, :package_license, :package_url, :package_uuid, :prediction_type, :supports_online, :supports_weights, :input_scitype, :target_scitype, :output_scitype),T} where T<:Tuple,1}:
(name = ARDRegressor, package_name = ScikitLearn, ... )
(name = AdaBoostRegressor, package_name = ScikitLearn, ... )
(name = BaggingRegressor, package_name = ScikitLearn, ... )
(name = BayesianRidgeRegressor, package_name = ScikitLearn, ... )
(name = ConstantRegressor, package_name = MLJModels, ... )
(name = DecisionTreeRegressor, package_name = DecisionTree, ... )
(name = DeterministicConstantRegressor, package_name = MLJModels, ... )
(name = DummyRegressor, package_name = ScikitLearn, ... )
(name = ElasticNetCVRegressor, package_name = ScikitLearn, ... )
(name = ElasticNetRegressor, package_name = MLJLinearModels, ... )
⋮
(name = RidgeRegressor, package_name = MultivariateStats, ... )
(name = RidgeRegressor, package_name = ScikitLearn, ... )
(name = RobustRegressor, package_name = MLJLinearModels, ... )
(name = SGDRegressor, package_name = ScikitLearn, ... )
(name = SVMLinearRegressor, package_name = ScikitLearn, ... )
(name = SVMNuRegressor, package_name = ScikitLearn, ... )
(name = SVMRegressor, package_name = ScikitLearn, ... )
(name = TheilSenRegressor, package_name = ScikitLearn, ... )
(name = XGBoostRegressor, package_name = XGBoost, ... )
models(matching(X, y))[6]
CART decision tree regressor.
→ based on [DecisionTree](https://github.com/bensadeghi/DecisionTree.jl).
→ do `@load DecisionTreeRegressor pkg="DecisionTree"` to use the model.
→ do `?DecisionTreeRegressor` for documentation.
(name = "DecisionTreeRegressor",
package_name = "DecisionTree",
is_supervised = true,
docstring = "CART decision tree regressor.\n→ based on [DecisionTree](https://github.com/bensadeghi/DecisionTree.jl).\n→ do `@load DecisionTreeRegressor pkg=\"DecisionTree\"` to use the model.\n→ do `?DecisionTreeRegressor` for documentation.",
hyperparameter_ranges = (nothing, nothing, nothing, nothing, nothing, nothing, nothing),
hyperparameter_types = ("Int64", "Int64", "Int64", "Float64", "Int64", "Bool", "Float64"),
hyperparameters = (:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :post_prune, :merge_purity_threshold),
implemented_methods = [:predict, :clean!, :fit, :fitted_params],
is_pure_julia = true,
is_wrapper = false,
load_path = "MLJDecisionTreeInterface.DecisionTreeRegressor",
package_license = "MIT",
package_url = "https://github.com/bensadeghi/DecisionTree.jl",
package_uuid = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb",
prediction_type = :deterministic,
supports_online = false,
supports_weights = false,
input_scitype = Table{_s24} where _s24<:Union{AbstractArray{_s23,1} where _s23<:Continuous, AbstractArray{_s23,1} where _s23<:Count, AbstractArray{_s23,1} where _s23<:OrderedFactor},
target_scitype = AbstractArray{Continuous,1},
output_scitype = Unknown,)
More refined searches:
models() do model
matching(model, X, y) &&
model.prediction_type == :deterministic &&
model.is_pure_julia
end
18-element Array{NamedTuple{(:name, :package_name, :is_supervised, :docstring, :hyperparameter_ranges, :hyperparameter_types, :hyperparameters, :implemented_methods, :is_pure_julia, :is_wrapper, :load_path, :package_license, :package_url, :package_uuid, :prediction_type, :supports_online, :supports_weights, :input_scitype, :target_scitype, :output_scitype),T} where T<:Tuple,1}:
(name = DecisionTreeRegressor, package_name = DecisionTree, ... )
(name = DeterministicConstantRegressor, package_name = MLJModels, ... )
(name = ElasticNetRegressor, package_name = MLJLinearModels, ... )
(name = EvoTreeRegressor, package_name = EvoTrees, ... )
(name = HuberRegressor, package_name = MLJLinearModels, ... )
(name = KNNRegressor, package_name = NearestNeighbors, ... )
(name = KPLSRegressor, package_name = PartialLeastSquaresRegressor, ... )
(name = LADRegressor, package_name = MLJLinearModels, ... )
(name = LassoRegressor, package_name = MLJLinearModels, ... )
(name = LinearRegressor, package_name = MLJLinearModels, ... )
(name = LinearRegressor, package_name = MultivariateStats, ... )
(name = NeuralNetworkRegressor, package_name = MLJFlux, ... )
(name = PLSRegressor, package_name = PartialLeastSquaresRegressor, ... )
(name = QuantileRegressor, package_name = MLJLinearModels, ... )
(name = RandomForestRegressor, package_name = DecisionTree, ... )
(name = RidgeRegressor, package_name = MLJLinearModels, ... )
(name = RidgeRegressor, package_name = MultivariateStats, ... )
(name = RobustRegressor, package_name = MLJLinearModels, ... )
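The registry can also be searched by text; a minimal sketch, assuming the string/regex form of models described in the Model Search reference:
models("Tree")        # models whose metadata mentions "Tree"
models(r"Regressor")  # regular expressions can be used as well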
Searching for an unsupervised model:
models(matching(X))
24-element Array{NamedTuple{(:name, :package_name, :is_supervised, :docstring, :hyperparameter_ranges, :hyperparameter_types, :hyperparameters, :implemented_methods, :is_pure_julia, :is_wrapper, :load_path, :package_license, :package_url, :package_uuid, :prediction_type, :supports_online, :supports_weights, :input_scitype, :target_scitype, :output_scitype),T} where T<:Tuple,1}:
(name = AffinityPropagation, package_name = ScikitLearn, ... )
(name = AgglomerativeClustering, package_name = ScikitLearn, ... )
(name = Birch, package_name = ScikitLearn, ... )
(name = ContinuousEncoder, package_name = MLJModels, ... )
(name = DBSCAN, package_name = ScikitLearn, ... )
(name = FactorAnalysis, package_name = MultivariateStats, ... )
(name = FeatureAgglomeration, package_name = ScikitLearn, ... )
(name = FeatureSelector, package_name = MLJModels, ... )
(name = FillImputer, package_name = MLJModels, ... )
(name = ICA, package_name = MultivariateStats, ... )
⋮
(name = MeanShift, package_name = ScikitLearn, ... )
(name = MiniBatchKMeans, package_name = ScikitLearn, ... )
(name = OPTICS, package_name = ScikitLearn, ... )
(name = OneClassSVM, package_name = LIBSVM, ... )
(name = OneHotEncoder, package_name = MLJModels, ... )
(name = PCA, package_name = MultivariateStats, ... )
(name = PPCA, package_name = MultivariateStats, ... )
(name = SpectralClustering, package_name = ScikitLearn, ... )
(name = Standardizer, package_name = MLJModels, ... )
Getting the metadata entry for a given model type:
info("PCA")
info("RidgeRegressor", pkg="MultivariateStats") # a model type in multiple packages
Ridge regressor with regularization parameter lambda. Learns a
linear regression with a penalty on the l2 norm of the coefficients.
→ based on [MultivariateStats](https://github.com/JuliaStats/MultivariateStats.jl).
→ do `@load RidgeRegressor pkg="MultivariateStats"` to use the model.
→ do `?RidgeRegressor` for documentation.
(name = "RidgeRegressor",
package_name = "MultivariateStats",
is_supervised = true,
docstring = "Ridge regressor with regularization parameter lambda. Learns a\nlinear regression with a penalty on the l2 norm of the coefficients.\n\n→ based on [MultivariateStats](https://github.com/JuliaStats/MultivariateStats.jl).\n→ do `@load RidgeRegressor pkg=\"MultivariateStats\"` to use the model.\n→ do `?RidgeRegressor` for documentation.",
hyperparameter_ranges = (nothing, nothing),
hyperparameter_types = ("Union{Real, Union{AbstractArray{T,1}, AbstractArray{T,2}} where T}", "Bool"),
hyperparameters = (:lambda, :bias),
implemented_methods = [:predict, :clean!, :fit, :fitted_params],
is_pure_julia = true,
is_wrapper = false,
load_path = "MLJMultivariateStatsInterface.RidgeRegressor",
package_license = "MIT",
package_url = "https://github.com/JuliaStats/MultivariateStats.jl",
package_uuid = "6f286f6a-111f-5878-ab1e-185364afe411",
prediction_type = :deterministic,
supports_online = false,
supports_weights = false,
input_scitype = Table{_s24} where _s24<:(AbstractArray{_s23,1} where _s23<:Continuous),
target_scitype = Union{AbstractArray{Continuous,1}, Table{_s24} where _s24<:(AbstractArray{_s23,1} where _s23<:Continuous)},
output_scitype = Unknown,)
Instantiating a model
Reference: Getting Started
@load DecisionTreeClassifier
model = DecisionTreeClassifier(min_samples_split=5, max_depth=4)
DecisionTreeClassifier(
max_depth = 4,
min_samples_leaf = 1,
min_samples_split = 5,
min_purity_increase = 0.0,
n_subfeatures = 0,
post_prune = false,
merge_purity_threshold = 1.0,
pdf_smoothing = 0.0,
display_depth = 5) @163
or
model = @load DecisionTreeClassifier
model.min_samples_split = 5
model.max_depth = 4
Evaluating a model
Reference: Evaluating Model Performance
X, y = @load_boston
model = @load KNNRegressor
evaluate(model, X, y, resampling=CV(nfolds=5), measure=[rms, mav])
┌───────────────────────────┬───────────────┬───────────────────────────────┐
│ _.measure │ _.measurement │ _.per_fold │
├───────────────────────────┼───────────────┼───────────────────────────────┤
│ RootMeanSquaredError @221 │ 8.77 │ [8.53, 8.8, 10.7, 9.43, 5.59] │
│ MeanAbsoluteError @600 │ 6.02 │ [6.52, 5.7, 7.65, 6.09, 4.11] │
└───────────────────────────┴───────────────┴───────────────────────────────┘
_.per_observation = [missing, missing]
_.fitted_params_per_fold = [ … ]
_.report_per_fold = [ … ]
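The object returned by evaluate can be captured, and the properties shown in the display above accessed directly (a sketch):
e = evaluate(model, X, y, resampling=CV(nfolds=5), measure=[rms, mav])
e.measurement   # one aggregated value per measure
e.per_fold      # fold-by-fold values, one vector per measure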
Basic fit/evaluate/predict by hand:
Reference: Getting Started, Machines, Evaluating Model Performance, Performance Measures
import RDatasets
vaso = RDatasets.dataset("robustbase", "vaso"); # a DataFrame
first(vaso, 3)
 Row │ Volume  │ Rate    │ Y
     │ Float64 │ Float64 │ Int64
─────┼─────────┼─────────┼──────
   1 │    3.7  │   0.825 │ 1
   2 │    3.5  │   1.09  │ 1
   3 │    1.25 │   2.5   │ 1
y, X = unpack(vaso, ==(:Y), c -> true; :Y => Multiclass)
tree_model = @load DecisionTreeClassifier
[ Info: For silent loading, specify `verbosity=0`.
[ Info: Model code for DecisionTreeClassifier already loaded
(MLJDecisionTreeInterface.DecisionTreeClassifier)() ✔
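As the Info message indicates, the loading step can be silenced:
tree_model = @load DecisionTreeClassifier verbosity=0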
Bind the model and data together in a machine, which will additionally store the learned parameters (fitresults) when fit:
tree = machine(tree_model, X, y)
Machine{DecisionTreeClassifier} @965 trained 0 times.
args:
1: Source @720 ⏎ `Table{AbstractArray{Continuous,1}}`
2: Source @172 ⏎ `AbstractArray{Multiclass{2},1}`
Split row indices into training and evaluation rows:
train, test = partition(eachindex(y), 0.7, shuffle=true, rng=1234); # 70:30 split
([27, 28, 30, 31, 32, 18, 21, 9, 26, 14 … 7, 39, 2, 37, 1, 8, 19, 25, 35, 34], [22, 13, 11, 4, 10, 16, 3, 20, 29, 23, 12, 24])
Fit on train and evaluate on test:
fit!(tree, rows=train)
yhat = predict(tree, X[test,:])
mean(cross_entropy(yhat, y[test]))
6.5216583816514975
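A point-prediction measure, such as accuracy, can be applied after converting the probabilistic predictions to modes (a sketch using predict_mode, demonstrated below):
yhat_mode = predict_mode(tree, X[test,:])
accuracy(yhat_mode, y[test])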
Predict on new data:
Xnew = (Volume=3*rand(3), Rate=3*rand(3))
predict(tree, Xnew) # a vector of distributions
3-element MLJBase.UnivariateFiniteArray{Multiclass{2},Int64,UInt32,Float64,1}:
UnivariateFinite{Multiclass{2}}(0=>0.9, 1=>0.1)
UnivariateFinite{Multiclass{2}}(0=>0.9, 1=>0.1)
UnivariateFinite{Multiclass{2}}(0=>0.273, 1=>0.727)
predict_mode(tree, Xnew) # a vector of point-predictions
3-element CategoricalArray{Int64,1,UInt32}:
0
0
1
More performance evaluation examples
import LossFunctions.ZeroOneLoss
Evaluating model + data directly:
evaluate(tree_model, X, y,
resampling=Holdout(fraction_train=0.7, shuffle=true, rng=1234),
measure=[cross_entropy, ZeroOneLoss()])
┌───────────────────────┬───────────────┬────────────┐
│ _.measure │ _.measurement │ _.per_fold │
├───────────────────────┼───────────────┼────────────┤
│ LogLoss{Float64} @362 │ 6.52 │ [6.52] │
│ ZeroOneLoss │ 0.417 │ [0.417] │
└───────────────────────┴───────────────┴────────────┘
_.per_observation = [[[0.105, 36.0, ..., 1.3]], [[0.0, 1.0, ..., 1.0]]]
_.fitted_params_per_fold = [ … ]
_.report_per_fold = [ … ]
If a machine is already defined, as above:
evaluate!(tree,
resampling=Holdout(fraction_train=0.7, shuffle=true, rng=1234),
measure=[cross_entropy, ZeroOneLoss()])
┌───────────────────────┬───────────────┬────────────┐
│ _.measure │ _.measurement │ _.per_fold │
├───────────────────────┼───────────────┼────────────┤
│ LogLoss{Float64} @362 │ 6.52 │ [6.52] │
│ ZeroOneLoss │ 0.417 │ [0.417] │
└───────────────────────┴───────────────┴────────────┘
_.per_observation = [[[0.105, 36.0, ..., 1.3]], [[0.0, 1.0, ..., 1.0]]]
_.fitted_params_per_fold = [ … ]
_.report_per_fold = [ … ]
Using cross-validation:
evaluate!(tree, resampling=CV(nfolds=5, shuffle=true, rng=1234),
measure=[cross_entropy, ZeroOneLoss()])
┌───────────────────────┬───────────────┬──────────────────────────────────┐
│ _.measure │ _.measurement │ _.per_fold │
├───────────────────────┼───────────────┼──────────────────────────────────┤
│ LogLoss{Float64} @362 │ 3.28 │ [9.25, 0.598, 4.93, 1.07, 0.546] │
│ ZeroOneLoss │ 0.407 │ [0.5, 0.375, 0.375, 0.5, 0.286] │
└───────────────────────┴───────────────┴──────────────────────────────────┘
_.per_observation = [[[2.22e-16, 0.944, ..., 2.22e-16], [0.847, 0.56, ..., 0.56], [0.799, 0.598, ..., 36.0], [2.01, 2.01, ..., 0.143], [0.405, 0.405, ..., 1.1]], [[0.0, 1.0, ..., 0.0], [1.0, 0.0, ..., 0.0], [1.0, 0.0, ..., 1.0], [1.0, 1.0, ..., 0.0], [0.0, 0.0, ..., 1.0]]]
_.fitted_params_per_fold = [ … ]
_.report_per_fold = [ … ]
With user-specified train/test pairs of row indices:
f1, f2, f3 = 1:13, 14:26, 27:36
pairs = [(f1, vcat(f2, f3)), (f2, vcat(f3, f1)), (f3, vcat(f1, f2))];
evaluate!(tree,
resampling=pairs,
measure=[cross_entropy, ZeroOneLoss()])
┌───────────────────────┬───────────────┬───────────────────────┐
│ _.measure │ _.measurement │ _.per_fold │
├───────────────────────┼───────────────┼───────────────────────┤
│ LogLoss{Float64} @362 │ 5.88 │ [2.16, 11.0, 4.51] │
│ ZeroOneLoss │ 0.241 │ [0.304, 0.304, 0.115] │
└───────────────────────┴───────────────┴───────────────────────┘
_.per_observation = [[[0.154, 0.154, ..., 0.154], [2.22e-16, 36.0, ..., 2.22e-16], [2.22e-16, 2.22e-16, ..., 0.693]], [[0.0, 0.0, ..., 0.0], [0.0, 1.0, ..., 0.0], [0.0, 0.0, ..., 0.0]]]
_.fitted_params_per_fold = [ … ]
_.report_per_fold = [ … ]
Changing a hyperparameter and re-evaluating:
tree_model.max_depth = 3
evaluate!(tree,
resampling=CV(nfolds=5, shuffle=true, rng=1234),
measure=[cross_entropy, ZeroOneLoss()])
┌───────────────────────┬───────────────┬────────────────────────────────────┐
│ _.measure │ _.measurement │ _.per_fold │
├───────────────────────┼───────────────┼────────────────────────────────────┤
│ LogLoss{Float64} @362 │ 3.14 │ [9.18, 0.484, 4.86, 0.564, 0.624] │
│ ZeroOneLoss │ 0.361 │ [0.375, 0.25, 0.375, 0.375, 0.429] │
└───────────────────────┴───────────────┴────────────────────────────────────┘
_.per_observation = [[[2.22e-16, 1.32, ..., 2.22e-16], [2.22e-16, 0.318, ..., 0.318], [0.575, 2.22e-16, ..., 36.0], [1.5, 1.5, ..., 2.22e-16], [1.22, 2.22e-16, ..., 0.348]], [[0.0, 1.0, ..., 0.0], [0.0, 0.0, ..., 0.0], [0.0, 0.0, ..., 1.0], [1.0, 1.0, ..., 0.0], [1.0, 0.0, ..., 0.0]]]
_.fitted_params_per_fold = [ … ]
_.report_per_fold = [ … ]
Inspecting training results
Fit an ordinary least squares model to some synthetic data:
x1 = rand(100)
x2 = rand(100)
X = (x1=x1, x2=x2)
y = x1 - 2x2 + 0.1*rand(100);
ols_model = @load LinearRegressor pkg=GLM
ols = machine(ols_model, X, y)
fit!(ols)
Machine{LinearRegressor} @538 trained 1 time.
args:
1: Source @567 ⏎ `Table{AbstractArray{Continuous,1}}`
2: Source @434 ⏎ `AbstractArray{Continuous,1}`
Get a named tuple representing the learned parameters, human-readable if appropriate:
fitted_params(ols)
(coef = [0.9967379417603076, -1.9968741775646128],
intercept = 0.05115868742692315,)
Get other training-related information:
report(ols)
(deviance = 0.0711827572566079,
dof_residual = 97.0,
stderror = [0.00948741098581678, 0.00966206429402877, 0.007462397809469231],
vcov = [9.001096721379693e-5 7.90714605162585e-6 -4.804577581003967e-5; 7.90714605162585e-6 9.335548642194568e-5 -5.008676999456014e-5; -4.804577581003967e-5 -5.008676999456014e-5 5.568738106677118e-5],)
Basic fit/transform for unsupervised models
Load data:
X, y = @load_iris
train, test = partition(eachindex(y), 0.97, shuffle=true, rng=123)
([125, 100, 130, 9, 70, 148, 39, 64, 6, 107 … 110, 59, 139, 21, 112, 144, 140, 72, 109, 41], [106, 147, 47, 5])
Instantiate and fit the model/machine:
@load PCA
pca_model = PCA(maxoutdim=2)
pca = machine(pca_model, X)
fit!(pca, rows=train)
Machine{PCA} @602 trained 1 time.
args:
1: Source @787 ⏎ `Table{AbstractArray{Continuous,1}}`
Transform selected data bound to the machine:
transform(pca, rows=test);
(x1 = [-3.3942826854483243, -1.5219827578765068, 2.538247455185219, 2.7299639893931373],
x2 = [0.5472450223745241, -0.36842368617126214, 0.5199299511335698, 0.3448466122232363],)
Transform new data:
Xnew = (sepal_length=rand(3), sepal_width=rand(3),
petal_length=rand(3), petal_width=rand(3));
transform(pca, Xnew)
(x1 = [4.91833617021395, 4.386896773799848, 4.3621336673249465],
x2 = [-4.23268203762916, -5.0300370692878635, -4.89504672814389],)
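As with any machine, the learned parameters and training report of the PCA machine can be inspected; exactly which fields appear is determined by the MultivariateStats interface:
fitted_params(pca)   # e.g. the learned projection
report(pca)          # e.g. variances associated with each principal component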
Inverting learned transformations
y = rand(100);
stand_model = UnivariateStandardizer()
stand = machine(stand_model, y)
fit!(stand)
z = transform(stand, y);
@assert inverse_transform(stand, z) ≈ y # true
[ Info: Training Machine{UnivariateStandardizer} @026.
Nested hyperparameter tuning
Reference: Tuning Models
Define a model with nested hyperparameters:
tree_model = @load DecisionTreeClassifier
forest_model = EnsembleModel(atom=tree_model, n=300)
ProbabilisticEnsembleModel(
atom = DecisionTreeClassifier(
max_depth = -1,
min_samples_leaf = 1,
min_samples_split = 2,
min_purity_increase = 0.0,
n_subfeatures = 0,
post_prune = false,
merge_purity_threshold = 1.0,
pdf_smoothing = 0.0,
display_depth = 5),
atomic_weights = Float64[],
bagging_fraction = 0.8,
rng = Random._GLOBAL_RNG(),
n = 300,
acceleration = CPU1{Nothing}(nothing),
out_of_bag_measure = Any[]) @171
Inspect all hyperparameters, even nested ones (returns nested named tuple):
params(forest_model)
(atom = (max_depth = -1,
min_samples_leaf = 1,
min_samples_split = 2,
min_purity_increase = 0.0,
n_subfeatures = 0,
post_prune = false,
merge_purity_threshold = 1.0,
pdf_smoothing = 0.0,
display_depth = 5,),
atomic_weights = Float64[],
bagging_fraction = 0.8,
rng = Random._GLOBAL_RNG(),
n = 300,
acceleration = CPU1{Nothing}(nothing),
out_of_bag_measure = Any[],)
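Nested hyperparameters can also be read (or set) individually using dot syntax:
forest_model.atom.min_samples_split   # 2
forest_model.bagging_fraction         # 0.8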
Define ranges for hyperparameters to be tuned:
r1 = range(forest_model, :bagging_fraction, lower=0.5, upper=1.0, scale=:log10)
MLJBase.NumericRange(Float64, :bagging_fraction, ... )
r2 = range(forest_model, :(atom.n_subfeatures), lower=1, upper=4) # nested
MLJBase.NumericRange(Int64, :(atom.n_subfeatures), ... )
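To preview the values a range will generate, one can use the iterator function from the ranges API (a sketch; five and four grid points are arbitrary choices here):
iterator(r1, 5)   # five bagging_fraction values, log10-spaced
iterator(r2, 4)   # integer values between 1 and 4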
Wrap the model in a tuning strategy:
tuned_forest = TunedModel(model=forest_model,
tuning=Grid(resolution=12),
resampling=CV(nfolds=6),
ranges=[r1, r2],
measure=cross_entropy)
ProbabilisticTunedModel(
model = ProbabilisticEnsembleModel(
atom = DecisionTreeClassifier @232,
atomic_weights = Float64[],
bagging_fraction = 0.8,
rng = Random._GLOBAL_RNG(),
n = 300,
acceleration = CPU1{Nothing}(nothing),
out_of_bag_measure = Any[]),
tuning = Grid(
goal = nothing,
resolution = 12,
shuffle = true,
rng = Random._GLOBAL_RNG()),
resampling = CV(
nfolds = 6,
shuffle = false,
rng = Random._GLOBAL_RNG()),
measure = LogLoss(
tol = 2.220446049250313e-16),
weights = nothing,
operation = MLJModelInterface.predict,
range = MLJBase.NumericRange{T,MLJBase.Bounded,Symbol} where T[NumericRange{Float64,…} @670, NumericRange{Int64,…} @547],
selection_heuristic = MLJTuning.NaiveSelection(nothing),
train_best = true,
repeats = 1,
n = nothing,
acceleration = CPU1{Nothing}(nothing),
acceleration_resampling = CPU1{Nothing}(nothing),
check_measure = true) @013
Bind the wrapped model to the iris data:
X, y = @load_iris
tuned = machine(tuned_forest, X, y)
Machine{ProbabilisticTunedModel{Grid,…}} @442 trained 0 times.
args:
1: Source @595 ⏎ `Table{AbstractArray{Continuous,1}}`
2: Source @997 ⏎ `AbstractArray{Multiclass{3},1}`
Fitting the resultant machine optimizes the hyperparameters specified in range, using the specified tuning and resampling strategies and performance measure (possibly a vector of measures), and retrains on all data bound to the machine:
fit!(tuned)
Machine{ProbabilisticTunedModel{Grid,…}} @442 trained 1 time.
args:
1: Source @595 ⏎ `Table{AbstractArray{Continuous,1}}`
2: Source @997 ⏎ `AbstractArray{Multiclass{3},1}`
Inspecting the optimal model:
F = fitted_params(tuned)
(best_model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @265,
best_fitted_params = (fitresult = WrappedEnsemble{Tuple{Node{Float64,…},…},…} @280,),)
F.best_model
ProbabilisticEnsembleModel(
atom = DecisionTreeClassifier(
max_depth = -1,
min_samples_leaf = 1,
min_samples_split = 2,
min_purity_increase = 0.0,
n_subfeatures = 3,
post_prune = false,
merge_purity_threshold = 1.0,
pdf_smoothing = 0.0,
display_depth = 5),
atomic_weights = Float64[],
bagging_fraction = 0.5,
rng = Random._GLOBAL_RNG(),
n = 300,
acceleration = CPU1{Nothing}(nothing),
out_of_bag_measure = Any[]) @265
Inspecting details of tuning procedure:
report(tuned)
(best_model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @265,
best_history_entry = (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @265,
measure = LogLoss{Float64}[LogLoss{Float64} @362],
measurement = [0.15631175701529243],
per_fold = [[0.0005342242024860014, 3.663735981263026e-15, 0.21485963265785696, 0.2660885008230325, 0.20661455570945844, 0.2497736286989171]],),
history = NamedTuple{(:model, :measure, :measurement, :per_fold),Tuple{MLJ.ProbabilisticEnsembleModel{MLJDecisionTreeInterface.DecisionTreeClassifier},Array{LogLoss{Float64},1},Array{Float64,1},Array{Array{Float64,1},1}}}[(model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @242, measure = [LogLoss{Float64} @362], measurement = [0.1785645358819857], per_fold = [[0.03590676928008607, 0.0047404723303941815, 0.2509402798216217, 0.2686672487293355, 0.2684149902688059, 0.24271745486167084]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @305, measure = [LogLoss{Float64} @362], measurement = [0.4463043411259328], per_fold = [[3.663735981263026e-15, 3.663735981263026e-15, 0.36705597903279646, 1.6223381950906721, 0.3504529551287226, 0.3379789175033981]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @153, measure = [LogLoss{Float64} @362], measurement = [0.1592493722215185], per_fold = [[3.663735981263026e-15, 0.0014691165568300926, 0.2076348040941929, 0.2537762415913282, 0.24970257593125406, 0.24291349515550198]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @153, measure = [LogLoss{Float64} @362], measurement = [1.3298848709257214], per_fold = [[3.663735981263026e-15, 3.663735981263026e-15, 2.948230623231027, 2.8865286548933877, 1.6536420826671274, 0.49090786476277887]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @549, measure = [LogLoss{Float64} @362], measurement = [0.15999340169417617], per_fold = [[0.0009348923543477548, 3.663735981263026e-15, 0.19836809907355993, 0.27803103721856987, 0.22738249078080375, 0.25524389073777193]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @666, measure = [LogLoss{Float64} @362], measurement = [0.17921898245495735], per_fold = [[3.663735981263026e-15, 3.663735981263026e-15, 0.21948946241212902, 0.355368904439443, 0.24515591499818906, 0.2552996128799755]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @513, measure = [LogLoss{Float64} @362], measurement = [0.43396845367556147], per_fold = [[0.032017935938584687, 0.0035517913799105196, 0.3279876744559052, 0.34076666596321054, 1.5695175000603998, 0.32996915425535794]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @791, measure = [LogLoss{Float64} @362], measurement = [0.20260574192576317], per_fold = [[0.05897827146107524, 0.023926159977484502, 0.31074066688564467, 0.24801620383370668, 0.28777031701797273, 0.2862028323786951]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @494, measure = [LogLoss{Float64} @362], measurement = [0.18081516169424874], per_fold = [[0.026286073093313735, 0.004328541239222315, 0.23247200685256708, 0.2652779863486972, 0.29879618350865933, 0.2577301791230328]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @647, measure = [LogLoss{Float64} @362], measurement = [0.16722014124636594], per_fold = [[3.663735981263026e-15, 0.0017362286580712614, 0.24095642790561278, 0.3091028657536715, 0.2149655025672511, 0.23655982259358538]]) … (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @211, measure = [LogLoss{Float64} @362], measurement = [2.4255234628827984], per_fold = [[3.663735981263026e-15, 3.663735981263026e-15, 4.358077628776857, 2.883492271129374, 2.8951777280414235, 4.416393149349128]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @948, measure = [LogLoss{Float64} @362], measurement = [0.2090407140329373], per_fold = [[0.02831564032065296, 0.004506791071048855, 0.27601640487668805, 0.30452267654243464, 
0.3490217855967222, 0.2918609857900771]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @703, measure = [LogLoss{Float64} @362], measurement = [0.16083472141162658], per_fold = [[3.663735981263026e-15, 0.0010684484049683393, 0.21492655396160798, 0.2597106614778315, 0.22544275100841898, 0.2638599136169291]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @672, measure = [LogLoss{Float64} @362], measurement = [0.1994230987502936], per_fold = [[0.06116916887861973, 0.015584186177583987, 0.28215186319471763, 0.23756623145557718, 0.2987602428405842, 0.30130689995467874]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @278, measure = [LogLoss{Float64} @362], measurement = [0.41249122803305976], per_fold = [[3.663735981263026e-15, 3.663735981263026e-15, 0.2657569427952801, 1.5936051181000055, 0.3018922342101013, 0.3136930730929644]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @884, measure = [LogLoss{Float64} @362], measurement = [0.2023236342991296], per_fold = [[0.06286249605753577, 0.022903792490086262, 0.2937978008986808, 0.24811267515787921, 0.2979618052214433, 0.2883032359691523]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @320, measure = [LogLoss{Float64} @362], measurement = [0.20189938007674044], per_fold = [[3.663735981263026e-15, 3.663735981263026e-15, 0.2982214629431348, 0.35723702483212205, 0.272758416319512, 0.2831793763656666]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @226, measure = [LogLoss{Float64} @362], measurement = [0.6861204605077517], per_fold = [[3.663735981263026e-15, 3.663735981263026e-15, 0.39798567509449734, 1.6739374212558218, 1.62454223647529, 0.4202574302208935]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @637, measure = [LogLoss{Float64} @362], measurement = [0.15704927403459956], per_fold = [[3.663735981263026e-15, 0.00013355605062424815, 0.20897088898132604, 0.27368618829591623, 0.23393617273574116, 0.22556883814398604]]), (model = ProbabilisticEnsembleModel{DecisionTreeClassifier} @612, measure = [LogLoss{Float64} @362], measurement = [0.21668447999206406], per_fold = [[0.07637809228151302, 0.03248829731892487, 0.31802618054294335, 0.27937584863086456, 0.31631872838886144, 0.27751973278927705]])],
best_report = (measures = Any[],
oob_measurements = missing,),
plotting = (parameter_names = ["bagging_fraction", "atom.n_subfeatures"],
parameter_scales = [:log10, :linear],
parameter_values = Any[0.5671562610977313 2; 0.8815912549960212 3; … ; 0.5325205447199813 3; 0.5671562610977313 1],
measurements = [0.1785645358819857, 0.4463043411259328, 0.1592493722215185, 1.3298848709257214, 0.15999340169417617, 0.17921898245495735, 0.43396845367556147, 0.20260574192576317, 0.18081516169424874, 0.16722014124636594 … 2.4255234628827984, 0.2090407140329373, 0.16083472141162658, 0.1994230987502936, 0.41249122803305976, 0.2023236342991296, 0.20189938007674044, 0.6861204605077517, 0.15704927403459956, 0.21668447999206406],),)
Visualizing these results:
using Plots
plot(tuned)
Predicting on new data using the optimized model:
predict(tuned, Xnew)
3-element Array{UnivariateFinite{Multiclass{3},String,UInt32,Float64},1}:
UnivariateFinite{Multiclass{3}}(versicolor=>0.0, virginica=>0.0, setosa=>1.0)
UnivariateFinite{Multiclass{3}}(versicolor=>0.0, virginica=>0.0, setosa=>1.0)
UnivariateFinite{Multiclass{3}}(versicolor=>0.0, virginica=>0.0, setosa=>1.0)
Constructing a linear pipeline
Reference: Composing Models
Constructing a linear (unbranching) pipeline with a learned target transformation/inverse transformation:
X, y = @load_reduced_ames
@load KNNRegressor
pipe = @pipeline(X -> coerce(X, :age=>Continuous),
OneHotEncoder,
KNNRegressor(K=3),
target = UnivariateStandardizer)
Pipeline259(
one_hot_encoder = OneHotEncoder(
features = Symbol[],
drop_last = false,
ordered_factor = true,
ignore = false),
knn_regressor = KNNRegressor(
K = 3,
algorithm = :kdtree,
metric = Distances.Euclidean(0.0),
leafsize = 10,
reorder = true,
weights = :uniform),
target = UnivariateStandardizer()) @559
Evaluating the pipeline (just as you would any other model):
pipe.knn_regressor.K = 2
pipe.one_hot_encoder.drop_last = true
evaluate(pipe, X, y, resampling=Holdout(), measure=rms, verbosity=2)
┌───────────────────────────┬───────────────┬────────────┐
│ _.measure │ _.measurement │ _.per_fold │
├───────────────────────────┼───────────────┼────────────┤
│ RootMeanSquaredError @221 │ 53100.0 │ [53100.0] │
└───────────────────────────┴───────────────┴────────────┘
_.per_observation = [missing]
_.fitted_params_per_fold = [ … ]
_.report_per_fold = [ … ]
Inspecting the learned parameters in a pipeline:
mach = machine(pipe, X, y) |> fit!
F = fitted_params(mach)
F.one_hot_encoder
(fitresult = OneHotEncoderResult @485,)
Constructing a linear (unbranching) pipeline with a static (unlearned) target transformation/inverse transformation:
@load DecisionTreeRegressor
pipe2 = @pipeline(X -> coerce(X, :age=>Continuous),
OneHotEncoder,
DecisionTreeRegressor(max_depth=4),
target = y -> log.(y),
inverse = z -> exp.(z))
Pipeline270(
one_hot_encoder = OneHotEncoder(
features = Symbol[],
drop_last = false,
ordered_factor = true,
ignore = false),
decision_tree_regressor = DecisionTreeRegressor(
max_depth = 4,
min_samples_leaf = 5,
min_samples_split = 2,
min_purity_increase = 0.0,
n_subfeatures = 0,
post_prune = false,
merge_purity_threshold = 1.0),
target = WrappedFunction(
f = Main.ex-workflows.var"#28#29"()),
inverse = WrappedFunction(
f = Main.ex-workflows.var"#30#31"())) @858
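The new pipeline can be evaluated just like the first one, for example re-using the holdout strategy from above:
evaluate(pipe2, X, y, resampling=Holdout(), measure=mav)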
Creating a homogeneous ensemble of models
Reference: Homogeneous Ensembles
X, y = @load_iris
tree_model = @load DecisionTreeClassifier
forest_model = EnsembleModel(atom=tree_model, bagging_fraction=0.8, n=300)
forest = machine(forest_model, X, y)
evaluate!(forest, measure=cross_entropy)
┌───────────────────────┬───────────────┬───────────────────────────────────────
│ _.measure │ _.measurement │ _.per_fold ⋯
├───────────────────────┼───────────────┼───────────────────────────────────────
│ LogLoss{Float64} @362 │ 0.427 │ [3.66e-15, 3.66e-15, 0.343, 0.376, 1 ⋯
└───────────────────────┴───────────────┴───────────────────────────────────────
1 column omitted
_.per_observation = [[[3.66e-15, 3.66e-15, ..., 3.66e-15], [3.66e-15, 3.66e-15, ..., 3.66e-15], [0.0134, 0.00669, ..., 3.66e-15], [3.66e-15, 0.124, ..., 3.66e-15], [3.66e-15, 0.0101, ..., 3.66e-15], [0.027, 0.452, ..., 0.0513]]]
_.fitted_params_per_fold = [ … ]
_.report_per_fold = [ … ]
Performance curves
Generate a plot of performance as a function of some hyperparameter, building on the preceding example.
Single performance curve:
r = range(forest_model, :n, lower=1, upper=1000, scale=:log10)
curve = learning_curve(forest,
range=r,
resampling=Holdout(),
resolution=50,
measure=cross_entropy,
verbosity=0)
(parameter_name = "n",
parameter_scale = :log10,
parameter_values = [1, 2, 3, 4, 5, 6, 7, 8, 10, 11 … 281, 324, 373, 429, 494, 569, 655, 754, 869, 1000],
measurements = [4.004850376568572, 4.004850376568572, 4.049902055247258, 4.036815051285438, 4.084968317774426, 1.8244269215692899, 1.8468233309667232, 1.8668965941528186, 1.8923221673402184, 1.9171558926328238 … 1.239151865694494, 1.231705368610279, 1.2333613533294105, 1.231082949478617, 1.2298493110802817, 1.2372010344223723, 1.243093651116299, 1.2454797990813242, 1.248950346714818, 1.246309076716773],)
using Plots
plot(curve.parameter_values, curve.measurements, xlab=curve.parameter_name, xscale=curve.parameter_scale)
Multiple curves:
curve = learning_curve(forest,
range=r,
resampling=Holdout(),
measure=cross_entropy,
resolution=50,
rng_name=:rng,
rngs=4,
verbosity=0)
(parameter_name = "n",
parameter_scale = :log10,
parameter_values = [1, 2, 3, 4, 5, 6, 7, 8, 10, 11 … 281, 324, 373, 429, 494, 569, 655, 754, 869, 1000],
measurements = [8.009700753137146 7.20873067782343 15.218431430960575 4.004850376568572; 8.040507294495367 7.20873067782343 15.218431430960575 4.004850376568572; … ; 1.2609398407494588 1.2210492862915818 1.2472866893288048 1.2292213995690946; 1.2653153185845036 1.2211269697091909 1.2454437132580183 1.2293077280776372],)
plot(curve.parameter_values, curve.measurements,
xlab=curve.parameter_name, xscale=curve.parameter_scale)