Introduction
These Java TreeStats examples are extracted from the most popular open source projects; you can refer to the following examples for usage.
Programming language: Java
Namespace/package name: hex.gbm.DTree.TreeModel
Example #1
File: GBM.java
Project: jgustave/h2o
/**
 * Grows the GBM model one boosting round at a time, up to {@code ntrees} rounds.
 *
 * <p>Overall scheme: split all the data according to some criterion (minimize variance at the
 * leaves), record on each row which split it falls into, and assign it a split number for the
 * next pass. On the current pass the split number is used to build a per-split histogram with a
 * per-histogram-bucket variance.
 *
 * @param model the model scored and replaced on every round
 * @param fr the working frame handed to every task and to the tree builder
 * @param names not read by this method body
 * @param domains not read by this method body
 * @param cmDomain forwarded unchanged to {@code doScoring}
 * @param t_build not read by this method body
 * @return the model produced by the final scoring call
 */
@Override
protected GBMModel buildModel(
    GBMModel model,
    final Frame fr,
    String names[],
    String domains[][],
    String[] cmDomain,
    Timer t_build) {
  // Rows that lack a response value are tagged out before any tree is grown.
  new ExcludeNAResponse().doAll(fr);

  final TreeStats stats = new TreeStats(); // accumulated statistics over the built trees
  DTree[] latestTrees = null; // trees produced by the most recent round
  int round = 0;

  // Keep growing trees until the configured limit is reached (or the job is canceled).
  while (round < ntrees) {
    // On the first round the model holds 0 trees, then 0 trees, then 1 tree, ...
    // However, when no validation is specified the model does not take part in voting;
    // the data computed on the fly are used instead.
    model = doScoring(model, fr, latestTrees, round, cmDomain, stats, false, false, false);

    // ESL2, page 387, step 2a: derive the prediction (probability distribution) from the
    // results of the previous trees. Work <== f(Tree)
    new ComputeProb().doAll(fr);

    // ESL2, page 387, step 2b i: derive residuals from that prediction. Work <== f(Work)
    new ComputeRes().doAll(fr);

    // ESL2, page 387, steps 2b ii, iii, iv: build this round's trees, timing the work.
    final Timer roundTimer = new Timer();
    latestTrees = buildNextKTrees(fr);
    Log.info(Sys.GBM__, (round + 1) + ". tree was built in " + roundTimer.toString());

    // A cancel during building leaves the counter and the stats untouched: no bulk scoring.
    if (!Job.isRunning(self())) {
      break;
    }

    // Fold the freshly built trees into the running statistics.
    stats.updateBy(latestTrees);
    round++;
  }

  // Final scoring pass.
  return doScoring(model, fr, latestTrees, round, cmDomain, stats, true, false, false);
}
Example #2
File: DRF.java
Project: rohit2412/h2o
/**
 * Grows the DRF model one round at a time, up to {@code ntrees} rounds, scoring before every
 * round and once more after the loop ends.
 *
 * @param model the model scored and replaced on every round
 * @param fr the working frame; an {@code "OUT_BAG_TREES"} column is appended to it first
 * @param names not read by this method body
 * @param domains not read by this method body
 * @param t_build not read by this method body
 * @return the model produced by the final scoring call
 */
@Override
protected DRFModel buildModel(
    DRFModel model, final Frame fr, String names[], String domains[][], final Timer t_build) {
  // Extra column counting the trees that take part in on-the-fly scoring.
  fr.add("OUT_BAG_TREES", response.makeZero());

  // Random generator used when picking split columns.
  final Random splitRng = createRNG(_seed);

  // Set up the working columns.
  new SetWrkTask().doAll(fr);

  final TreeStats stats = new TreeStats(); // statistics accumulated over the built trees
  DTree[] latestTrees = null; // trees produced by the most recent round
  int round = 0;

  // Keep growing trees until the configured limit is reached (or the job is canceled).
  while (round < ntrees) {
    final boolean firstRound = round == 0;
    model =
        doScoring(
            model,
            fr,
            latestTrees,
            round,
            stats,
            firstRound,
            !hasValidation(),
            build_tree_one_node);

    // Every round builds K trees (K = nclass = response column domain size).
    // TODO: parallelize more? Building more than K trees at a time requires care with
    // temporary data.
    // Idea: launch more DRF at once.
    final Timer roundTimer = new Timer();
    latestTrees = buildNextKTrees(fr, _mtry, sample_rate, splitRng, round);
    Log.info(Sys.DRF__, (round + 1) + ". tree was built " + roundTimer.toString());

    // A cancel during building leaves the counter and the stats untouched: no bulk scoring.
    if (!Job.isRunning(self())) {
      break;
    }

    // Fold the freshly built trees into the running statistics.
    stats.updateBy(latestTrees);
    round++;
  }

  // Final scoring pass.
  model =
      doScoring(
          model, fr, latestTrees, round, stats, true, !hasValidation(), build_tree_one_node);

  // Sanity check: with variable importance enabled, no tree votes may have been missed.
  assert !importance
          || _treeMeasuresOnOOB.npredictors()
              == _treeMeasuresOnSOOB[0 /*variable*/].npredictors()
      : "Missing some tree votes in variable importance voting?!";

  return model;
}